Last active
April 10, 2023 10:11
-
-
Save kitsamho/f11e20f6eded9525abf972ab9c624c13 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| from sklearn.metrics.pairwise import cosine_similarity | |
def rank_vectors(vector_a: np.ndarray, vectors_n: np.ndarray, labels: pd.Series) -> pd.DataFrame:
    """
    Rank a set of vectors by their cosine similarity to a target vector.

    Labels are paired with vectors by position: the i-th label describes the
    i-th vector in ``vectors_n``. Any index carried by ``labels`` is ignored.

    Args:
        vector_a (np.ndarray): The target vector to compare against other vectors.
        vectors_n (np.ndarray): An array (or sequence) of vectors to compare
            against the target vector; stacked row-wise with ``np.vstack``.
        labels (pd.Series): A series of labels, one per vector in vectors_n.

    Returns:
        pd.DataFrame: One row per vector, holding the label column (named after
        ``labels.name``, or ``"label"`` if the series is unnamed) and the
        similarity score in column ``0``, sorted by score in descending order
        with a fresh 0..n-1 index.

    Raises:
        ValueError: If the number of labels differs from the number of vectors.
    """
    candidates = np.vstack(vectors_n)

    # Fail loudly on a label/vector count mismatch instead of letting concat
    # pad the shorter side with NaN rows.
    if len(labels) != candidates.shape[0]:
        raise ValueError(
            f"labels has {len(labels)} entries but vectors_n contains "
            f"{candidates.shape[0]} vectors"
        )

    # cosine_similarity returns a (1, n) matrix; flatten to one score per vector.
    scores = cosine_similarity(vector_a.reshape(1, -1), candidates).ravel()

    # concat(axis=1) aligns on index, so drop whatever index the caller's labels
    # carry (e.g. from a filtered DataFrame) to pair labels and scores by position.
    positional_labels = labels.reset_index(drop=True)

    # An unnamed Series would be auto-named 0 by concat and collide with the
    # score column, making the sort below ambiguous.
    if isinstance(positional_labels, pd.Series) and positional_labels.name is None:
        positional_labels = positional_labels.rename("label")

    # Keep the score column named 0 for compatibility with existing callers.
    df_results = pd.concat([positional_labels, pd.Series(scores, name=0)], axis=1)

    # Sort the DataFrame by similarity scores in descending order
    return df_results.sort_values(by=0, ascending=False).reset_index(drop=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment