# kitsamho/rank_vectors.py

## rank_vectors.py
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def rank_vectors(vector_a: np.ndarray, vectors_n: np.ndarray, labels: pd.Series) -> pd.DataFrame:
    """
    Rank a set of vectors by their cosine similarity to a target vector.

    Labels are matched to vectors by position: the i-th label belongs to the
    i-th row of the stacked ``vectors_n``. The index carried by ``labels`` is
    discarded, so a Series sliced out of a larger DataFrame still pairs up
    with the right scores.

    Args:
        vector_a (np.ndarray): The target vector; it is reshaped to a single row before comparison.
        vectors_n (np.ndarray): The vectors to compare against the target, given as a 2-D array or a
            sequence of arrays that ``np.vstack`` can stack row-wise.
        labels (pd.Series): One label per stacked vector, in the same order as ``vectors_n``.

    Returns:
        pd.DataFrame: The labels alongside a column named ``0`` holding the similarity scores,
            sorted by score in descending order, with the index reset.
    """
    # cosine_similarity yields a (1, n) matrix here; flatten it to one score per vector
    target_row = vector_a.reshape(1, -1)
    scores = cosine_similarity(target_row, np.vstack(vectors_n)).ravel()
    similarities = pd.DataFrame(scores)

    # pd.concat(axis=1) joins on index labels, not on position. Drop the caller's
    # index so every label lands next to its own score instead of producing
    # NaN-padded, misaligned rows.
    positional_labels = labels.reset_index(drop=True)
    df_results = pd.concat([positional_labels, similarities], axis=1)

    # NOTE(review): the score column is labelled 0, so a labels Series whose name
    # is None or 0 presumably collides with it -- confirm callers pass a named Series.

    # Highest similarity first; renumber rows so the index doubles as the rank
    return df_results.sort_values(by=0, ascending=False).reset_index(drop=True)
	import pandas as pd
	import numpy as np
	from sklearn.metrics.pairwise import cosine_similarity


	def rank_vectors(vector_a: np.ndarray, vectors_n: np.ndarray, labels: pd.Series) -> pd.DataFrame:
	    """
	    Rank a set of vectors by their cosine similarity to a target vector.

	    Labels are matched to vectors by position: the i-th label belongs to the
	    i-th row of the stacked ``vectors_n``. The index carried by ``labels`` is
	    discarded, so a Series sliced out of a larger DataFrame still pairs up
	    with the right scores.

	    Args:
	        vector_a (np.ndarray): The target vector; it is reshaped to a single row before comparison.
	        vectors_n (np.ndarray): The vectors to compare against the target, given as a 2-D array or a
	            sequence of arrays that ``np.vstack`` can stack row-wise.
	        labels (pd.Series): One label per stacked vector, in the same order as ``vectors_n``.

	    Returns:
	        pd.DataFrame: The labels alongside a column named ``0`` holding the similarity scores,
	            sorted by score in descending order, with the index reset.
	    """
	    # cosine_similarity yields a (1, n) matrix here; flatten it to one score per vector
	    target_row = vector_a.reshape(1, -1)
	    scores = cosine_similarity(target_row, np.vstack(vectors_n)).ravel()
	    similarities = pd.DataFrame(scores)

	    # pd.concat(axis=1) joins on index labels, not on position. Drop the caller's
	    # index so every label lands next to its own score instead of producing
	    # NaN-padded, misaligned rows.
	    positional_labels = labels.reset_index(drop=True)
	    df_results = pd.concat([positional_labels, similarities], axis=1)

	    # NOTE(review): the score column is labelled 0, so a labels Series whose name
	    # is None or 0 presumably collides with it -- confirm callers pass a named Series.

	    # Highest similarity first; renumber rows so the index doubles as the rank
	    return df_results.sort_values(by=0, ascending=False).reset_index(drop=True)
# No results found