kitsamho

## ActorGraphTransformer.py
class ActorGraphTransformer:
    """
    A class for transforming a DataFrame into an actor graph and calculating various graph metrics.

    Attributes:
        df_d3 (pandas.DataFrame): The input DataFrame containing the graph data.
        graph (networkx.Graph): The graph representation of the DataFrame.
        actor_graph_metrics_df (pandas.DataFrame): DataFrame containing the calculated graph metrics for each actor.
        actor_graph_metrics_dict (dict): Dictionary containing the graph metrics for each actor.
        edge_frequency_dict (dict): Dictionary containing the frequency of edges in the graph.

## d3graph.py
def cache_d3_network_plot(df_d3_masked: pd.DataFrame, file_path: str, edge_distance: int = 100,
                          node_size: int = 4, fontsize: int = 8) -> None:
    """
    Caches a D3 network plot to a file.

    Args:
        df_d3_masked: The DataFrame containing masked edge data.
        file_path: The path to save the cached plot file.
        edge_distance: The distance between nodes in the plot.
        node_size: The size of the nodes in the plot.

## D3Transformer.py
class D3Transformer:
    """
    Class to handle the transformation of processed data into a shape that can be used by the d3blocks network graph

    Parameters:
        df_transformed (pandas.DataFrame): The processed dataframe.

    """

    def __init__(self, df_transformed):

## MovieCastTransformer.py
class MovieCastTransformer:
    """
    A class for processing movie data based on user-selected filters.

    Args:
        merged_df (pandas.DataFrame): The merged dataframe containing movie and cast data.

    Attributes:
        merged_df (pandas.DataFrame): The merged dataframe containing movie and cast data.
        year_start (int): The start year selected by the user.

## DataLoader.py
class DataLoader:
    """
    DataLoader class for loading movie and cast data.
    """

    def __init__(self, config_path='config.yaml'):
        """
        Store the config path and initialise ``data_path`` to ``None``.

        Args:
            config_path (str): Path to the YAML config file. Default is 'config.yaml'.
        """
        self.config_path = config_path
        # Starts unset; this initializer only assigns None to it.
        self.data_path = None

## get_payload.py
def get_movie_payload():
    """
    Build an empty movie payload dictionary.

    The result has three sections -- ``'keywords'``, ``'reviews'`` and ``'info'`` -- and each section holds an
    empty ``'movie_response'`` list, the ``'cols'`` list for that section, and an empty ``'results_parsed'`` list.

    Returns:
        dict: A newly built payload; every call creates fresh dictionaries and lists.
    """
    section_columns = {
        'keywords': ['id', 'keywords'],
        'reviews': ['id', 'results'],
        'info': ['id', 'budget', 'revenue', 'genres', 'production_countries', 'tagline'],
    }

    # Every section shares the same three-key layout; only the column list differs.
    return {
        section: {'movie_response': [], 'cols': columns, 'results_parsed': []}
        for section, columns in section_columns.items()
    }

## TMDDBCastCrewScraper.py
class TMDBCastCrewScraper(TMDBMovieScraper):

    def __init__(self, movie_ids: list, popularity_threshold=1, max_threads=30):
        """
        Keep the caller's settings on the instance and start with empty result lists.

        The base-class initializer is not invoked here.

        Args:
            movie_ids (list): Stored unchanged as ``self.movie_ids``.
            popularity_threshold: Stored unchanged as ``self.popularity_threshold``. Defaults to 1.
            max_threads: Stored unchanged as ``self.max_threads``. Defaults to 30.
        """
        # Settings supplied by the caller.
        self.movie_ids = movie_ids
        self.popularity_threshold = popularity_threshold
        self.max_threads = max_threads

        # Two independent, initially empty lists for cast and crew results.
        self.cast_results, self.crew_results = [], []

    def _append_data_to_list(self, list_dic_results, popularity_threshold, results, movie_id):

## TMDBMovieScraper.py
class TMDBMovieScraper:
    def __init__(self, years_check: list):
        """
        Set up the scraper: keep the years to query and create a ``tmdb.Discover`` instance.

        Args:
            years_check (list): List of years to iterate over for querying movie data.
        """
        # Years to iterate over.
        self.years_check = years_check
        # Instance of the tmdb.Discover module.
        self.discover_api = tmdb.Discover()

## rank_vectors.py
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def rank_vectors(vector_a: np.ndarray, vectors_n: np.ndarray, labels: pd.Series) -> pd.DataFrame:
    """
    Ranks the similarity of a target vector with a list of other vectors, and returns the results in a DataFrame.

    Args:

## semantic_search_loop.py
def semantic_search_loop(df_cached_embeddings, model, tokenizer):
    """
    This function takes a dataframe of cached embeddings, a model, and a tokenizer, and performs semantic search for
    images related to the user's search query.

    Args:
        df_cached_embeddings (pandas.DataFrame): A dataframe of cached image embeddings
        tokenizer: Tokenizer to use for pre-processing the text data
        model: A pre-trained CLIP model.
	class ActorGraphTransformer:
	"""
	A class for transforming a DataFrame into an actor graph and calculating various graph metrics.

	Attributes:
	df_d3 (pandas.DataFrame): The input DataFrame containing the graph data.
	graph (networkx.Graph): The graph representation of the DataFrame.
	actor_graph_metrics_df (pandas.DataFrame): DataFrame containing the calculated graph metrics for each actor.
	actor_graph_metrics_dict (dict): Dictionary containing the graph metrics for each actor.
	edge_frequency_dict (dict): Dictionary containing the frequency of edges in the graph.
	def cache_d3_network_plot(df_d3_masked: pd.DataFrame, file_path: str, edge_distance: int = 100,
	node_size: int = 4, fontsize: int = 8) -> None:
	"""
	Caches a D3 network plot to a file.

	Args:
	df_d3_masked: The DataFrame containing masked edge data.
	file_path: The path to save the cached plot file.
	edge_distance: The distance between nodes in the plot.
	node_size: The size of the nodes in the plot.
	class D3Transformer:
	"""
	Class to handle the transformation of processed data into a shape that can be used by the d3blocks network graph

	Parameters:
	df_transformed (pandas.DataFrame): The processed dataframe.

	"""

	def __init__(self, df_transformed):
	class MovieCastTransformer:
	"""
	A class for processing movie data based on user-selected filters.

	Args:
	merged_df (pandas.DataFrame): The merged dataframe containing movie and cast data.

	Attributes:
	merged_df (pandas.DataFrame): The merged dataframe containing movie and cast data.
	year_start (int): The start year selected by the user.
	class DataLoader:
		"""
		DataLoader class for loading movie and cast data.
		"""

		def __init__(self, config_path='config.yaml'):
			"""
			Store the config path and initialise ``data_path`` to ``None``.

			Args:
				config_path (str): Path to the YAML config file. Default is 'config.yaml'.
			"""
			self.config_path = config_path
			# Starts unset; this initializer only assigns None to it.
			self.data_path = None
	def get_movie_payload():
		"""
		Build an empty movie payload dictionary.

		The result has three sections -- ``'keywords'``, ``'reviews'`` and ``'info'`` -- and each section holds an
		empty ``'movie_response'`` list, the ``'cols'`` list for that section, and an empty ``'results_parsed'`` list.

		Returns:
			dict: A newly built payload; every call creates fresh dictionaries and lists.
		"""
		movie_payload = {
			'keywords': {'movie_response': [], 'cols': ['id', 'keywords'], 'results_parsed': []},
			'reviews': {'movie_response': [], 'cols': ['id', 'results'], 'results_parsed': []},
			'info': {
				'movie_response': [],
				'cols': ['id', 'budget', 'revenue', 'genres', 'production_countries', 'tagline'],
				'results_parsed': [],
			},
		}

		return movie_payload
	class TMDBCastCrewScraper(TMDBMovieScraper):

	def __init__(self, movie_ids: list, popularity_threshold=1, max_threads=30):
		"""
		Keep the caller's settings on the instance and start with empty result lists.

		The base-class initializer is not invoked here.

		Args:
			movie_ids (list): Stored unchanged as ``self.movie_ids``.
			popularity_threshold: Stored unchanged as ``self.popularity_threshold``. Defaults to 1.
			max_threads: Stored unchanged as ``self.max_threads``. Defaults to 30.
		"""
		self.movie_ids = movie_ids
		# Two independent, initially empty lists for cast and crew results.
		self.cast_results = []
		self.crew_results = []
		self.popularity_threshold = popularity_threshold
		self.max_threads = max_threads

	def _append_data_to_list(self, list_dic_results, popularity_threshold, results, movie_id):
	class TMDBMovieScraper:
		def __init__(self, years_check: list):
			"""
			Initializes the TMDBMovieScraper class with a list of years to iterate over for querying movie data from TMDB.

			Args:
				years_check (list): List of years to iterate over for querying movie data.
			"""
			self.discover_api = tmdb.Discover()  # Instantiate tmdb.Discover module
			self.years_check = years_check  # List of years to iterate over
	import pandas as pd
	import numpy as np
	from sklearn.metrics.pairwise import cosine_similarity


	def rank_vectors(vector_a: np.ndarray, vectors_n: np.ndarray, labels: pd.Series) -> pd.DataFrame:
	"""
	Ranks the similarity of a target vector with a list of other vectors, and returns the results in a DataFrame.

	Args:
	def semantic_search_loop(df_cached_embeddings, model, tokenizer):
	"""
	This function takes a dataframe of cached embeddings, a model, and a tokenizer, and performs semantic search for
	images related to the user's search query.

	Args:
	df_cached_embeddings (pandas.DataFrame): A dataframe of cached image embeddings
	tokenizer: Tokenizer to use for pre-processing the text data
	model: A pre-trained CLIP model.