Skip to content

Instantly share code, notes, and snippets.

class ActorGraphTransformer:
"""
A class for transforming a DataFrame into an actor graph and calculating various graph metrics.
Attributes:
df_d3 (pandas.DataFrame): The input DataFrame containing the graph data.
graph (networkx.Graph): The graph representation of the DataFrame.
actor_graph_metrics_df (pandas.DataFrame): DataFrame containing the calculated graph metrics for each actor.
actor_graph_metrics_dict (dict): Dictionary containing the graph metrics for each actor.
edge_frequency_dict (dict): Dictionary containing the frequency of edges in the graph.
def cache_d3_network_plot(df_d3_masked: pd.DataFrame, file_path: str, edge_distance: int = 100,
node_size: int = 4, fontsize: int = 8) -> None:
"""
Caches a D3 network plot to a file.
Args:
df_d3_masked: The DataFrame containing masked edge data.
file_path: The path to save the cached plot file.
edge_distance: The distance between nodes in the plot.
node_size: The size of the nodes in the plot.
class D3Transformer:
"""
Class to handle the transformation of processed data into a shape that can be used by the d3blocks network graph
Parameters:
df_transformed (pandas.DataFrame): The processed dataframe.
"""
def __init__(self, df_transformed):
class MovieCastTransformer:
"""
A class for processing movie data based on user-selected filters.
Args:
merged_df (pandas.DataFrame): The merged dataframe containing movie and cast data.
Attributes:
merged_df (pandas.DataFrame): The merged dataframe containing movie and cast data.
year_start (int): The start year selected by the user.
class DataLoader:
    """DataLoader class for loading movie and cast data."""

    def __init__(self, config_path: str = 'config.yaml'):
        """
        Record where the config file lives; nothing is read from disk here.

        Args:
            config_path (str): Path to the YAML config file. Default is 'config.yaml'.
        """
        self.config_path = config_path
        # Initialised to None by the constructor.
        self.data_path = None
def get_movie_payload() -> dict:
    """
    Build the empty movie payload skeleton.

    Returns:
        dict: A dictionary keyed by 'keywords', 'reviews' and 'info' (in that order). Each value is a
        dictionary holding an empty 'movie_response' list, the 'cols' list of column names for that
        section, and an empty 'results_parsed' list.
    """
    # Column names per section; insertion order fixes the key order of the returned payload.
    section_cols = {
        'keywords': ['id', 'keywords'],
        'reviews': ['id', 'results'],
        'info': ['id', 'budget', 'revenue', 'genres', 'production_countries', 'tagline'],
    }
    # Every list is created anew on each call, so mutating one payload never affects another.
    return {
        section: {'movie_response': [], 'cols': cols, 'results_parsed': []}
        for section, cols in section_cols.items()
    }
class TMDBCastCrewScraper(TMDBMovieScraper):
def __init__(self, movie_ids: list, popularity_threshold=1, max_threads=30):
self.movie_ids = movie_ids
self.cast_results = []
self.crew_results = []
self.popularity_threshold = popularity_threshold
self.max_threads = max_threads
def _append_data_to_list(self, list_dic_results, popularity_threshold, results, movie_id):
@kitsamho
kitsamho / TMDBMovieScraper.py
Created June 18, 2023 17:59
Class to query data from the TMDB API using tmdbsimple
class TMDBMovieScraper:
    """Class to query data from the TMDB API using tmdbsimple."""

    def __init__(self, years_check: list):
        """
        Initializes the TMDBMovieScraper class with a list of years to iterate over for querying movie data from TMDB.

        Args:
            years_check (list): List of years to iterate over for querying movie data.
        """
        # `tmdb` is expected to be imported at module level (not visible in this excerpt).
        self.discover_api = tmdb.Discover()  # Instantiate tmdb.Discover module
        self.years_check = years_check  # List of years to iterate over
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def rank_vectors(vector_a: np.ndarray, vectors_n: np.ndarray, labels: pd.Series) -> pd.DataFrame:
"""
Ranks the similarity of a target vector with a list of other vectors, and returns the results in a DataFrame.
Args:
def semantic_search_loop(df_cached_embeddings, model, tokenizer):
"""
This function takes a dataframe of cached embeddings, a model, and a tokenizer, and performs semantic search for
images related to the user's search query.
Args:
df_cached_embeddings (pandas.DataFrame): A dataframe of cached image embeddings
tokenizer: Tokenizer used to pre-process the text data.
model: A pre-trained CLIP model.