Skip to content

Instantly share code, notes, and snippets.

@kitsamho
Created June 18, 2023 18:02
Show Gist options
  • Select an option

  • Save kitsamho/443f06e5bf4031d9cc01237a8783155c to your computer and use it in GitHub Desktop.

Select an option

Save kitsamho/443f06e5bf4031d9cc01237a8783155c to your computer and use it in GitHub Desktop.
class TMDBCastCrewScraper(TMDBMovieScraper):
    """Collect cast credits for a list of movie ids using a thread pool.

    Each movie is fetched through ``self._get_film_response`` (not defined in
    this class; presumably inherited from ``TMDBMovieScraper`` -- confirm).
    Cast entries whose ``'popularity'`` meets the threshold are tagged with
    the movie id under ``'tmdb_id'`` and accumulated in ``self.cast_results``.

    Crew collection is currently disabled: ``self.crew_results`` is created
    but never populated by this class.

    Attributes are set in ``__init__``:
        movie_ids: ids handed to ``_get_film_response`` one by one.
        cast_results: list of cast dicts gathered so far.
        crew_results: always-empty placeholder for crew dicts.
        popularity_threshold: minimum ``'popularity'`` a cast entry needs.
        max_threads: ``max_workers`` for the thread pool.
    """

    def __init__(self, movie_ids: list, popularity_threshold=1, max_threads=30):
        """Store the scraping configuration and create empty result lists.

        NOTE(review): the parent initialiser is not invoked here; confirm
        that ``TMDBMovieScraper`` needs no per-instance setup.
        """
        self.movie_ids = movie_ids
        self.cast_results = []
        self.crew_results = []
        self.popularity_threshold = popularity_threshold
        self.max_threads = max_threads

    def _append_data_to_list(self, list_dic_results, popularity_threshold, results, movie_id):
        """Append the sufficiently popular entries of one movie to ``results``.

        Args:
            list_dic_results: iterable of dicts (one per person).
            popularity_threshold: minimum ``'popularity'`` value (inclusive).
            results: list that qualifying dicts are appended to.
            movie_id: value stored under ``'tmdb_id'`` on each kept dict.

        Entries without a ``'popularity'`` value are skipped individually so
        that one malformed entry does not discard the rest of the movie.
        Kept dicts are modified in place (the ``'tmdb_id'`` key is added).
        """
        for person in list_dic_results:
            popularity = person.get('popularity')
            if popularity is None or popularity < popularity_threshold:
                continue
            person['tmdb_id'] = movie_id
            results.append(person)

    def _get_cast_crew_for_movie(self, movie_id):
        """Fetch one movie's credits and store its qualifying cast entries.

        Best-effort: any ``Exception`` for a single movie is logged as a
        warning and the movie is skipped, so one failure cannot abort the
        whole batch. Returns ``None``.
        """
        import logging  # local import: this block cannot edit the file's import section

        try:
            film_response = self._get_film_response(movie_id)
            cast_dic_list = film_response.credits()['cast']
            # Only the 'cast' part of the credits is collected; crew is
            # intentionally not gathered, so self.crew_results stays empty.
            self._append_data_to_list(
                cast_dic_list, self.popularity_threshold, self.cast_results, movie_id
            )
        except Exception:
            # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
            # propagate while keeping a record of which movie failed and why.
            logging.getLogger(__name__).warning(
                "Skipping movie id %s: could not collect cast credits",
                movie_id,
                exc_info=True,
            )

    def _request_cast_crew_data(self, movie_ids):
        """Run ``_get_cast_crew_for_movie`` for every id on a thread pool.

        Blocks until all submitted tasks have finished. Returns ``None``;
        results are accumulated in ``self.cast_results``.
        """
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_threads) as executor:
            futures = [
                executor.submit(self._get_cast_crew_for_movie, movie_id)
                for movie_id in movie_ids
            ]
            concurrent.futures.wait(futures)

    def get_cast_crew(self):
        """Scrape cast data for ``self.movie_ids`` and return it as a DataFrame.

        The frame is also stored on ``self.df``. Because ``self.cast_results``
        is only created in ``__init__``, calling this method more than once on
        the same instance appends the newly fetched rows to the earlier ones.
        """
        self._request_cast_crew_data(self.movie_ids)
        self.df = pd.DataFrame(self.cast_results)
        return self.df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment