-
-
Save jeanmidevacc/7192989e9464eb8a519ac37f7209de55 to your computer and use it in GitHub Desktop.
illustration_voyager_usage.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from voyager import Index, Space | |
| # Also sharing the chunking helper used below | |
def chunk_dfp(df, chunk_size):
    """Yield successive row-wise chunks of ``df``.

    Args:
        df: Object supporting ``len()`` and positional slicing through
            ``.iloc`` (e.g. a pandas DataFrame).
        chunk_size: Maximum number of rows per chunk; must be positive.

    Yields:
        ``df.iloc[start:start + chunk_size]`` for consecutive values of
        ``start``; the final chunk may hold fewer than ``chunk_size`` rows.

    Raises:
        ValueError: If ``chunk_size`` is not positive. Because this is a
            generator, the error surfaces on the first iteration.
    """
    # A zero step makes range() fail with an unrelated message and a negative
    # step silently produces no chunks at all, so reject both explicitly.
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    for start in range(0, len(df), chunk_size):
        yield df.iloc[start:start + chunk_size]
# NOTE(review): `model`, `normalised`, `tqdm` and `dfp_films_with_decriptions`
# are not defined in this snippet; they are expected to exist already.

# Initialise the index with the cosine space. The dimensionality is read from
# the embedding model so it matches the vectors added below.
index = Index(
    space=Space.Cosine,
    num_dimensions=model.get_sentence_embedding_dimension(),
)

chunk_size = 1000
# Ceiling division gives the exact number of chunks chunk_dfp will yield;
# `len // chunk_size + 1` over-counts by one when the row count is an exact
# multiple of chunk_size, leaving the progress bar short of 100%.
n_chunks = (len(dfp_films_with_decriptions) + chunk_size - 1) // chunk_size
chunks = chunk_dfp(dfp_films_with_decriptions, chunk_size)
for dfp_films_with_decriptions_chunk in tqdm(chunks, total=n_chunks, desc="Processing films"):
    descriptions = dfp_films_with_decriptions_chunk['film_description'].tolist()
    embeddings = model.encode_document(descriptions, batch_size=16, normalize_embeddings=normalised, show_progress_bar=False)
    # Add the whole chunk in one call, using the chunk's DataFrame index
    # values as item ids.
    # NOTE(review): assumes those index values are unique integers - confirm.
    index.add_items(embeddings, dfp_films_with_decriptions_chunk.index.tolist())

# Save to a local file so the index can be reloaded later (Index.load).
index.save("some_location.voy")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment