from txtai.pipeline import Textractor
# Options handed to the pipeline: custom HTTP headers with a browser-style user agent
request_headers = {"user-agent": "Mozilla/5.0"}

# Remote PNG image to run through the pipeline
image_url = "https://miro.medium.com/v2/resize:fit:720/format:webp/1*HHPVwIrcxYcLRvjDpwLQyQ.png"

# Build the Textractor pipeline on the docling backend, then call it with the image URL
textractor = Textractor(backend="docling", headers=request_headers)
textractor(image_url)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from txtai import Agent | |
| # Define tools | |
| tools = [ | |
| "websearch", # Runs a websearch using default engine | |
| "webview", # Loads a web page | |
| ] | |
| # Define LLM | |
| model = "Qwen/Qwen3-4B-Instruct-2507" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| from txtai.ann import ANNFactory | |
| # Index 10M vectors using llama.cpp style quants | |
| ann = None | |
| for _ in range(1000): | |
| # Generate batch of vectors | |
| batch = np.random.rand(10000, 768).astype(np.float32) | |
| if not ann: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # RAG Quick Start | |
| # An easy-to-use way to get started with RAG using YOUR data | |
| # | |
| # For a complete application see this: https://github.com/neuml/rag | |
| # | |
| # TxtAI has 70+ example notebooks covering everything the framework provides | |
| # Examples: https://neuml.github.io/txtai/examples | |
| # | |
| # Install TxtAI | |
| # pip install txtai[pipeline-data] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import soundfile as sf | |
| from txtai.pipeline import TextToAudio | |
| # Create and run pipeline (Note that model is CC-BY-NC) | |
| tta = TextToAudio("facebook/musicgen-stereo-medium") | |
| speech, rate = tta("Happy 80s rock and synth for a fun startup company") | |
| # Write to file | |
| sf.write("out.wav", speech.T, rate) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import soundfile as sf | |
| from txtai.pipeline import TextToSpeech | |
| # Build pipeline | |
| tts = TextToSpeech("neuml/kokoro-int8-onnx") | |
| # Generate speech | |
| speech, rate = tts( | |
| """Have you ever considered having a snooty British accent? |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from smolagents import WebSearchTool | |
| from txtai import LLM | |
| def webrag(query): | |
| prompt = f""" | |
| Answer the following question using ONLY the context below. | |
| Query: {query} | |
| Context: {search(query)} | |
| """ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from txtai.pipeline import Textractor | |
| # Docling backend, split text by sections | |
| textractor = Textractor(sections=True, backend="docling") | |
| # BERT Paper | |
| textractor("https://arxiv.org/pdf/1810.04805") | |
| # PDF converted to Markdown, split on Markdown sections | |
| # ['## BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding... |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from datasets import load_dataset | |
| from sklearn.metrics import accuracy_score | |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
| from txtai.pipeline import HFTrainer | |
| def metrics(pred): | |
| labels, preds = pred.label_ids, pred.predictions.argmax(-1) | |
| # Calculate accuracy | |
| return {"accuracy": accuracy_score(labels, preds)} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from txtai import Embeddings | |
| from txtai.pipeline import Textractor | |
| url = "https://github.com/neuml/txtai" | |
| textractor = Textractor(chunker="semantic") | |
| embeddings = Embeddings(backend="ggml", ggml={"quantize": "q4_0"}) | |
| embeddings.index((url, x) for x in textractor(url)) | |
| embeddings.save("gguf") |
NewerOlder
