davidmezzetti/web2gguf.py

## web2gguf.py
from txtai import Embeddings
from txtai.pipeline import Textractor

# Page to index. Bound as `url` because the indexing expression below reads
# that exact name for both the extraction call and the row id.
url = "https://github.com/neuml/txtai"

# Text extraction pipeline configured with the "semantic" chunker, so one page
# yields multiple chunks rather than a single document.
textractor = Textractor(chunker="semantic")

# Embeddings index using the ggml backend with q4_0 quantization.
embeddings = Embeddings(backend="ggml", ggml={"quantize": "q4_0"})

# Index every extracted chunk as an (id, text) pair; the source URL is the id.
embeddings.index((url, x) for x in textractor(url))

# Persist the index under the "gguf" directory (see the gguf-dump output below).
embeddings.save("gguf")

# gguf-dump gguf/embeddings
#
# * File is LITTLE endian, script is running on a LITTLE endian host.
# * Dumping 3 key/value pair(s)
#       1: UINT32     |        1 | GGUF.version = 3
#       2: UINT64     |        1 | GGUF.tensor_count = 1
#       3: UINT64     |        1 | GGUF.kv_count = 0
# * Dumping 1 tensor(s)
#       1:      11136 |   384,    29,     1,     1 | Q4_0    | data
	from txtai import Embeddings
	from txtai.pipeline import Textractor

	# Page to index. Bound as `url` because the indexing expression below reads
	# that exact name for both the extraction call and the row id.
	url = "https://github.com/neuml/txtai"

	# Text extraction pipeline configured with the "semantic" chunker, so one page
	# yields multiple chunks rather than a single document.
	textractor = Textractor(chunker="semantic")

	# Embeddings index using the ggml backend with q4_0 quantization.
	embeddings = Embeddings(backend="ggml", ggml={"quantize": "q4_0"})

	# Index every extracted chunk as an (id, text) pair; the source URL is the id.
	embeddings.index((url, x) for x in textractor(url))

	# Persist the index under the "gguf" directory (see the gguf-dump output below).
	embeddings.save("gguf")

	# gguf-dump gguf/embeddings
	#
	# * File is LITTLE endian, script is running on a LITTLE endian host.
	# * Dumping 3 key/value pair(s)
	#       1: UINT32     |        1 | GGUF.version = 3
	#       2: UINT64     |        1 | GGUF.tensor_count = 1
	#       3: UINT64     |        1 | GGUF.kv_count = 0
	# * Dumping 1 tensor(s)
	#       1:      11136 |   384,    29,     1,     1 | Q4_0    | data
No results found