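# Minimal local RAG (retrieval-augmented generation) demo: TinyLlama handles
# generation through langchain's HuggingFacePipeline, Chroma stores the document
# embeddings, and an LCEL chain wires retrieval, prompting and generation
# together. The script loads about.txt from the working directory and then
# answers questions about it in a simple stdin loop.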
from langchain_core.prompts import PromptTemplate
import re
import traceback
import sys
import readline
from langchain_huggingface import HuggingFacePipeline
# template = """Question: {question}
# Answer: Let's think step by step."""
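# generation settings: the TinyLlama 1.1B chat model, run as a plain
# text-generation pipeline, producing at most 300 new tokens per answer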
max_new_tokens=300
task="text-generation"
model="TinyLlama/TinyLlama-1.1B-Chat-v1.0"
hf = HuggingFacePipeline.from_model_id(
    model_id=model,
    task=task,
    pipeline_kwargs={
        "max_new_tokens": max_new_tokens,
    },
    model_kwargs={
        "temperature": 0.7,
        "top_k": 50,
        "top_p": 0.95,
        "do_sample": True,
    },
)
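# note: for the text-generation task the pipeline typically echoes the prompt
# in its output, which is why the final answer is extracted with a regex
# further down instead of being printed verbatim.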
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_core.runnables import RunnablePassthrough
# load the document and split it into chunks
# e.g.,
#
# onecafe is a company found by yongjie.zhuang.
#
# onecafe sells coffee.
files = ["about.txt"]
documents = []
for f in files:
    documents.extend(TextLoader(f).load())
# split it into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
print(">> docs", docs)
# create the open-source embedding function
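# note: the line below reuses the TinyLlama chat model id as the embedding
# model; a dedicated sentence-transformers model (for example
# "all-MiniLM-L6-v2") would be a more typical choice, but the original id is
# kept here as-is.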
embed = SentenceTransformerEmbeddings(model_name=model)
# load it into Chroma
vec = Chroma.from_documents(docs, embed)
reti = vec.as_retriever(search_kwargs={"k": 10}) # default: k is 4
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:"""
ans_pat = "^.*Answer: *(.*)$"
prompt = PromptTemplate.from_template(template)
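# the LCEL chain: the question is sent both to the retriever (whose documents
# are concatenated into {context}) and passed through unchanged as {question};
# the filled prompt is then handed to the TinyLlama pipeline.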
chain = (
    {"context": reti | format_docs, "question": RunnablePassthrough()}
    | prompt
    | hf.bind()
)
print("\n\n")
while True:
    try:
        print("Enter your question:")
        q = None
        while not q:
            line = sys.stdin.readline()
            if not line:
                # EOF (e.g. Ctrl-D): readline() returns '' rather than raising, so exit here
                sys.exit()
            q = line.strip()
        resp = chain.invoke(q)
        # the raw response echoes the prompt, so keep only the text after "Answer:"
        m = re.search(ans_pat, resp, re.DOTALL)
        ans = resp
        if m:
            ans = m[1]
        # print(f"\n\n>>>> resp: '{resp}', m: {m}\n")
        print(f"\n\n> AI: '{ans}'\n")
    except KeyboardInterrupt:
        # Ctrl-C: exit cleanly (InterruptedError would not catch this)
        sys.exit()
    except Exception as e:
        print("Exception caught", e)
        traceback.print_exc()