Fine-tuning an LLM with LangChain

import os
from pprint import pprint
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import TokenTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ChatVectorDBChain
from langchain.document_loaders import UnstructuredURLLoader

os.environ["OPENAI_API_KEY"] = "your_open_api_key"

h2o_ai_wave_urls = [
    "https://github.com/h2oai/wave/releases",
    "https://wave.h2o.ai/docs/installation",
    "https://wave.h2o.ai/docs/getting-started",
    "https://wave.h2o.ai/docs/examples",
    "https://github.com/h2oai/wave/issues/693",
    "https://github.com/h2oai/wave/blob/master/.github/CONTRIBUTING.md#development-setup",
    "https://github.com/h2oai/wave/discussions/1897",
    "https://github.com/h2oai/wave/discussions/1888",
    "https://github.com/h2oai/wave/discussions/1885",
    "https://github.com/h2oai/wave/discussions/1865"
]

collection_name = "h2o_wave_knowledgebase"
local_directory = "kb-h2o-wave"
persist_directory = os.path.join(os.getcwd(), local_directory)

loader = UnstructuredURLLoader(urls=h2o_ai_wave_urls)
kb_data = loader.load()
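
UnstructuredURLLoader parses the pages with the unstructured package, so that needs to be installed alongside langchain. As an optional sanity check (not part of the original script), you can print what was actually fetched before splitting it:

# Optional sanity check: how many documents were loaded, and from which URLs
print(f"Loaded {len(kb_data)} documents")
for doc in kb_data:
    print(doc.metadata.get("source"), "-", len(doc.page_content), "characters")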

text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
kb_doc = text_splitter.split_documents(kb_data)

embeddings = OpenAIEmbeddings()

kb_db = Chroma.from_documents(
    kb_doc,
    embeddings,
    collection_name=collection_name,
    persist_directory=persist_directory
)
kb_db.persist()
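
Because the collection is persisted to disk, you can reopen it in a later session without scraping and embedding the URLs again. A minimal sketch, assuming the same collection_name and persist_directory as above:

# Reload the persisted collection instead of rebuilding it (sketch, same settings as above)
kb_db = Chroma(
    collection_name=collection_name,
    persist_directory=persist_directory,
    embedding_function=OpenAIEmbeddings()
)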

kb_qa = ChatVectorDBChain.from_llm(
    OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
    vectorstore=kb_db,
    top_k_docs_for_context=5,
    return_source_documents=True
)
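
Note that newer LangChain releases deprecate ChatVectorDBChain in favour of ConversationalRetrievalChain. If you are on a more recent version, a roughly equivalent setup (a sketch, not the code above) looks like this:

# Rough equivalent on newer LangChain versions, where ChatVectorDBChain is deprecated
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI

kb_qa = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
    retriever=kb_db.as_retriever(search_kwargs={"k": 5}),
    return_source_documents=True
)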

chat_history = []
query_statement = "What is this document about?"

# ----- One question only -----

query_results = kb_qa({"question": query_statement, "chat_history": chat_history})

pprint(query_results["answer"])

pprint(query_results["source_documents"])
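
Since return_source_documents=True, each result also carries the chunks that were retrieved as context. Printing just their source URLs is often easier to read than dumping the full documents; a small optional snippet:

# Show only where each supporting chunk came from, rather than its full text
for doc in query_results["source_documents"]:
    print(doc.metadata.get("source"))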

# ----- Question loop -----

while query_statement != 'exit':
    query_statement = input('Enter your question here: > ')
    if query_statement != 'exit':
        result = kb_qa({"question": query_statement, "chat_history": chat_history})
        chat_history.append((query_statement, result["answer"]))  # carry context into follow-up questions
        # pprint(result["source_documents"])
        pprint(result["answer"])

# Clear the API key when finished
os.environ["OPENAI_API_KEY"] = ""
print("---------- Life is good ----------")
