import os
from pprint import pprint
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import TokenTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ChatVectorDBChain
from langchain.document_loaders import UnstructuredURLLoader

os.environ["OPENAI_API_KEY"] = "your_open_api_key"

# URLs that make up the H2O Wave knowledge base
h2o_ai_wave_urls = [
    "https://github.com/h2oai/wave/releases",
    "https://wave.h2o.ai/docs/installation",
    "https://wave.h2o.ai/docs/getting-started",
    "https://wave.h2o.ai/docs/examples",
    "https://github.com/h2oai/wave/issues/693",
    "https://github.com/h2oai/wave/blob/master/.github/CONTRIBUTING.md#development-setup",
    "https://github.com/h2oai/wave/discussions/1897",
    "https://github.com/h2oai/wave/discussions/1888",
    "https://github.com/h2oai/wave/discussions/1885",
    "https://github.com/h2oai/wave/discussions/1865",
]

collection_name = "h2o_wave_knowledgebase"
local_directory = "kb-h2o-wave"
persist_directory = os.path.join(os.getcwd(), local_directory)

# Load the pages and split them into token-sized chunks
loader = UnstructuredURLLoader(urls=h2o_ai_wave_urls)
kb_data = loader.load()

text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
kb_doc = text_splitter.split_documents(kb_data)

# Embed the chunks and persist them in a local Chroma collection
embeddings = OpenAIEmbeddings()
kb_db = Chroma.from_documents(
    kb_doc,
    embeddings,
    collection_name=collection_name,
    persist_directory=persist_directory,
)
kb_db.persist()

kb_qa …
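The listing stops short of building the question-answering chain (kb_qa). A minimal sketch of how that step typically looks with the classes imported above is shown below; it continues the script and reuses kb_db, OpenAI, ChatVectorDBChain, and pprint. The temperature, question text, and empty chat history are illustrative assumptions, not values from the original.

# Sketch (assumption): wire the persisted Chroma store into a ChatVectorDBChain
# so questions can be answered against the H2O Wave knowledge base.
kb_qa = ChatVectorDBChain.from_llm(
    OpenAI(temperature=0),  # temperature is an illustrative choice
    kb_db,
)

# Example query; the question text and empty chat history are placeholders.
query = "How do I install H2O Wave?"
result = kb_qa({"question": query, "chat_history": []})
pprint(result["answer"])

Each call takes the current question plus the running chat history, so follow-up questions can be asked by appending (question, answer) pairs to the chat_history list between calls.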