Skip to content
Snippets Groups Projects
Unverified Commit 6bd5e7b7 authored by Huu Le (Lee)'s avatar Huu Le (Lee) Committed by GitHub
Browse files

using ingestion pipeline for chromadb (#87)

parent 38bc1d13
No related branches found
No related tags found
No related merge requests found
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.storage import StorageContext
from llama_index.core.indices import VectorStoreIndex
from app.settings import init_settings
from app.engine.loaders import get_documents
from app.engine.vectordb import get_vector_store
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
    """Build the vector index: load documents, embed them, and persist them to ChromaDB."""
    init_settings()
    logger.info("Creating new index")
    # Gather the source documents and the ChromaDB-backed vector store.
    docs = get_documents()
    vector_store = get_vector_store()
    # Route the generated embeddings straight into the vector store.
    ctx = StorageContext.from_defaults(vector_store=vector_store)
    VectorStoreIndex.from_documents(
        docs,
        storage_context=ctx,
        show_progress=True,  # progress bar while embeddings are created
    )
    logger.info("Successfully created embeddings in the ChromaDB")
# Script entry point: run the full ingestion pipeline when executed directly.
if __name__ == "__main__":
    generate_datasource()
import logging
from llama_index.core.indices import VectorStoreIndex
from app.engine.vectordb import get_vector_store
logger = logging.getLogger("uvicorn")
def get_index():
    """Return a VectorStoreIndex backed by the existing ChromaDB collection.

    Assumes the embeddings were already created by the ingestion script;
    this only connects to the store, it does not re-ingest documents.
    """
    logger.info("Connecting to ChromaDB..")
    vector_store = get_vector_store()
    idx = VectorStoreIndex.from_vector_store(vector_store, use_async=False)
    logger.info("Finished connecting to ChromaDB.")
    return idx
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment