Commit 2f3ddda5 authored by Thuc Pham, committed by GitHub

feat: add pinecone support to create llama (#555)

parent 5309fae6
@@ -4,7 +4,7 @@ export type TemplateType = "simple" | "streaming" | "community" | "llamapack";
 export type TemplateFramework = "nextjs" | "express" | "fastapi";
 export type TemplateEngine = "simple" | "context";
 export type TemplateUI = "html" | "shadcn";
-export type TemplateVectorDB = "none" | "mongo" | "pg";
+export type TemplateVectorDB = "none" | "mongo" | "pg" | "pinecone";
 export type TemplatePostInstallAction = "none" | "dependencies" | "runApp";
 export type TemplateDataSource = {
   type: TemplateDataSourceType;
...
@@ -89,6 +89,7 @@ const getVectorDbChoices = (framework: TemplateFramework) => {
     },
     { title: "MongoDB", value: "mongo" },
     { title: "PostgreSQL", value: "pg" },
+    { title: "Pinecone", value: "pinecone" },
   ];
   const vectordbLang = framework === "fastapi" ? "python" : "typescript";
...
DATA_DIR = "data" # directory containing the documents to index
CHUNK_SIZE = 512
CHUNK_OVERLAP = 20
from llama_index import ServiceContext
from app.context import create_base_context
from app.engine.constants import CHUNK_SIZE, CHUNK_OVERLAP
def create_service_context():
    base = create_base_context()
    return ServiceContext.from_defaults(
        llm=base.llm,
        embed_model=base.embed_model,
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
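create_base_context is imported from app.context and is not part of this diff; it is assumed to return an object exposing .llm and .embed_model (for example, a default ServiceContext built around an OpenAI model). A purely hypothetical sketch of such a helper:

# Hypothetical sketch only -- app/context.py is not included in this commit.
# Assumes a ServiceContext whose .llm and .embed_model are reused above.
from llama_index import ServiceContext
from llama_index.llms import OpenAI


def create_base_context():
    return ServiceContext.from_defaults(
        llm=OpenAI(model="gpt-3.5-turbo"),
    )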
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.vector_stores import PineconeVectorStore
from app.engine.constants import DATA_DIR
from app.engine.context import create_service_context
from app.engine.loader import get_documents
from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource(service_context):
    logger.info("Creating new index")
    # load the documents and create the index
    documents = get_documents()
    store = PineconeVectorStore(
        api_key=os.environ["PINECONE_API_KEY"],
        index_name=os.environ["PINECONE_INDEX_NAME"],
        environment=os.environ["PINECONE_ENVIRONMENT"],
    )
    storage_context = StorageContext.from_defaults(vector_store=store)
    VectorStoreIndex.from_documents(
        documents,
        service_context=service_context,
        storage_context=storage_context,
        show_progress=True,  # this will show you a progress bar as the embeddings are created
    )
    logger.info(
        f"Successfully created embeddings and saved them to your Pinecone index {os.environ['PINECONE_INDEX_NAME']}"
    )


if __name__ == "__main__":
    generate_datasource(create_service_context())
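This generation script reads three environment variables: PINECONE_API_KEY, PINECONE_INDEX_NAME, and PINECONE_ENVIRONMENT. A minimal sketch of invoking it programmatically with an explicit check first, assuming the file lives at app/engine/generate.py as the surrounding imports suggest:

# Sketch only: validate the Pinecone settings before generating the datasource.
import os

REQUIRED = ["PINECONE_API_KEY", "PINECONE_INDEX_NAME", "PINECONE_ENVIRONMENT"]
missing = [name for name in REQUIRED if not os.getenv(name)]
if missing:
    raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")

from app.engine.context import create_service_context
from app.engine.generate import generate_datasource  # assumed module path

generate_datasource(create_service_context())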
import logging
import os
from llama_index import (
    VectorStoreIndex,
)
from llama_index.vector_stores import PineconeVectorStore
from app.engine.context import create_service_context
def get_index():
    service_context = create_service_context()
    logger = logging.getLogger("uvicorn")
    logger.info("Connecting to index from Pinecone...")
    store = PineconeVectorStore(
        api_key=os.environ["PINECONE_API_KEY"],
        index_name=os.environ["PINECONE_INDEX_NAME"],
        environment=os.environ["PINECONE_ENVIRONMENT"],
    )
    index = VectorStoreIndex.from_vector_store(store, service_context)
    logger.info("Finished connecting to index from Pinecone.")
    return index
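get_index() only reconnects to the existing Pinecone index; the FastAPI template consumes it elsewhere. A hedged sketch of typical usage with llama_index's built-in context chat engine (the consuming code is not part of this diff, and the module path is assumed):

# Sketch only: build a chat engine on top of the reconnected index.
from app.engine.index import get_index  # assumed module path


def create_chat_engine():
    index = get_index()
    return index.as_chat_engine(
        chat_mode="context",
        similarity_top_k=5,
    )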
/* eslint-disable turbo/no-undeclared-env-vars */
import * as dotenv from "dotenv";
import {
  PineconeVectorStore,
  SimpleDirectoryReader,
  VectorStoreIndex,
  storageContextFromDefaults,
} from "llamaindex";
import { STORAGE_DIR, checkRequiredEnvVars } from "./shared.mjs";
dotenv.config();
async function loadAndIndex() {
  // load objects from storage and convert them into LlamaIndex Document objects
  const documents = await new SimpleDirectoryReader().loadData({
    directoryPath: STORAGE_DIR,
  });
  // create vector store
  const vectorStore = new PineconeVectorStore();
  // create index from all the Documents and store them in Pinecone
  console.log("Start creating embeddings...");
  const storageContext = await storageContextFromDefaults({ vectorStore });
  await VectorStoreIndex.fromDocuments(documents, { storageContext });
  console.log(
    "Successfully created embeddings and saved them to your Pinecone index.",
  );
}

(async () => {
  checkRequiredEnvVars();
  await loadAndIndex();
  console.log("Finished generating storage.");
})();
/* eslint-disable turbo/no-undeclared-env-vars */
import {
  ContextChatEngine,
  LLM,
  PineconeVectorStore,
  VectorStoreIndex,
  serviceContextFromDefaults,
} from "llamaindex";
import { CHUNK_OVERLAP, CHUNK_SIZE, checkRequiredEnvVars } from "./shared.mjs";

async function getDataSource(llm: LLM) {
  checkRequiredEnvVars();
  const serviceContext = serviceContextFromDefaults({
    llm,
    chunkSize: CHUNK_SIZE,
    chunkOverlap: CHUNK_OVERLAP,
  });
  const store = new PineconeVectorStore();
  return await VectorStoreIndex.fromVectorStore(store, serviceContext);
}

export async function createChatEngine(llm: LLM) {
  const index = await getDataSource(llm);
  const retriever = index.asRetriever({ similarityTopK: 5 });
  return new ContextChatEngine({
    chatModel: llm,
    retriever,
  });
}
export const STORAGE_DIR = "./data";
export const CHUNK_SIZE = 512;
export const CHUNK_OVERLAP = 20;
const REQUIRED_ENV_VARS = ["PINECONE_ENVIRONMENT", "PINECONE_API_KEY"];
export function checkRequiredEnvVars() {
  const missingEnvVars = REQUIRED_ENV_VARS.filter((envVar) => {
    return !process.env[envVar];
  });
  if (missingEnvVars.length > 0) {
    console.log(
      `The following environment variables are required but missing: ${missingEnvVars.join(
        ", ",
      )}`,
    );
    throw new Error(
      `Missing environment variables: ${missingEnvVars.join(", ")}`,
    );
  }
}