Unverified commit 0cf6386e, authored by Thuc Pham and committed by GitHub

feat: use setting config (#38)

parent a6e76e1c
Showing 45 additions and 125 deletions
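This change replaces the per-call ServiceContext plumbing with the global Settings singleton from llamaindex: the llm parameter disappears from createChatEngine and getDataSource, and the chunking constants move out of each shared.ts into a new settings module. That module itself is not part of the hunks shown here; the following is a minimal sketch of what its initSettings might contain, assuming OpenAI defaults (the model name and MODEL env var are assumptions):

import { OpenAI, Settings } from "llamaindex";

// Hypothetical ./settings module - not shown in this diff.
// Populates the global Settings singleton once at startup, so the
// engine and data-source factories no longer take an llm argument.
export const initSettings = () => {
  Settings.llm = new OpenAI({
    model: process.env.MODEL ?? "gpt-3.5-turbo", // assumed default
  });
  // Replaces the CHUNK_SIZE / CHUNK_OVERLAP constants deleted below.
  Settings.chunkSize = 512;
  Settings.chunkOverlap = 20;
};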
 import {
   BaseTool,
-  OpenAI,
   OpenAIAgent,
   QueryEngineTool,
+  Settings,
   ToolFactory,
 } from "llamaindex";
 import fs from "node:fs/promises";
@@ -10,12 +10,12 @@ import path from "node:path";
 import { getDataSource } from "./index";
 import { STORAGE_CACHE_DIR } from "./shared";

-export async function createChatEngine(llm: OpenAI) {
+export async function createChatEngine() {
   let tools: BaseTool[] = [];

   // Add a query engine tool if we have a data source
   // Delete this code if you don't have a data source
-  const index = await getDataSource(llm);
+  const index = await getDataSource();
   if (index) {
     tools.push(
       new QueryEngineTool({
@@ -38,7 +38,7 @@ export async function createChatEngine(llm: OpenAI) {
   return new OpenAIAgent({
     tools,
-    llm,
+    llm: Settings.llm,
     verbose: true,
   });
 }
-import { ContextChatEngine, LLM } from "llamaindex";
+import { ContextChatEngine, Settings } from "llamaindex";
 import { getDataSource } from "./index";

-export async function createChatEngine(llm: LLM) {
-  const index = await getDataSource(llm);
+export async function createChatEngine() {
+  const index = await getDataSource();
   if (!index) {
     throw new Error(
       `StorageContext is empty - call 'npm run generate' to generate the storage first`,
@@ -12,7 +12,7 @@ export async function createChatEngine(llm: LLM) {
   retriever.similarityTopK = 3;
   return new ContextChatEngine({
-    chatModel: llm,
+    chatModel: Settings.llm,
     retriever,
   });
 }
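With the llm parameter gone from both variants of createChatEngine, callers only have to run initSettings() once before constructing an engine. A sketch of the new calling convention (this route handler is not part of the diff; its shape and import paths are assumptions):

import { createChatEngine } from "./engine/chat"; // hypothetical path
import { initSettings } from "./engine/settings"; // hypothetical path

initSettings(); // fill Settings.llm once at startup

export async function POST(request: Request) {
  const { message } = await request.json();
  const chatEngine = await createChatEngine(); // no llm argument anymore
  const response = await chatEngine.chat({ message });
  return Response.json({ result: response.response });
}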
@@ -6,6 +6,7 @@ import {
   storageContextFromDefaults,
 } from "llamaindex";
 import { getDocuments } from "./loader";
+import { initSettings } from "./settings";
 import { checkRequiredEnvVars } from "./shared";

 dotenv.config();
@@ -33,6 +34,7 @@ async function loadAndIndex() {
 (async () => {
   checkRequiredEnvVars();
+  initSettings();
   await loadAndIndex();
   console.log("Finished generating storage.");
 })();
 /* eslint-disable turbo/no-undeclared-env-vars */
-import {
-  AstraDBVectorStore,
-  LLM,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-import { CHUNK_OVERLAP, CHUNK_SIZE, checkRequiredEnvVars } from "./shared";
+import { AstraDBVectorStore, VectorStoreIndex } from "llamaindex";
+import { checkRequiredEnvVars } from "./shared";

-export async function getDataSource(llm: LLM) {
+export async function getDataSource() {
   checkRequiredEnvVars();
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
   const store = new AstraDBVectorStore();
   await store.connect(process.env.ASTRA_DB_COLLECTION!);
-  return await VectorStoreIndex.fromVectorStore(store, serviceContext);
+  return await VectorStoreIndex.fromVectorStore(store);
 }
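Note that VectorStoreIndex.fromVectorStore(store) without a serviceContext reads its embedding model from the same Settings singleton, so any non-default embedding configuration would also belong in initSettings. A hedged one-liner, assuming the OpenAI embedding class (the model name is an assumption):

import { OpenAIEmbedding, Settings } from "llamaindex";

// Hypothetical addition to initSettings - not shown in this diff.
Settings.embedModel = new OpenAIEmbedding({ model: "text-embedding-ada-002" });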
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
 const REQUIRED_ENV_VARS = [
   "ASTRA_DB_APPLICATION_TOKEN",
   "ASTRA_DB_ENDPOINT",
...
@@ -6,6 +6,7 @@ import {
   storageContextFromDefaults,
 } from "llamaindex";
 import { getDocuments } from "./loader";
+import { initSettings } from "./settings";
 import { checkRequiredEnvVars, getMilvusClient } from "./shared";

 dotenv.config();
@@ -32,6 +33,7 @@ async function loadAndIndex() {
 (async () => {
   checkRequiredEnvVars();
+  initSettings();
   await loadAndIndex();
   console.log("Finished generating storage.");
 })();
-import {
-  LLM,
-  MilvusVectorStore,
-  serviceContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
-import {
-  checkRequiredEnvVars,
-  CHUNK_OVERLAP,
-  CHUNK_SIZE,
-  getMilvusClient,
-} from "./shared";
+import { MilvusVectorStore, VectorStoreIndex } from "llamaindex";
+import { checkRequiredEnvVars, getMilvusClient } from "./shared";

-export async function getDataSource(llm: LLM) {
+export async function getDataSource() {
   checkRequiredEnvVars();
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
   const milvusClient = getMilvusClient();
   const store = new MilvusVectorStore({ milvusClient });
-  return await VectorStoreIndex.fromVectorStore(store, serviceContext);
+  return await VectorStoreIndex.fromVectorStore(store);
 }
 import { MilvusClient } from "@zilliz/milvus2-sdk-node";

-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
 const REQUIRED_ENV_VARS = [
   "MILVUS_ADDRESS",
   "MILVUS_USERNAME",
...
@@ -7,6 +7,7 @@ import {
 } from "llamaindex";
 import { MongoClient } from "mongodb";
 import { getDocuments } from "./loader";
+import { initSettings } from "./settings";
 import { checkRequiredEnvVars } from "./shared";

 dotenv.config();
@@ -42,6 +43,7 @@ async function loadAndIndex() {
 (async () => {
   checkRequiredEnvVars();
+  initSettings();
   await loadAndIndex();
   console.log("Finished generating storage.");
 })();
 /* eslint-disable turbo/no-undeclared-env-vars */
-import {
-  LLM,
-  MongoDBAtlasVectorSearch,
-  serviceContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
+import { MongoDBAtlasVectorSearch, VectorStoreIndex } from "llamaindex";
 import { MongoClient } from "mongodb";
-import { checkRequiredEnvVars, CHUNK_OVERLAP, CHUNK_SIZE } from "./shared";
+import { checkRequiredEnvVars } from "./shared";

-export async function getDataSource(llm: LLM) {
+export async function getDataSource() {
   checkRequiredEnvVars();
   const client = new MongoClient(process.env.MONGO_URI!);
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
   const store = new MongoDBAtlasVectorSearch({
     mongodbClient: client,
     dbName: process.env.MONGODB_DATABASE!,
@@ -23,5 +13,5 @@ export async function getDataSource(llm: LLM) {
     indexName: process.env.MONGODB_VECTOR_INDEX,
   });
-  return await VectorStoreIndex.fromVectorStore(store, serviceContext);
+  return await VectorStoreIndex.fromVectorStore(store);
 }
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
 const REQUIRED_ENV_VARS = [
   "MONGO_URI",
   "MONGODB_DATABASE",
...
-import {
-  ServiceContext,
-  serviceContextFromDefaults,
-  storageContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
+import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
 import * as dotenv from "dotenv";
 import { getDocuments } from "./loader";
-import { CHUNK_OVERLAP, CHUNK_SIZE, STORAGE_CACHE_DIR } from "./shared";
+import { initSettings } from "./settings";
+import { STORAGE_CACHE_DIR } from "./shared";

 // Load environment variables from local .env file
 dotenv.config();
@@ -20,7 +16,7 @@ async function getRuntime(func: any) {
   return end - start;
 }

-async function generateDatasource(serviceContext: ServiceContext) {
+async function generateDatasource() {
   console.log(`Generating storage context...`);
   // Split documents, create embeddings and store them in the storage context
   const ms = await getRuntime(async () => {
@@ -30,18 +26,13 @@ async function generateDatasource(serviceContext: ServiceContext) {
     const documents = await getDocuments();
     await VectorStoreIndex.fromDocuments(documents, {
       storageContext,
-      serviceContext,
     });
   });
   console.log(`Storage context successfully generated in ${ms / 1000}s.`);
 }

 (async () => {
-  const serviceContext = serviceContextFromDefaults({
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
-  await generateDatasource(serviceContext);
+  initSettings();
+  await generateDatasource();
   console.log("Finished generating storage.");
 })();
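The same substitution repeats in every generate script in this commit: a serviceContext assembled at each call site becomes one-time global configuration. Using the old defaults from shared.ts, the mapping is roughly:

// Before: configuration threaded through every call.
const serviceContext = serviceContextFromDefaults({
  llm,
  chunkSize: CHUNK_SIZE, // 512
  chunkOverlap: CHUNK_OVERLAP, // 20
});
await VectorStoreIndex.fromDocuments(documents, { storageContext, serviceContext });

// After: set once (inside initSettings), read implicitly everywhere.
Settings.llm = llm;
Settings.chunkSize = 512;
Settings.chunkOverlap = 20;
await VectorStoreIndex.fromDocuments(documents, { storageContext });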
 import {
-  LLM,
-  serviceContextFromDefaults,
   SimpleDocumentStore,
   storageContextFromDefaults,
   VectorStoreIndex,
 } from "llamaindex";
-import { CHUNK_OVERLAP, CHUNK_SIZE, STORAGE_CACHE_DIR } from "./shared";
+import { STORAGE_CACHE_DIR } from "./shared";

-export async function getDataSource(llm: LLM) {
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
+export async function getDataSource() {
   const storageContext = await storageContextFromDefaults({
     persistDir: `${STORAGE_CACHE_DIR}`,
   });
@@ -25,6 +18,5 @@ export async function getDataSource(llm: LLM) {
   }
   return await VectorStoreIndex.init({
     storageContext,
-    serviceContext,
   });
 }
 export const STORAGE_CACHE_DIR = "./cache";
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
@@ -6,6 +6,7 @@ import {
   storageContextFromDefaults,
 } from "llamaindex";
 import { getDocuments } from "./loader";
+import { initSettings } from "./settings";
 import {
   PGVECTOR_COLLECTION,
   PGVECTOR_SCHEMA,
@@ -37,6 +38,7 @@ async function loadAndIndex() {
 (async () => {
   checkRequiredEnvVars();
+  initSettings();
   await loadAndIndex();
   console.log("Finished generating storage.");
   process.exit(0);
...
 /* eslint-disable turbo/no-undeclared-env-vars */
-import {
-  LLM,
-  PGVectorStore,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
+import { PGVectorStore, VectorStoreIndex } from "llamaindex";
 import {
-  CHUNK_OVERLAP,
-  CHUNK_SIZE,
   PGVECTOR_SCHEMA,
   PGVECTOR_TABLE,
   checkRequiredEnvVars,
 } from "./shared";

-export async function getDataSource(llm: LLM) {
+export async function getDataSource() {
   checkRequiredEnvVars();
   const pgvs = new PGVectorStore({
     connectionString: process.env.PG_CONNECTION_STRING,
     schemaName: PGVECTOR_SCHEMA,
     tableName: PGVECTOR_TABLE,
   });
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
-  return await VectorStoreIndex.fromVectorStore(pgvs, serviceContext);
+  return await VectorStoreIndex.fromVectorStore(pgvs);
 }
 export const PGVECTOR_COLLECTION = "data";
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
 export const PGVECTOR_SCHEMA = "public";
 export const PGVECTOR_TABLE = "llamaindex_embedding";
...
@@ -6,6 +6,7 @@ import {
   storageContextFromDefaults,
 } from "llamaindex";
 import { getDocuments } from "./loader";
+import { initSettings } from "./settings";
 import { checkRequiredEnvVars } from "./shared";

 dotenv.config();
@@ -28,6 +29,7 @@ async function loadAndIndex() {
 (async () => {
   checkRequiredEnvVars();
+  initSettings();
   await loadAndIndex();
   console.log("Finished generating storage.");
 })();
 /* eslint-disable turbo/no-undeclared-env-vars */
-import {
-  LLM,
-  PineconeVectorStore,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-import { CHUNK_OVERLAP, CHUNK_SIZE, checkRequiredEnvVars } from "./shared";
+import { PineconeVectorStore, VectorStoreIndex } from "llamaindex";
+import { checkRequiredEnvVars } from "./shared";

-export async function getDataSource(llm: LLM) {
+export async function getDataSource() {
   checkRequiredEnvVars();
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
   const store = new PineconeVectorStore();
-  return await VectorStoreIndex.fromVectorStore(store, serviceContext);
+  return await VectorStoreIndex.fromVectorStore(store);
 }
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
 const REQUIRED_ENV_VARS = ["PINECONE_ENVIRONMENT", "PINECONE_API_KEY"];

 export function checkRequiredEnvVars() {
...