Skip to content
Snippets Groups Projects
Unverified commit 90027a7b, authored by Thuc Pham and committed by GitHub
Browse files

fix: enable split long sentence by default (#568)

parent aab56faf
Branches
Tags
No related merge requests found
---
"llamaindex": patch
---
Add splitLongSentences option to SimpleNodeParser
import {
Document,
SimpleNodeParser,
VectorStoreIndex,
serviceContextFromDefaults,
} from "llamaindex";
export const STORAGE_DIR = "./data";

/**
 * Example entry point: builds a vector index from a single document whose
 * text is one very long sentence, using a SimpleNodeParser constructed with
 * `splitLongSentences: true`.
 *
 * @returns {Promise<void>} Settles once VectorStoreIndex.fromDocuments settles.
 */
async function main() {
  // Node parser configured with the chunking options and with
  // splitLongSentences turned on.
  const nodeParser = new SimpleNodeParser({
    chunkSize: 512,
    chunkOverlap: 20,
    splitLongSentences: true,
  });
  const serviceContext = serviceContextFromDefaults({ nodeParser });

  // One sentence made of the word "is" repeated 9000 times, closed by a period.
  const text = `${"is ".repeat(9000)}.`;
  const document = new Document({ text, id_: "1" });

  await VectorStoreIndex.fromDocuments([document], { serviceContext });
}

main();
......@@ -27,12 +27,14 @@ export class SimpleNodeParser implements NodeParser {
includePrevNextRel?: boolean;
chunkSize?: number;
chunkOverlap?: number;
splitLongSentences?: boolean;
}) {
this.textSplitter =
init?.textSplitter ??
new SentenceSplitter({
chunkSize: init?.chunkSize ?? DEFAULT_CHUNK_SIZE,
chunkOverlap: init?.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP,
splitLongSentences: init?.splitLongSentences ?? false,
});
this.includeMetadata = init?.includeMetadata ?? true;
this.includePrevNextRel = init?.includePrevNextRel ?? true;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment