Skip to content
Snippets Groups Projects
Unverified Commit 90027a7b authored by Thuc Pham's avatar Thuc Pham Committed by GitHub
Browse files

fix: enable split long sentence by default (#568)

parent aab56faf
No related branches found
No related tags found
No related merge requests found
---
"llamaindex": patch
---
Add splitLongSentences option to SimpleNodeParser
import {
Document,
SimpleNodeParser,
VectorStoreIndex,
serviceContextFromDefaults,
} from "llamaindex";
export const STORAGE_DIR = "./data";
/**
 * Example entry point: indexes a single document whose only sentence is far
 * longer than the configured chunk size, using a node parser that has
 * `splitLongSentences` enabled.
 *
 * @returns {Promise<void>} Resolves once the index has been built.
 */
async function main() {
  // Node parser configured to split sentences that are longer than the chunk size.
  const nodeParser = new SimpleNodeParser({
    chunkSize: 512,
    chunkOverlap: 20,
    splitLongSentences: true,
  });
  const serviceContext = serviceContextFromDefaults({ nodeParser });

  // One very long sentence: "is " repeated 9000 times, terminated by a period.
  const longSentence = `${"is ".repeat(9000)}.`;
  const document = new Document({ text: longSentence, id_: "1" });

  await VectorStoreIndex.fromDocuments([document], { serviceContext });
}

// Handle rejection explicitly instead of leaving the promise floating, and
// signal failure to the caller through the process exit code.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
...@@ -27,12 +27,14 @@ export class SimpleNodeParser implements NodeParser { ...@@ -27,12 +27,14 @@ export class SimpleNodeParser implements NodeParser {
includePrevNextRel?: boolean; includePrevNextRel?: boolean;
chunkSize?: number; chunkSize?: number;
chunkOverlap?: number; chunkOverlap?: number;
splitLongSentences?: boolean;
}) { }) {
this.textSplitter = this.textSplitter =
init?.textSplitter ?? init?.textSplitter ??
new SentenceSplitter({ new SentenceSplitter({
chunkSize: init?.chunkSize ?? DEFAULT_CHUNK_SIZE, chunkSize: init?.chunkSize ?? DEFAULT_CHUNK_SIZE,
chunkOverlap: init?.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP, chunkOverlap: init?.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP,
splitLongSentences: init?.splitLongSentences ?? false,
}); });
this.includeMetadata = init?.includeMetadata ?? true; this.includeMetadata = init?.includeMetadata ?? true;
this.includePrevNextRel = init?.includePrevNextRel ?? true; this.includePrevNextRel = init?.includePrevNextRel ?? true;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment