Commit 446dc85b authored by Marcus Schiesser

fix: usage of transformers.js as CJS

parent 4aa2c226
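This commit removes the static top-level imports of @xenova/transformers (an ESM-only package) from the core embeddings module and defers them to dynamic import() calls, so a CommonJS consumer that require()s llamaindex no longer pulls in the ESM package at load time (which typically fails with ERR_REQUIRE_ESM). A minimal sketch of consuming the library from CommonJS after this change, assuming ClipEmbedding is re-exported from the llamaindex package root (file name illustrative):

// consumer.cjs - plain CommonJS; require() no longer touches the ESM package
const { ClipEmbedding } = require("llamaindex");

async function main() {
  const clip = new ClipEmbedding();
  // First use triggers the lazy `await import("@xenova/transformers")`
  // inside the library, loading the ESM dependency on demand.
  const embedding = await clip.getTextEmbedding("a photo of a dog");
  console.log(embedding.length);
}

main().catch(console.error);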
@@ -19,12 +19,12 @@ async function main() {
   const sim1 = similarity(
     textEmbedding1,
     imageEmbedding,
-    SimilarityType.COSINE,
+    SimilarityType.DEFAULT,
   );
   const sim2 = similarity(
     textEmbedding2,
     imageEmbedding,
-    SimilarityType.COSINE,
+    SimilarityType.DEFAULT,
   );
   console.log(`Similarity between "${text1}" and the image is ${sim1}`);
...
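For context, a sketch of the fixed example end to end, assuming ClipEmbedding, similarity, and SimilarityType are exported from the package root. The SimilarityType enum has no COSINE member; DEFAULT resolves to cosine similarity, which is why the example switches constants. Paths and strings are illustrative:

import { ClipEmbedding, SimilarityType, similarity } from "llamaindex";

const clip = new ClipEmbedding();
const textEmbedding = await clip.getTextEmbedding("a photo of a cat");
const imageEmbedding = await clip.getImageEmbedding("./cat.jpg");

// DEFAULT is the library's default metric (cosine).
const score = similarity(textEmbedding, imageEmbedding, SimilarityType.DEFAULT);
console.log(`text/image similarity: ${score}`);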
@@ -2,12 +2,16 @@
   "version": "0.0.1",
   "private": true,
   "name": "clip-test",
+  "type": "module",
   "dependencies": {
     "dotenv": "^16.3.1",
     "llamaindex": "workspace:*"
   },
+  "devDependencies": {
+    "@types/node": "^18",
+    "ts-node": "^10.9.1"
+  },
   "scripts": {
-    "lint": "eslint ."
+    "lint": "eslint .",
+    "start": "ts-node ./clip_test.ts"
   }
 }
\ No newline at end of file
-import { ClientOptions as OpenAIClientOptions } from "openai";
-import {
-  AutoProcessor,
-  AutoTokenizer,
-  CLIPTextModelWithProjection,
-  CLIPVisionModelWithProjection,
-  RawImage,
-} from "@xenova/transformers";
 import _ from "lodash";
+import { ClientOptions as OpenAIClientOptions } from "openai";
 import { DEFAULT_SIMILARITY_TOP_K } from "./constants";
 import {
   AzureOpenAIConfig,
@@ -308,6 +300,7 @@ export class OpenAIEmbedding extends BaseEmbedding {
 export type ImageType = string | Blob | URL;
 async function readImage(input: ImageType) {
+  const { RawImage } = await import("@xenova/transformers");
   if (input instanceof Blob) {
     return await RawImage.fromBlob(input);
   } else if (_.isString(input) || input instanceof URL) {
@@ -320,7 +313,7 @@ async function readImage(input: ImageType) {
 /*
  * Base class for Multi Modal embeddings.
  */
-abstract class MultiModalEmbedding extends BaseEmbedding {
+export abstract class MultiModalEmbedding extends BaseEmbedding {
   abstract getImageEmbedding(images: ImageType): Promise<number[]>;

   async getImageEmbeddings(images: ImageType[]): Promise<number[][]> {
@@ -347,6 +340,7 @@ export class ClipEmbedding extends MultiModalEmbedding {
   async getTokenizer() {
     if (!this.tokenizer) {
+      const { AutoTokenizer } = await import("@xenova/transformers");
       this.tokenizer = await AutoTokenizer.from_pretrained(this.modelType);
     }
     return this.tokenizer;
@@ -354,6 +348,7 @@ export class ClipEmbedding extends MultiModalEmbedding {
   async getProcessor() {
     if (!this.processor) {
+      const { AutoProcessor } = await import("@xenova/transformers");
       this.processor = await AutoProcessor.from_pretrained(this.modelType);
     }
     return this.processor;
@@ -361,6 +356,9 @@ export class ClipEmbedding extends MultiModalEmbedding {
   async getVisionModel() {
     if (!this.visionModel) {
+      const { CLIPVisionModelWithProjection } = await import(
+        "@xenova/transformers"
+      );
       this.visionModel = await CLIPVisionModelWithProjection.from_pretrained(
         this.modelType,
       );
@@ -371,6 +369,9 @@ export class ClipEmbedding extends MultiModalEmbedding {
   async getTextModel() {
     if (!this.textModel) {
+      const { CLIPTextModelWithProjection } = await import(
+        "@xenova/transformers"
+      );
       this.textModel = await CLIPTextModelWithProjection.from_pretrained(
         this.modelType,
       );
...
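The recurring pattern in this file: each symbol from the ESM-only @xenova/transformers package is resolved through a dynamic import() at first use and cached on the instance, which keeps the library's CommonJS build loadable while paying the module-load cost only once. A minimal sketch of the idea (class and model names are illustrative):

import type { PreTrainedTokenizer } from "@xenova/transformers";

class LazyTokenizerHolder {
  private tokenizer?: PreTrainedTokenizer;

  async getTokenizer(): Promise<PreTrainedTokenizer> {
    if (!this.tokenizer) {
      // Dynamic import() is legal inside CommonJS output, unlike a static
      // `import` of an ESM-only dependency; the module loads once, lazily.
      const { AutoTokenizer } = await import("@xenova/transformers");
      this.tokenizer = await AutoTokenizer.from_pretrained(
        "Xenova/clip-vit-base-patch32",
      );
    }
    return this.tokenizer;
  }
}

One caveat: when TypeScript compiles with module: commonjs, the compiler rewrites await import() into a require() call, which reintroduces the original failure; a module setting such as node16/nodenext keeps it a true dynamic import.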
@@ -57,6 +57,13 @@ importers:
       llamaindex:
         specifier: workspace:*
         version: link:../../packages/core
+    devDependencies:
+      '@types/node':
+        specifier: ^18
+        version: 18.18.8
+      ts-node:
+        specifier: ^10.9.1
+        version: 10.9.1(@types/node@18.18.8)(typescript@5.2.2)

   apps/docs:
     dependencies:
...