diff --git a/examples/data/multi_modal/1.jpg b/examples/data/multi_modal/1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..169024b540c591fa85e0d1c24c581dca6f8255b1
Binary files /dev/null and b/examples/data/multi_modal/1.jpg differ
diff --git a/examples/data/multi_modal/2.jpg b/examples/data/multi_modal/2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0a41cb1c510102bf3b8610275716bb7adac44581
Binary files /dev/null and b/examples/data/multi_modal/2.jpg differ
diff --git a/examples/data/multi_modal/3.jpg b/examples/data/multi_modal/3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c58d5a1fb48e4e8809804a37263bafa87c9494dc
Binary files /dev/null and b/examples/data/multi_modal/3.jpg differ
diff --git a/examples/data/multi_modal/60.jpg b/examples/data/multi_modal/60.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5cbdea93a792635df763dd778ec36ce9ae5cfc68
Binary files /dev/null and b/examples/data/multi_modal/60.jpg differ
diff --git a/examples/data/multi_modal/61.jpg b/examples/data/multi_modal/61.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d5c9edd53b61cc2b64d53cc96ab2844e34fe4831
Binary files /dev/null and b/examples/data/multi_modal/61.jpg differ
diff --git a/examples/data/multi_modal/62.jpg b/examples/data/multi_modal/62.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bbb7b2199c7806ceb51ea382da77157ac7f86dc5
Binary files /dev/null and b/examples/data/multi_modal/62.jpg differ
diff --git a/examples/data/multi_modal/San Francisco.txt b/examples/data/multi_modal/San Francisco.txt
new file mode 100644
index 0000000000000000000000000000000000000000..938f45d2bc883b2f267b1dfed163db65e8e52f7c
Binary files /dev/null and b/examples/data/multi_modal/San Francisco.txt differ
diff --git a/examples/data/multi_modal/Vincent van Gogh.txt b/examples/data/multi_modal/Vincent van Gogh.txt
new file mode 100644
index 0000000000000000000000000000000000000000..30b127be017a095afff17a95a5f1f93dfa917968
Binary files /dev/null and b/examples/data/multi_modal/Vincent van Gogh.txt differ
diff --git a/examples/multiModal.ts b/examples/multiModal.ts
index 941b54b065580aa0d23329be677671ee7c4df04e..8a31980d550e30cd0c1ae90dca2ad7bda76d30b0 100644
--- a/examples/multiModal.ts
+++ b/examples/multiModal.ts
@@ -1,9 +1,12 @@
 import {
+  ImageNode,
   serviceContextFromDefaults,
   SimpleDirectoryReader,
   SimpleVectorStore,
+  TextNode,
   VectorStoreIndex,
 } from "llamaindex";
+import * as path from "path";
 
 async function main() {
   // read data into documents
@@ -28,7 +31,17 @@ async function main() {
     "what are Vincent van Gogh's famous paintings",
   );
   for (const result of results) {
-    console.log(result.node);
+    const node = result.node;
+    if (!node) {
+      continue;
+    }
+    if (node instanceof ImageNode) {
+      console.log(`Image: ${path.join(__dirname, node.id_)}`);
+    } else if (node instanceof TextNode) {
+      console.log("Text:", (node as TextNode).text.substring(0, 128));
+    }
+    console.log(`ID: ${node.id_}`);
+    console.log(`Similarity: ${result.score}`);
   }
 }
 
diff --git a/packages/core/src/Node.ts b/packages/core/src/Node.ts
index ede6d92eeb104f4fbf7d46f3acd917bbcbec0e21..67ed91a1c16220a732b7e2fee1a6514a8ab46b9f 100644
--- a/packages/core/src/Node.ts
+++ b/packages/core/src/Node.ts
@@ -14,6 +14,7 @@ export enum ObjectType {
   IMAGE = "IMAGE",
   INDEX = "INDEX",
   DOCUMENT = "DOCUMENT",
+  IMAGE_DOCUMENT = "IMAGE_DOCUMENT",
 }
 
 export enum MetadataMode {
@@ -229,17 +230,6 @@ export class TextNode<T extends Metadata = Metadata> extends BaseNode<T> {
   }
 }
 
-export type ImageType = string | Blob | URL;
-
-export class ImageNode<T extends Metadata = Metadata> extends TextNode<T> {
-  image?: ImageType; // image as blob
-  textEmbedding?: number[]; // Assuming text embedding is an array of numbers
-
-  getType(): ObjectType {
-    return ObjectType.IMAGE;
-  }
-}
-
 export class IndexNode<T extends Metadata = Metadata> extends TextNode<T> {
   indexId: string = "";
 
@@ -288,15 +278,37 @@ export function jsonToNode(json: any, type?: ObjectType) {
       return new IndexNode(json);
     case ObjectType.DOCUMENT:
       return new Document(json);
+    case ObjectType.IMAGE_DOCUMENT:
+      return new ImageDocument(json);
     default:
       throw new Error(`Invalid node type: ${nodeType}`);
   }
 }
 
+export type ImageType = string | Blob | URL; // union: an image may be given as a string, a Blob, or a URL
+
+export type ImageNodeConstructorProps<T extends Metadata> = Pick<
+  ImageNode<T>,
+  "image" | "id_"
+> &
+  Partial<ImageNode<T>>; // "image" and "id_" keep their declared shape; every other ImageNode field is optional
+
+export class ImageNode<T extends Metadata = Metadata> extends TextNode<T> {
+  image: ImageType; // required image payload: a string, Blob, or URL (see ImageType)
+
+  constructor(init: ImageNodeConstructorProps<T>) {
+    super(init);
+    this.image = init.image;
+  }
+
+  getType(): ObjectType {
+    return ObjectType.IMAGE;
+  }
+}
+
 export class ImageDocument<T extends Metadata = Metadata> extends ImageNode<T> {
-  constructor(init?: Partial<ImageDocument<T>>) {
+  constructor(init: ImageNodeConstructorProps<T>) {
     super(init);
-    Object.assign(this, init);
 
     if (new.target === ImageDocument) {
       this.hash = this.generateHash();
@@ -304,7 +316,7 @@ export class ImageDocument<T extends Metadata = Metadata> extends ImageNode<T> {
   }
 
   getType() {
-    return ObjectType.DOCUMENT;
+    return ObjectType.IMAGE_DOCUMENT;
   }
 }
 
diff --git a/packages/core/src/NodeParser.ts b/packages/core/src/NodeParser.ts
index f3d064ba5738702f4a11ba12483d6eb8122735d1..d39aae5ae98fd5ef25103850f990439e55ed94b2 100644
--- a/packages/core/src/NodeParser.ts
+++ b/packages/core/src/NodeParser.ts
@@ -1,4 +1,10 @@
-import { Document, NodeRelationship, TextNode } from "./Node";
+import {
+  BaseNode,
+  Document,
+  ImageDocument,
+  NodeRelationship,
+  TextNode,
+} from "./Node";
 import { SentenceSplitter } from "./TextSplitter";
 import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE } from "./constants";
 
@@ -27,12 +33,19 @@ export function getTextSplitsFromDocument(
  * @returns An array of nodes.
  */
 export function getNodesFromDocument(
-  document: Document,
+  doc: BaseNode,
   textSplitter: SentenceSplitter,
   includeMetadata: boolean = true,
   includePrevNextRel: boolean = true,
 ) {
-  let nodes: TextNode[] = [];
+  if (doc instanceof ImageDocument) {
+    return [doc];
+  }
+  if (!(doc instanceof Document)) {
+    throw new Error("Expected either an Image Document or Document");
+  }
+  const document = doc as Document;
+  const nodes: TextNode[] = [];
 
   const textSplits = getTextSplitsFromDocument(document, textSplitter);
 
@@ -62,7 +75,7 @@ export function getNodesFromDocument(
 }
 
 /**
- * A NodeParser generates TextNodes from Documents
+ * A NodeParser generates Nodes from Documents
  */
 export interface NodeParser {
   /**
@@ -70,7 +83,7 @@ export interface NodeParser {
    * @param documents - The documents to generate nodes from.
    * @returns An array of nodes.
    */
-  getNodesFromDocuments(documents: Document[]): TextNode[];
+  getNodesFromDocuments(documents: BaseNode[]): BaseNode[];
 }
 
 /**
@@ -121,7 +134,7 @@ export class SimpleNodeParser implements NodeParser {
    * Generate Node objects from documents
    * @param documents
    */
-  getNodesFromDocuments(documents: Document[]) {
+  getNodesFromDocuments(documents: BaseNode[]) {
     return documents
       .map((document) => getNodesFromDocument(document, this.textSplitter))
       .flat();
diff --git a/packages/core/src/embeddings/MultiModalEmbedding.ts b/packages/core/src/embeddings/MultiModalEmbedding.ts
index 43bb854a4c92a3af321d223026442bfb9082fd01..46d68ec25948db03c0137c35094acb72f6af557d 100644
--- a/packages/core/src/embeddings/MultiModalEmbedding.ts
+++ b/packages/core/src/embeddings/MultiModalEmbedding.ts
@@ -9,7 +9,6 @@ export abstract class MultiModalEmbedding extends BaseEmbedding {
   abstract getImageEmbedding(images: ImageType): Promise<number[]>;
 
   async getImageEmbeddings(images: ImageType[]): Promise<number[][]> {
-    // Embed the input sequence of images asynchronously.
     return Promise.all(
       images.map((imgFilePath) => this.getImageEmbedding(imgFilePath)),
     );
diff --git a/packages/core/src/indices/vectorStore/VectorIndexRetriever.ts b/packages/core/src/indices/vectorStore/VectorIndexRetriever.ts
index fb9c8ee71e965a4cef9ef69b643491323fbf15a1..b24b732ccf17d06b2ba926d4cecd8ab9f8543d05 100644
--- a/packages/core/src/indices/vectorStore/VectorIndexRetriever.ts
+++ b/packages/core/src/indices/vectorStore/VectorIndexRetriever.ts
@@ -41,7 +41,7 @@ export class VectorIndexRetriever implements BaseRetriever {
   ): Promise<NodeWithScore[]> {
     let nodesWithScores = await this.textRetrieve(query, preFilters);
     nodesWithScores = nodesWithScores.concat(
-      await this.imageRetrieve(query, preFilters),
+      await this.textToImageRetrieve(query, preFilters),
     );
     this.sendEvent(query, nodesWithScores, parentEvent);
     return nodesWithScores;
@@ -56,7 +56,7 @@ export class VectorIndexRetriever implements BaseRetriever {
     return this.buildNodeListFromQueryResult(result);
   }
 
-  private async imageRetrieve(query: string, preFilters?: unknown) {
+  private async textToImageRetrieve(query: string, preFilters?: unknown) {
     if (!this.index.imageEmbedModel || !this.index.imageVectorStore) {
       // no-op if image embedding and vector store are not set
       return [];
diff --git a/packages/core/src/indices/vectorStore/VectorStoreIndex.ts b/packages/core/src/indices/vectorStore/VectorStoreIndex.ts
index 99c9101f31ebb4005f76a132b1742560f43f3a26..6721aaa862da17a9bc3019cec421b2f6c8218300 100644
--- a/packages/core/src/indices/vectorStore/VectorStoreIndex.ts
+++ b/packages/core/src/indices/vectorStore/VectorStoreIndex.ts
@@ -1,4 +1,3 @@
-import _ from "lodash";
 import {
   BaseNode,
   Document,
@@ -150,7 +149,6 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
   /**
    * Get the embeddings for nodes.
    * @param nodes
-   * @param serviceContext
    * @param logProgress log progress to console (useful for debugging)
    * @returns
    */
@@ -348,11 +346,6 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
     nodes: ImageNode[],
     logProgress: boolean = false,
   ): Promise<BaseNode[]> {
-    const isImageToText = nodes.every((node) => _.isString(node.text));
-    if (isImageToText) {
-      // every image node has a text, use the text embedding model
-      return this.getNodeEmbeddingResults(nodes, logProgress);
-    }
     if (!this.imageEmbedModel) {
       return [];
     }
@@ -364,9 +357,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
       if (logProgress) {
         console.log(`getting embedding for node ${i}/${nodes.length}`);
       }
-      node.embedding = await this.imageEmbedModel.getImageEmbedding(
-        node.getContent(MetadataMode.EMBED),
-      );
+      node.embedding = await this.imageEmbedModel.getImageEmbedding(node.image);
       nodesWithEmbeddings.push(node);
     }
 
@@ -383,8 +374,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
     for (let node of nodes) {
       if (node instanceof ImageNode) {
         imageNodes.push(node);
-      }
-      if (node instanceof TextNode) {
+      } else if (node instanceof TextNode) {
         textNodes.push(node);
       }
     }
diff --git a/packages/core/src/readers/ImageReader.ts b/packages/core/src/readers/ImageReader.ts
index be6ec431d69080503b183a4e1e568d379baee3fb..fd1b3969558b7076a3ceaad960eb07f9fe86f42e 100644
--- a/packages/core/src/readers/ImageReader.ts
+++ b/packages/core/src/readers/ImageReader.ts
@@ -18,7 +18,7 @@ export class ImageReader implements BaseReader {
     file: string,
     fs: GenericFileSystem = DEFAULT_FS,
   ): Promise<Document[]> {
-    const dataBuffer = await fs.readFile(file, "utf-8");
+    const dataBuffer = await fs.readFile(file);
     const blob = new Blob([dataBuffer]);
     return [new ImageDocument({ image: blob, id_: file })];
   }