From 8bf5b4acfdb679a3de3b1503815b1a935c23436f Mon Sep 17 00:00:00 2001
From: Alex Yang <himself65@outlook.com>
Date: Tue, 2 Jul 2024 14:48:51 -0700
Subject: [PATCH] fix: llama parse input spreadsheet (#1007)

---
 .changeset/happy-hairs-deny.md                |   5 +
 .../src/readers/LlamaParseReader.ts           | 219 +++++++++---------
 .../tests/readers/fixtures/test.xlsx          | Bin 0 -> 8441 bytes
 .../tests/readers/llama-parser-reader.test.ts |  15 ++
 4 files changed, 129 insertions(+), 110 deletions(-)
 create mode 100644 .changeset/happy-hairs-deny.md
 create mode 100644 packages/llamaindex/tests/readers/fixtures/test.xlsx
 create mode 100644 packages/llamaindex/tests/readers/llama-parser-reader.test.ts

diff --git a/.changeset/happy-hairs-deny.md b/.changeset/happy-hairs-deny.md
new file mode 100644
index 000000000..76e9b70af
--- /dev/null
+++ b/.changeset/happy-hairs-deny.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+fix: llama parse input spreadsheet
diff --git a/packages/llamaindex/src/readers/LlamaParseReader.ts b/packages/llamaindex/src/readers/LlamaParseReader.ts
index 2c4150e87..cc3028027 100644
--- a/packages/llamaindex/src/readers/LlamaParseReader.ts
+++ b/packages/llamaindex/src/readers/LlamaParseReader.ts
@@ -1,105 +1,100 @@
 import { Document } from "@llamaindex/core/schema";
 import { fs, getEnv } from "@llamaindex/env";
-import { filetypemime } from "magic-bytes.js";
+import { filetypeinfo } from "magic-bytes.js";
 import { FileReader, type Language, type ResultType } from "./type.js";
 
-const SupportedFiles: { [key: string]: string } = {
-  ".pdf": "application/pdf",
-  // Documents and Presentations
-  ".602": "application/x-t602",
-  ".abw": "application/x-abiword",
-  ".cgm": "image/cgm",
-  ".cwk": "application/x-cwk",
-  ".doc": "application/msword",
-  ".docx":
-    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-  ".docm": "application/vnd.ms-word.document.macroEnabled.12",
-  ".dot": "application/msword",
-  ".dotm": "application/vnd.ms-word.template.macroEnabled.12",
-  ".dotx":
-    "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
-  ".hwp": "application/x-hwp",
-  ".key": "application/x-iwork-keynote-sffkey",
-  ".lwp": "application/vnd.lotus-wordpro",
-  ".mw": "application/macwriteii",
-  ".mcw": "application/macwriteii",
-  ".pages": "application/x-iwork-pages-sffpages",
-  ".pbd": "application/x-pagemaker",
-  ".ppt": "application/vnd.ms-powerpoint",
-  ".pptm": "application/vnd.ms-powerpoint.presentation.macroEnabled.12",
-  ".pptx":
-    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
-  ".pot": "application/vnd.ms-powerpoint",
-  ".potm": "application/vnd.ms-powerpoint.template.macroEnabled.12",
-  ".potx":
-    "application/vnd.openxmlformats-officedocument.presentationml.template",
-  ".rtf": "application/rtf",
-  ".sda": "application/vnd.stardivision.draw",
-  ".sdd": "application/vnd.stardivision.impress",
-  ".sdp": "application/sdp",
-  ".sdw": "application/vnd.stardivision.writer",
-  ".sgl": "application/vnd.stardivision.writer",
-  ".sti": "application/vnd.sun.xml.impress.template",
-  ".sxi": "application/vnd.sun.xml.impress",
-  ".sxw": "application/vnd.sun.xml.writer",
-  ".stw": "application/vnd.sun.xml.writer.template",
-  ".sxg": "application/vnd.sun.xml.writer.global",
-  ".txt": "text/plain",
-  ".uof": "application/vnd.uoml+xml",
-  ".uop": "application/vnd.openofficeorg.presentation",
-  ".uot": "application/x-uo",
-  ".vor": "application/vnd.stardivision.writer",
-  ".wpd": "application/wordperfect",
-  ".wps": "application/vnd.ms-works",
-  ".xml": "application/xml",
-  ".zabw": "application/x-abiword",
-  // Images
-  ".epub": "application/epub+zip",
-  ".jpg": "image/jpeg",
-  ".jpeg": "image/jpeg",
-  ".png": "image/png",
-  ".gif": "image/gif",
-  ".bmp": "image/bmp",
-  ".svg": "image/svg+xml",
-  ".tiff": "image/tiff",
-  ".webp": "image/webp",
-  // Web
-  ".htm": "text/html",
-  ".html": "text/html",
-  // Spreadsheets
-  ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-  ".xls": "application/vnd.ms-excel",
-  ".xlsm": "application/vnd.ms-excel.sheet.macroEnabled.12",
-  ".xlsb": "application/vnd.ms-excel.sheet.binary.macroEnabled.12",
-  ".xlw": "application/vnd.ms-excel",
-  ".csv": "text/csv",
-  ".dif": "application/x-dif",
-  ".sylk": "text/vnd.sylk",
-  ".slk": "text/vnd.sylk",
-  ".prn": "application/x-prn",
-  ".numbers": "application/x-iwork-numbers-sffnumbers",
-  ".et": "application/vnd.ms-excel",
-  ".ods": "application/vnd.oasis.opendocument.spreadsheet",
-  ".fods": "application/vnd.oasis.opendocument.spreadsheet",
-  ".uos1": "application/vnd.uoml+xml",
-  ".uos2": "application/vnd.uoml+xml",
-  ".dbf": "application/vnd.dbf",
-  ".wk1": "application/vnd.lotus-1-2-3",
-  ".wk2": "application/vnd.lotus-1-2-3",
-  ".wk3": "application/vnd.lotus-1-2-3",
-  ".wk4": "application/vnd.lotus-1-2-3",
-  ".wks": "application/vnd.lotus-1-2-3",
-  ".123": "application/vnd.lotus-1-2-3",
-  ".wq1": "application/x-lotus",
-  ".wq2": "application/x-lotus",
-  ".wb1": "application/x-quattro-pro",
-  ".wb2": "application/x-quattro-pro",
-  ".wb3": "application/x-quattro-pro",
-  ".qpw": "application/x-quattro-pro",
-  ".xlr": "application/vnd.ms-works",
-  ".eth": "application/ethos",
-  ".tsv": "text/tab-separated-values",
-};
+const SUPPORT_FILE_EXT: string[] = [
+  ".pdf",
+  // documents and presentations
+  ".602",
+  ".abw",
+  ".cgm",
+  ".cwk",
+  ".doc",
+  ".docx",
+  ".docm",
+  ".dot",
+  ".dotm",
+  ".hwp",
+  ".key",
+  ".lwp",
+  ".mw",
+  ".mcw",
+  ".pages",
+  ".pbd",
+  ".ppt",
+  ".pptm",
+  ".pptx",
+  ".pot",
+  ".potm",
+  ".potx",
+  ".rtf",
+  ".sda",
+  ".sdd",
+  ".sdp",
+  ".sdw",
+  ".sgl",
+  ".sti",
+  ".sxi",
+  ".sxw",
+  ".stw",
+  ".sxg",
+  ".txt",
+  ".uof",
+  ".uop",
+  ".uot",
+  ".vor",
+  ".wpd",
+  ".wps",
+  ".xml",
+  ".zabw",
+  ".epub",
+  // images
+  ".jpg",
+  ".jpeg",
+  ".png",
+  ".gif",
+  ".bmp",
+  ".svg",
+  ".tiff",
+  ".webp",
+  // web
+  ".htm",
+  ".html",
+  // spreadsheets
+  ".xlsx",
+  ".xls",
+  ".xlsm",
+  ".xlsb",
+  ".xlw",
+  ".csv",
+  ".dif",
+  ".sylk",
+  ".slk",
+  ".prn",
+  ".numbers",
+  ".et",
+  ".ods",
+  ".fods",
+  ".uos1",
+  ".uos2",
+  ".dbf",
+  ".wk1",
+  ".wk2",
+  ".wk3",
+  ".wk4",
+  ".wks",
+  ".123",
+  ".wq1",
+  ".wq2",
+  ".wb1",
+  ".wb2",
+  ".wb3",
+  ".qpw",
+  ".xlr",
+  ".eth",
+  ".tsv",
+];
 
 /**
  * Represents a reader for parsing files using the LlamaParse API.
@@ -165,7 +160,7 @@ export class LlamaParseReader extends FileReader {
     fileName?: string,
   ): Promise<string> {
     // Load data, set the mime type
-    const { mimeType, extension } = await this.getMimeType(data);
+    const { mime, extension } = await LlamaParseReader.getMimeType(data);
 
     if (this.verbose) {
       const name = fileName ? fileName : extension;
@@ -173,7 +168,7 @@ export class LlamaParseReader extends FileReader {
     }
 
     const body = new FormData();
-    body.set("file", new Blob([data], { type: mimeType }), fileName);
+    body.set("file", new Blob([data], { type: mime }), fileName);
 
     const LlamaParseBodyParams = {
       language: this.language,
@@ -378,19 +373,23 @@ export class LlamaParseReader extends FileReader {
     return images;
   }
 
-  private async getMimeType(
+  static async getMimeType(
     data: Uint8Array,
-  ): Promise<{ mimeType: string; extension: string }> {
-    const mimes = filetypemime(data); // Get an array of possible MIME types
-    const extension = Object.keys(SupportedFiles).find(
-      (ext) => SupportedFiles[ext] === mimes[0],
-    ); // Find the extension for the first MIME type
-    if (!extension) {
-      const supportedExtensions = Object.keys(SupportedFiles).join(", ");
+  ): Promise<{ mime: string; extension: string }> {
+    const typeinfos = filetypeinfo(data);
+    // Find the first detected type whose extension is in the supported list.
+    // A file such as docx may also be recognized as a zip file, so every candidate is checked by extension, not only the first.
+    const info = typeinfos.find((info) => {
+      if (info.extension && SUPPORT_FILE_EXT.includes(`.${info.extension}`)) {
+        return info;
+      }
+    });
+    if (!info || !info.mime || !info.extension) {
+      const ext = SUPPORT_FILE_EXT.join(", ");
       throw new Error(
-        `File has type "${mimes[0]}" which does not match supported MIME Types. Supported formats include: ${supportedExtensions}`,
+        `File has type which does not match supported MIME Types. Supported formats include: ${ext}`,
       );
     }
-    return { mimeType: mimes[0], extension }; // Return the first MIME type and its corresponding extension
+    return { mime: info.mime, extension: info.extension };
   }
 }
diff --git a/packages/llamaindex/tests/readers/fixtures/test.xlsx b/packages/llamaindex/tests/readers/fixtures/test.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..fca20837be21a90ea3fde97b39ecb2a768324a55
GIT binary patch
literal 8441
zcmWIWW@Zs#U}NB5U|>*WSQ32nydNV20|yHOgD?XJQ?zq_UP)?RNqk6UL27ZVUPW$>
z!Xg$XjRg!$45MH~hrpW2r~M8a2)KUt_~ZOUx9jdE2M*J>O~-|HX~-u$@=Us_UwP-!
zKU?AN8yyykW|?nHI)A6w)_C3-vuh&R9V_;(zA>Xef^pq9uie{{=67%ZG*NWMlHf}f
z?4q}sHr@R4Ep5{cwgXL@WW&~YYua!&ma>N_eP@)JYho8Q^M&~~r4v<}(E{HWUu<5n
z*ZV=x%qm{9xqBRx!_`FBOW(R@*_kjw^!)Av^Fj?Jn=Wqkc@nZ_?k`u(?<{^ZE!nJP
zKlJ4o7<#<$|Ht;`v)u}lKy8g@M$5`GPqy8V`1tnmr_;Yx?RAYFTr%%n=lHWKbtRLG
zGt0^i%pTW1>?zXTYW>6R;Q50ZpN#fxn)t6S@<&g<%U`yM`=>{rFiLNjv5oD+t<>|2
zmY=w?zVZuWW|8ur`E?R!?RN91uoz}q@I4Qh-0w2$SND4Ezq^ALu-#DF|M(?80|UeV
z|I7@i*>G#9f$|py1_mD{1_o{h4yO2`)SO~{JrFsX{YG@IgL{6>Bwzo-20U%=J^r+x
z_<r^71OX@Saz~HdiXn9ko_F}S_AXYDzPn#tMj%A0nc2O5Z{43$x9{5YzB-}!?UBiv
zZXpqC<!$9_+ou02EM?bHE1Kq3xaQ#z!LqAguYQ~9?p3K#wzpqv^^O$_y-(g(cDwx1
zWuw4(y>KZ`;iWQdVzx6IqmKDT-b?zij`6Bf@Y}U*8@8J-cs2R_?E34|mc8V)@38cF
zb7`jNiQ=U6o9k*$<S2gqDHR%=VVSuhX<m_)Qe}Kf&VkP^v-b=Ct<m+Hb0SE|MCgan
zdH3V03%=GzZ87<HdT!+rbI!x7PkcDp)Gfc_9wW9)B2YX{w}P30;SdJ{11K9+<mi{@
z7iA~q=VycJsWqXu^KM%R)W#?Lcf7FMDd^f+r#DXFISorVWXx~YYDskVb$BeAz9@;^
z@&8`un?;kVHWpl#GSe(_Yfn$(JN|tBHs4$u!<&T{&-oO!`8IED)u{Y!V7y<aSlgi2
z@8Y&4TQhE+sY#prGxOhb`RCG#^6y(KOc)ltO-oyP#mZ%cw$`-^?|jxoa!;E&fj4x+
z6#f;_-fpMYCHa_|ZZiB9;9QygPQa;>t1V(}#lfJ!FTB}T`CK;5(pqpowB~MahHoKP
zjPujv{oA-Mu3>yM?|O_a|AJ)~MIIh?)>hfOUh$1?Vei_O_3ZXdi5rejK2TB;a+q7w
zYGY73LvPuV{zvK(f}iwQZ$Flq)AmH+<h&j09yU(>Vt3+{bwcI(1eX6sQjD3rQJjjE
z@-D^vcXEpB_B}G)eb@8)g4bWuZ@uukx-9UrU#e>OnmIG=bdI*|y*=Tt)w@dGuqIm@
z(~Ex;o@ZxH$#MN1rR#V?`Oldx8*L9pR5uwciZZ+zbX4e~)`f?5>*BB8X_y<rB$B6d
zW54Zm9zC%a+)1qy#T56xU$=5qPuw+Q;o6r96-P363odF&xuPLDdz)Cn$qUk@mmaVF
zApC1dE1#6(73;L!&*hC7SW+#rK16vk&(pu;z2Y!$ZPO};x2OLu-P^E*JwE=<fwOl!
z^P>IcxQB^ZHj3W7vH8(~&c7<Y1%7Jx6}Vouwl3Ja>f6+7GmI}cZjzh1?nT)a0hw=m
zYHa_rzPFz?uU^9Kwz=%ecbVJg<n9XheX(@o-p9F`W(t!0CdpFgAK#jPYr`F$Yu0hA
zxQ+-VJm2tL{4u|1fZD#r=8k`6-8xfqFw&@(qflq|jhRzb`jUN(?YGwC&tg|OZ1Ht-
zqfpi1%#8U5a%c3GP59HBx02=8mU*RB4<p-t@l*y*T3GWqn{_SsgdOwPjyEJ!U$DQu
z>a^e4^NDPwg$vweWPVn>im;fyW16r}u6BXiwy72?=kBbVmbP&<_mbQ7KdfgxQ212v
zC;p<2w$|04OOFKq^@`<)Ugle}g`1;dpQ`k3#~yWuizhwvEK|DHT6~i^?fbd&^<<Ni
zcm7OW!&m?HpqR*lhq-Q{{MUo;pGlN`WqAL+oLTtp-rBkM?(Qm>`0CV~`~U2ZOLy!`
zQ@47)UENA;{`zU*>c8UBPOmTjx$@k_&^d0)f4g_PemQUSyV{27Z@SaPfC5`SBiF?~
z&+cQ*q`mF83LY>pFz7NdFi0_QFjnN~Ln<@mtO~2x7&R6!Fv3f?5z1II@pRT<P`j%2
zU-OJw+cK`DQ%>BR;<WAPUk1f~`6JU>(k5N2kAK*7*TKW=CdWFqyTvNG<@S#k_=)WN
z?&m!5>4FoH@!!{7{SaDfv$rY9aGhCm`I1DXw%DIPHvC<3(8KCTtbD{$vEx!!6F;<_
z*tYm$!87KaCYd!dy-Osbd5+2Xc<(s*S-j`J^#YsMA-|)X?_58>V3nrIr?+|cP4+G=
z&+k{Z+}YqTEz?6hU}~psXWRZcs_q9)|2S*=aniijuwOETaoyQ`M{Q;I)!G!etE@i0
z>8ORjn550`C!fTz72>%w+H{$i7#OxPGcZUnFfc$1@#2is)RJO-Fl7iT&F4(Mo%Ps2
zpzS^PZ$78*eKk?$hJ{|2w&c9``d*@Y`f-E{yRxS5p-biOZ!CQ2l;`BgRJr75THVX<
zf8H$+tz><^)Kt`0%}_O=C3nfqkT2iYAJ6dW<iBGQ6Ui)U+qAsAJKy~C-qPs%j4Vwo
zkF{^LUYOofd9m?XT~f8dj>}QYD-ZZ@Puy32V78&B_OxS#2Ti88&Er_QyDjl{7*|B}
z#>)wZI3GEDZ20@z@7urTWk*dd&fYD%?tbW%oZ!|FBN=xM)u<`{(#f*xMPJ#eSc*S+
zP_V>ebH7pYpGA|V9g57qZQy8R{k2B3{#aU1xADuM|DiKf()r?^erTA#;!C!!>izR;
zz6ejN6%3n`c(`<Dk^1bef8LuHbu3?Pb1Ln%&aU3i?HSw7yjSM&pWOcSZPr$udoPmi
zdDJ{~6fL|ERx&eS+l>BaKa~^T|Bs%0;Pw0k4lD<lI8~d?v$MA!KB&%f)LCoyiCUSx
zKi_zqwY?MlHQa2WGs8b--YqrD4|U)D&#?0)lN<LHgBc5Q`m#ST-EDv1+4Z6v+-3z6
zn>5dP`05sV>Udt+&I}d-6Pq+WeEh%q>Uf<$qaAj_(^q?AsL=)E@F0y-KIa=YdVk_R
zuxObVm*-Da)t@^J;@q;5ys`v4($_4RFl~N3)8iMnZ@!rRTs&1geaW(EH$VjfW()aS
z!WPFJ><kQoN}!G^INg+Fq~@mTgGq3bc@uTEUwXUfe)bdp9nQpFJM*GC(elD_^Kb8}
z9!`yY;^8@^E$&=_L(pz*5k{`(SG*gUGB0{J-mB8+V#{b?+xev5FhSY(e}_$x+pLFO
zU3>pIOiC)(H@@=T_TRI;$B&!$7k{6;dd5O-C6%Y1*SX*QkW)I@b3Sv^n<{^`$6mLc
zgC3^zN&0(Sj~0GeTlD^M|GP)bq7!E*Oz?lg_^xVV&`ZayTImKoPi5y#VyTNR5KLy>
zo)kHkyN0tWIgGb<e`K$rug#PP5?Ab^bYAo<=nQ*#_qK+-;SS#^SD1~}&TYv`@|1Y<
z$L0dZ&S}hQ7g*C``Jc`Fe42A-kX%xP){lj+e?B|E`rhv7qwG5cf~!-!i;_Q2KmC;H
z^XCg^G^bsDyX2on&h_TCb<7D_;^tYe|F5-UKCoHRCiB?RHO3hxf6HbhdoOr%Y2oh6
zTv=xOmh9hl*z$$R@5H;8@*QUW`}NMa&w6p<;X~2^ZTW8=RM*{je??~xW9pA*eEaNP
zZ9O!x)qFw6#V0l5cXPJhJ}a}gRmZ}*<dF5*W%s3?ePCj}xS?0i_GpuOR{WZyKP1JI
z88Q?Ocd2=-4c>IXxZ_dWsvR!QTET7C<-7_Hsq*|z6mWQP^>4xNe4Rdi4n5E8xC1Wl
zcrxug&awE0Uaw||+AH+rsrmgq75~0=KmT9(@u;}|y#L&?7gLuQg-*Mp=r=8Or~JQv
z$N&HRTK;1H|9`jn<NuxPou2w<b^X78SGV8)JEbZjt5q}Z-_O<I@%3Ln>DT}GCm(OM
z<vYu~X%B^^gggCCs8*dxu5d`^nr%O`YqqD4l8-K@Ok-aAtBGAY4^B!Mi?8#~5w;VZ
zkQ#H~_v&=9>zh5+h?JiBW6-7h!0`E+^R1T|_`k-ea{Gs!ykxsGVrpT(;l6^iwg)<&
zKDemW`o$|&i(PPI#yq1r8hcbY13ms9JI`5Yb2oiTRGIBE<AbX48eb$`YklUb&AhNu
z)T8x+V(&ryY;nhet*NsgFJEzf=IfR1PY$^q+H~NIW1Z8C)MLLZmKMATvOH+0E&YdM
z^$FKcIl5`7cRaK{e3@j>=T@w=)ce`QhUdDLe-+z~JS+Ka7=7F<NTcfArOFmdedYE)
zH3APUN{g0xh8(NYI<M$rX!Z0ItNpWYTo=DTV4w4*^n0+EPWgU2L7PUF@MR$^9sg2Q
zW%ljy-?3NjdQM}f_USJ!IjPs@Ejah$#rkPs8S;XMKB&$UZkku~D&fNOm8qYbzBF@e
zI;B|1dW|{f_^N&W9W6$l*G+e8e(LbjPC0*K())+Sd-hGMxMtD0@y7C>EPL`=--R4V
zDha4KlJ-3H0cYzc{zpeMr??B1Olhj#;{Q6cW$j+?``+by*0ny}ZyuSvL2v%ulGrAm
zof~;nzL{@3%VM0}q?DjGW07+BE>^+Qe_K25h4<b#rMZ2Bc<w_sjh;1C>dQ{P?wT*G
z!?OQRW!8NDu&+Br_j~W%tiN6+ZgtVOhjp!6&kC%c6wGw{WZt7)y19AY((nK8+j4XJ
zw0!Pax0{}ATTr;SsdhrsvK5I7_Gm`gF8e6_{_SrajpX#KvnM8+td-|nZ~EDgXV$za
znLc^H8!}m@|Ef%2;@&QD<AlP&tNAOs&MD1ts@E`xJ+t&osJ^P#&X#QxCC&bZ6!~t?
zv|IZq+3x(Uxy$xHNmrW^A^NzES<3&W#g<heZF$>1IiAgTzPllIR@m#dhG}~_J1?b}
zb<S#b$u|BWCA6#fTU|tnjLxm7zsEwYmoa2TuRl<2-n9Mey6Y>_dlo3OoBoRVty%o_
z*zQ}F@2_6|-E;DkAj8_DvHs;I*Z1}>pRdQg)%?R2*FzSn+}g8SK2FvAkiwFGxUyv0
z?|1g11?FE%mlv#b^`D)zS&B<)?YH(1Tm6)-7cqOEn%SiKLg-orNB22r<2hw}S-<o?
zH1bodSW@@>P!4CizOm{i(f?*ERYik%H})8H&t5b6r~0xkjw2a&zJB?ff4EUqtuy6`
z)rOM#l~0`ZKB&ula^Us0=_ivM(^kFteyL+yNg21MowwMz_}(A4o42aOH|{>a=%?ze
zO;am+6pu))b*L`)Q<aftnYsG8$hpJYX0GYjpKTpH@9$dYbxQKd)7&iX2c5cU(>$9u
z)%*CJ+}wa&r{_!Z?8-j$GU)w=_j}&%t6P6<n)bKjej78RGF|g$zsvdcu<WPM>Ll^4
z5ve<WSDkyCGf9?ZRq^bK?|b*VtetUY(K5;R&l2Zu`5ZX=R`nVVuigH|{dc#`Zn-R!
zzx!+Rov$mdS|3@q!K5*t@7(G1+cm#<CLI^>6Aag}ew)U5XXYg%ThU|NgYDv<Pxndd
zRQtSB&ueqYzZp6LbK^_*UI?{()_%Ow^nj4V>{;SrH^MGNzP;I0wfVH`X4c)J=T6<o
z{<=zc&(s?-oAX+IvoE?v$L9K7DP5bFc8%}!yjx2WlQnnT+N!kWBx~CxOY1Xdi}at&
z)SeoCtDrTksci54U%hd)0%mNY`|nDGzS_DgKGVEvtJnIAD_9r1J>dWS@VSG(YvF|x
z`)VW(X9`3=zi&U|<J^3OM`s(a&z0<7eW&JefayK+`wlD*|K%$jnCPG_!}@iDIG<K?
z;)OL=4kvEDly|jis^blrHrpFpW<S37TyB@ag9Y|7eaCw@T#{i}_wKvY)VTd-D@<-L
zuCw5C`?`?-LCx=-P1_~r78ge!So*N^y*s$+fIbwqQ*3|AN+t$|a5e@8UPuG6xTG>C
zwHVYYToRg_ecM3bpWTgmfnV=Vl|9zs+~$4xO2CU#3uND{PCDJWBCco2l5@xYRr{C4
z$yjZ>Sl8*l{BrU6yUMYbR)=wINj+Ay)<`C{MZz%jRoCj!w*B^^2U(O~uUNI>LRV_V
zah-jq?CZVO-sb$aBkhc0Lx9=g?B}y26}QD0d%kVWo6$5kO4?Ia=WN%FV+K)?^&VV8
z?GI;ooDzK}n6~MUynyG~&&xQybfsVYn8f|AOjgWo)113&r<j#!He^ZnW^~@l$Vs{~
z%X!kd-)j9&mR=0I{*I}jb6s}artKxmQ@k=NotU0{S9Im%e7yNq^k=^%f`&P#_tseW
zXRn>(y7Zm~dlFON{id6nFX^xP5Pk2a`_q+wJGX^2s2@}D_txFw$hV~Voa?>CD^hBY
zls?|#%6~=T@1yc8t(`Z%R^NT-_o;Tn6NQF;rFm!mB|j7pf8?S2P3OSG<qH-jyNPaa
zsQ#UKoq?V8`r7uFkNyenH7XH5dh!9oOQB~kgJyHB`@T|>J$xQln6*OIkF2-*{meh?
zzH;uM=!wf+m#2n>{0OR=u$b}b6rT?F|15P^(r@JUUU3t<bT?G8?f8?~1qKTz?7jK;
zD|f%W%j=EyJl(f`$;b<^Z#nNFe_2_-s!jgSY|(<+Zvq)RA08_F%6_OJa_e%jmN>@e
z_v5e6NfvwIax`m~;JcSQW<H$!cWP*j^5f(y=KHR$Yd-Pe<i77-uF3P8x4ye~=xdYy
zEs1##<F|j-zkD=TkbQE29jDuL9nNJZYXnsm&;FSd=yTX&&rjPKO!6}r?_M|+ToG%m
zoe(smcH?g$dH;Pp{9?9$-}N!QRs5c5!?%9222a;%11`5@D^9h?r&Tm2X~gxof4;rM
zgYC}b@^f7iTmtP&ZGNUa+mX_=x;c!!wJlD7b6UM-lgt!_<@GIpM6q>x1d`geIx#XZ
zWHK=@2r_UmrsO9F6y+Bb>nG<IrGnD6!Xg&N(Y`PP!<0#}-iHk&THmw&cRpD=d&}4E
zOJxg;1Y|h;)b6!Dxhs}q=dtMSzrC5eufE#z(a7?7ZP7XJQ#bw0E~-bUZ&?*EOF*PC
zX%_d^l^0GI$=zRYqH>i*?v^I5w+9zS*4Bj?1smi(JekFOjk_RFK{MPSc;VXb%MRSw
zsO#}+(t`lUm~8Ep{O-FCwrwik#W+W>s8zM%)XL>Mn2)8YEMlItSuoQs=Q6X!;R_8c
zPi6fki_}-!M!ftd^I~deFxP(bg_jlIO*wm0q3_*X;l&$sZ!bCfTSBqED_*?+eC^eU
z6?x%20kK742HaP&qwNDk*KT>1ebGd99>-lF=~sW0-c;*0+<#YJ>Hqb>+iaeK2Z0=6
zJf?l8wD#|gKNmQ6;u|Hk>xL(%NosyNt)!ZN_p|-8-3t!4+O+)OH$1I=M0tVU$CJ-j
z?8jP|9*T?Pn83)uAkECcAb_0k5(^4O(;Z^KY0f0y{M(?xb>?64Ja?rna|3xA*Ccs_
zf7y`4IH@e%Z&~ufO)4F0Cce4uzwP3cn`O6p<@Mw27Oi;4H&?sLC#8A)k)psZroEh7
zzy9{Jc`v7D-`ylp!>d=Z=7^x@otNGIS(BZP91&Z2LTsMaMWf?Wr=&!^X%O4_ws<+`
zO6GH!w@zO$x%QyeAYZm>T6k)u_QwM+ve)Q8J{jQCw|d_H>s$EJ9xR@I*Dx(%?Y<)~
z)-ax1f5rdW5{HEkA9414m06*?Pmae*^|{eh%k3HWCOuuOH+9bPtn331t@<zUS~#iN
zbG;HwjJVI1qqMdv?rVm{lBPH7_Z-j;{KImHagC6z3imwY2MV(%8J117cqy;AH>mr>
zk`ytYqlR;`pKsiI@pRyX=HiD528~aSSw1?q$@$m49EFee9n*^+O-y_=w_qFdeUsvI
z=606;zYbT+?&A12Lwfm(*<aUxktu&DCvpFJWn9Q@F`;We+5KMIiC#&*mr~Sxau@6V
z<G*4*mnT~^Pk(vs>!-57xamLR(@j<F*G2tjgB72QOu7uXrl}F}24P{Dv<~n_H2`&j
z4WbP~wlgy@Kt?F_K+|pL8qp^mAeunr8aD7K1oF5hx@OeDP>?1V4&(xBhFFYj1Xf3&
z51PSDVPGia0h<Fd0X%kwJcNes8uTF)gefcez@~tQQji8z&<#Lunjs9BBnCDB(NIIz
yidq9AG%_%xNir~C)Q9NWQOhx8?f0dTv_ng^0B=?{kQz<~P6j(B28N|_ARYjGAkek|

literal 0
HcmV?d00001

diff --git a/packages/llamaindex/tests/readers/llama-parser-reader.test.ts b/packages/llamaindex/tests/readers/llama-parser-reader.test.ts
new file mode 100644
index 000000000..b43cf9915
--- /dev/null
+++ b/packages/llamaindex/tests/readers/llama-parser-reader.test.ts
@@ -0,0 +1,15 @@
+import { LlamaParseReader } from "llamaindex";
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
+import { fileURLToPath } from "node:url";
+import { expect, test } from "vitest";
+
+const fixturesDir = fileURLToPath(new URL("./fixtures", import.meta.url));
+
+test("file type should be detected correctly", async () => {
+  const xlsx = join(fixturesDir, "test.xlsx");
+  const buffer = await readFile(xlsx);
+  const { mime, extension } = await LlamaParseReader.getMimeType(buffer);
+  expect(mime).toBe("application/vnd.oasis.opendocument.spreadsheet");
+  expect(extension).toBe("ods");
+});
-- 
GitLab