Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Llama Index
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
mirrored_repos
MachineLearning
run-llama
Llama Index
Commits
10d7b12c
Unverified
Commit
10d7b12c
authored
2 years ago
by
ahmetkca
Committed by
GitHub
2 years ago
Browse files
Options
Downloads
Patches
Plain Diff
Refactor download_loader to change how loaders are downloaded. (#439)
Co-authored-by:
Jerry Liu
<
jerry@robustintelligence.com
>
parent
a8015893
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
.gitignore
+1
-0
1 addition, 0 deletions
.gitignore
gpt_index/readers/download.py
+112
-24
112 additions, 24 deletions
gpt_index/readers/download.py
with
113 additions
and
24 deletions
.gitignore
+
1
−
0
View file @
10d7b12c
...
...
@@ -137,3 +137,4 @@ dmypy.json
# Jetbrains
.idea
modules/
\ No newline at end of file
This diff is collapsed.
Click to expand it.
gpt_index/readers/download.py
+
112
−
24
View file @
10d7b12c
...
...
@@ -6,7 +6,7 @@ import subprocess
import
sys
from
importlib
import
util
from
pathlib
import
Path
from
typing
import
Optional
from
typing
import
List
,
Optional
,
Tuple
import
pkg_resources
import
requests
...
...
@@ -14,9 +14,66 @@ from pkg_resources import DistributionNotFound
from
gpt_index.readers.base
import
BaseReader
LOADER_HUB_URL
=
(
"
https://raw.githubusercontent.com/emptycrown/loader-hub/main/loader_hub
"
)
# Base URL for raw file contents of the loader-hub repository.
LLAMA_HUB_CONTENTS_URL = "https://raw.githubusercontent.com/emptycrown/loader-hub/main"
# Sub-path within the repo where the individual loader packages live.
LOADER_HUB_PATH = "/loader_hub"
# Full URL prefix used when fetching loader files (library.json, base.py, ...).
LOADER_HUB_URL = LLAMA_HUB_CONTENTS_URL + LOADER_HUB_PATH
def _get_file_content(loader_hub_url: str, path: str) -> Tuple[str, int]:
    """Fetch a file from the loader hub.

    Args:
        loader_hub_url: Base URL of the raw loader-hub contents.
        path: Path of the file relative to the base URL (leading ``/``).

    Returns:
        A ``(text, status_code)`` tuple: the response body and the
        HTTP status code of the request.
    """
    response = requests.get(f"{loader_hub_url}{path}")
    return response.text, response.status_code
def get_exports(raw_content: str) -> List[str]:
    """Read content of a Python file and return a list of exported class names.

    For example:
    ```python
    from .a import A
    from .b import B

    __all__ = ["A", "B"]
    ```
    will return `["A", "B"]`.

    NOTE: this is a line-based textual scan; a multi-line `__all__`
    definition is not handled.

    Args:
        - raw_content: The content of a Python file as a string.

    Returns:
        A list of exported class names.
    """
    exports: List[str] = []
    for line in raw_content.splitlines():
        line = line.strip()
        if line.startswith("__all__"):
            # Take everything right of `=`, strip the list brackets, then
            # split on commas.
            raw_items = line.split("=")[1].strip().strip("[").strip("]").split(",")
            cleaned = [item.strip().strip("'").strip('"') for item in raw_items]
            # Drop empty entries so a trailing comma (`["A", "B",]`) does not
            # produce a bogus "" export.
            exports = [item for item in cleaned if item]
    return exports
def rewrite_exports(exports: List[str]) -> None:
    """Write the `__all__` variable to the `__init__.py` file in the modules dir.

    Removes the line that contains `__all__` and appends a new line with the
    updated `__all__` variable.

    Args:
        - exports: A list of exported class names.

    Raises:
        FileNotFoundError: if the modules `__init__.py` does not exist yet.
    """
    dirpath = Path(__file__).parent / "llamahub_modules"
    init_path = f"{dirpath}/__init__.py"
    with open(init_path, "r") as f:
        lines = f.readlines()
    with open(init_path, "w") as f:
        for line in lines:
            line = line.strip()
            # Drop any pre-existing `__all__` line; a fresh one is
            # appended at the end of the file below.
            if line.startswith("__all__"):
                continue
            f.write(line + os.linesep)
        # `sorted` de-duplicates AND makes the output deterministic —
        # bare `list(set(...))` iteration order varies between runs,
        # which would churn the file contents on every rewrite.
        f.write(f"__all__ = {sorted(set(exports))}" + os.linesep)
def
download_loader
(
...
...
@@ -41,13 +98,14 @@ def download_loader(
Returns:
A Loader.
"""
dirpath
=
"
.
modules
"
dirpath
=
Path
(
__file__
).
parent
/
"
llamahub_
modules
"
if
not
os
.
path
.
exists
(
dirpath
):
# Create a new directory because it does not exist
os
.
makedirs
(
dirpath
)
library_path
=
f
"
{
dirpath
}
/library.json
"
loader_id
=
None
# e.g. `web/simple_web`
extra_files
=
[]
# e.g. `web/simple_web/utils.py`
# Check cache first
if
not
refresh_cache
and
os
.
path
.
exists
(
library_path
):
...
...
@@ -55,41 +113,70 @@ def download_loader(
library
=
json
.
load
(
f
)
if
loader_class
in
library
:
loader_id
=
library
[
loader_class
][
"
id
"
]
extra_files
=
library
[
loader_class
].
get
(
"
extra_files
"
,
[])
# Fetch up-to-date library from remote repo if loader_id not found
if
loader_id
is
None
:
response
=
requests
.
get
(
f
"
{
loader_hub_url
}
/library.json
"
)
library
=
json
.
loads
(
response
.
te
x
t
)
library_raw_content
,
_
=
_get_file_content
(
loader_hub_url
,
"
/library.json
"
)
library
=
json
.
loads
(
library_raw_con
te
n
t
)
if
loader_class
not
in
library
:
raise
ValueError
(
"
Loader class name not found in library
"
)
loader_id
=
library
[
loader_class
][
"
id
"
]
extra_files
=
library
[
loader_class
].
get
(
"
extra_files
"
,
[])
# Update cache
with
open
(
library_path
,
"
w
"
)
as
f
:
f
.
write
(
response
.
te
x
t
)
f
.
write
(
library_raw_con
te
n
t
)
assert
loader_id
is
not
None
if
loader_id
is
None
:
raise
ValueError
(
"
Loader class name not found in library
"
)
# Load the module
loader_filename
=
loader_id
.
replace
(
"
/
"
,
"
-
"
)
loader_path
=
f
"
{
dirpath
}
/
{
loader_filename
}
.py
"
requirements_path
=
f
"
{
dirpath
}
/
{
loader_filename
}
_requirements.txt
"
loader_path
=
f
"
{
dirpath
}
/
{
loader_id
}
"
requirements_path
=
f
"
{
loader_path
}
/requirements.txt
"
if
refresh_cache
or
not
os
.
path
.
exists
(
loader_path
):
response
=
requests
.
get
(
f
"
{
loader_hub_url
}
/
{
loader_id
}
/base.py
"
)
response_text
=
response
.
text
os
.
makedirs
(
loader_path
,
exist_ok
=
True
)
basepy_raw_content
,
_
=
_get_file_content
(
loader_hub_url
,
f
"
/
{
loader_id
}
/base.py
"
)
if
use_gpt_index_import
:
response_
te
x
t
=
response_
te
x
t
.
replace
(
basepy_raw_con
te
n
t
=
basepy_raw_con
te
n
t
.
replace
(
"
import llama_index
"
,
"
import gpt_index
"
)
response_text
=
response_text
.
replace
(
"
from llama_index
"
,
"
from gpt_index
"
)
with
open
(
loader_path
,
"
w
"
)
as
f
:
f
.
write
(
response_text
)
basepy_raw_content
=
basepy_raw_content
.
replace
(
"
from llama_index
"
,
"
from gpt_index
"
)
with
open
(
f
"
{
loader_path
}
/base.py
"
,
"
w
"
)
as
f
:
f
.
write
(
basepy_raw_content
)
# Get content of extra files if there are any
# and write them under the loader directory
for
extra_file
in
extra_files
:
extra_file_raw_content
,
_
=
_get_file_content
(
loader_hub_url
,
f
"
/
{
loader_id
}
/
{
extra_file
}
"
)
# If the extra file is an __init__.py file, we need to
# add the exports to the __init__.py file in the modules directory
if
extra_file
==
"
__init__.py
"
:
loader_exports
=
get_exports
(
extra_file_raw_content
)
existing_exports
=
[]
if
os
.
path
.
exists
(
dirpath
/
"
__init__.py
"
):
with
open
(
dirpath
/
"
__init__.py
"
,
"
r+
"
)
as
f
:
f
.
write
(
f
"
from .
{
loader_id
}
import
{
'
,
'
.
join
(
loader_exports
)
}
"
)
existing_exports
=
get_exports
(
f
.
read
())
rewrite_exports
(
existing_exports
+
loader_exports
)
with
open
(
f
"
{
loader_path
}
/
{
extra_file
}
"
,
"
w
"
)
as
f
:
f
.
write
(
extra_file_raw_content
)
if
not
os
.
path
.
exists
(
requirements_path
):
response
=
requests
.
get
(
f
"
{
loader_hub_url
}
/
{
loader_id
}
/requirements.txt
"
)
if
response
.
status_code
==
200
:
# NOTE: need to check the status code
response_txt
,
status_code
=
_get_file_content
(
loader_hub_url
,
f
"
/
{
loader_id
}
/requirements.txt
"
)
if
status_code
==
200
:
with
open
(
requirements_path
,
"
w
"
)
as
f
:
f
.
write
(
response
.
te
xt
)
f
.
write
(
response
_t
xt
)
# Install dependencies if there are any and not already installed
if
os
.
path
.
exists
(
requirements_path
):
...
...
@@ -102,10 +189,11 @@ def download_loader(
subprocess
.
check_call
(
[
sys
.
executable
,
"
-m
"
,
"
pip
"
,
"
install
"
,
"
-r
"
,
requirements_path
]
)
spec
=
util
.
spec_from_file_location
(
"
custom_loader
"
,
location
=
loader_path
)
spec
=
util
.
spec_from_file_location
(
"
custom_loader
"
,
location
=
f
"
{
loader_path
}
/base.py
"
)
if
spec
is
None
:
raise
ValueError
(
f
"
Could not find file:
{
loader_path
}
.
"
)
raise
ValueError
(
f
"
Could not find file:
{
loader_path
}
/base.py
.
"
)
module
=
util
.
module_from_spec
(
spec
)
spec
.
loader
.
exec_module
(
module
)
# type: ignore
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment