Skip to content
Snippets Groups Projects
Unverified Commit 276e36bc authored by betteryz's avatar betteryz Committed by GitHub
Browse files

Fix llama-hub Data Loader "GPTRepo" encoding issue, supporting gemini...

Fix the llama-hub "GPTRepo" data loader encoding issue and support more configuration options for Gemini embedding. (#10802)

* Support Gemini "transport" configuration

Added support for configuring the Gemini transport method.

* Sync updates in multi_modal_llms\gemini

* Updated Dashscope qwen llm defaults

Setting qwen default num_outputs and temperature

* cr

* support gemini embedding configuration

support configuring api_base, api_key, transport method

* fix gptrepo data connector encoding issue

Reading a file with the system default encoding (e.g. GBK) can produce garbled characters. Added an encoding option to the reader.

* sync latest repo

* sync latest repo

* cr

* cr

---------

Co-authored-by: Haotian Zhang <socool.king@gmail.com>
parent 1a9169c2
No related branches found
No related tags found
No related merge requests found
......@@ -58,6 +58,7 @@ def process_repository(
ignore_list,
concatenate: bool = False,
extensions: Optional[List[str]] = None,
encoding: Optional[str] = "utf-8",
) -> List[str]:
"""Process repository."""
result_texts = []
......@@ -74,7 +75,7 @@ def process_repository(
not should_ignore(relative_file_path, ignore_list)
and is_correct_extension
):
with open(file_path, errors="ignore") as file:
with open(file_path, errors="ignore", encoding=encoding) as file:
contents = file.read()
result_text += "-" * 4 + "\n"
result_text += f"{relative_file_path}\n"
......@@ -105,6 +106,7 @@ class GPTRepoReader(BaseReader):
repo_path: str,
preamble_str: Optional[str] = None,
extensions: Optional[List[str]] = None,
encoding: Optional[str] = "utf-8",
) -> List[Document]:
"""Load data from the input directory.
......@@ -146,7 +148,11 @@ class GPTRepoReader(BaseReader):
"aforementioned file as context.\n"
)
text_list = process_repository(
repo_path, ignore_list, concatenate=self.concatenate, extensions=extensions
repo_path,
ignore_list,
concatenate=self.concatenate,
extensions=extensions,
encoding=encoding,
)
docs = []
for text in text_list:
......
......@@ -47,7 +47,17 @@ class GeminiEmbedding(BaseEmbedding):
"google-generativeai package not found, install with"
"'pip install google-generativeai'"
)
gemini.configure(api_key=api_key)
# API keys are optional. The API can be authorised via OAuth (detected
# environmentally) or by the GOOGLE_API_KEY environment variable.
config_params: Dict[str, Any] = {
"api_key": api_key or os.getenv("GOOGLE_API_KEY"),
}
if api_base:
config_params["client_options"] = {"api_endpoint": api_base}
if transport:
config_params["transport"] = transport
# transport: A string, one of: [`rest`, `grpc`, `grpc_asyncio`].
gemini.configure(**config_params)
self._model = gemini
super().__init__(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment