Skip to content
Snippets Groups Projects
Unverified commit 9d75ab49, authored by Adithya Krishnan, committed by GitHub
Browse files

Update embedding field to use fixed array size (#12416)

parent 433804f1
No related branches found
No related tags found
No related merge requests found
......@@ -98,7 +98,7 @@ class DuckDBVectorStore(BasePydanticVectorStore):
database_name: Optional[str] = ":memory:",
table_name: Optional[str] = "documents",
# schema_name: Optional[str] = "main",
embed_dim: Optional[int] = 1536,
embed_dim: Optional[int] = None,
# hybrid_search: Optional[bool] = False,
# https://duckdb.org/docs/extensions/full_text_search
text_search_config: Optional[dict] = {
......@@ -161,13 +161,9 @@ class DuckDBVectorStore(BasePydanticVectorStore):
except Exception as e:
raise ValueError(f"Index table {table_name} not found in the database.")
_std = {
"text": "VARCHAR",
"node_id": "VARCHAR",
"embedding": "FLOAT[]",
"metadata_": "JSON",
}
_ti = {_i[0]: _i[1] for _i in _table_info}
# Only the column names are compared; column types are not checked.
_std = {"text", "node_id", "embedding", "metadata_"}
_ti = {_i[0] for _i in _table_info}
if _std != _ti:
raise ValueError(
f"Index table {table_name} does not have the correct schema."
......@@ -188,7 +184,7 @@ class DuckDBVectorStore(BasePydanticVectorStore):
database_name: Optional[str] = ":memory:",
table_name: Optional[str] = "documents",
# schema_name: Optional[str] = "main",
embed_dim: Optional[int] = 1536,
embed_dim: Optional[int] = None,
# hybrid_search: Optional[bool] = False,
text_search_config: Optional[dict] = {
"stemmer": "english",
......@@ -226,9 +222,17 @@ class DuckDBVectorStore(BasePydanticVectorStore):
# TODO: also support schema-qualified table names (schema.table).
# Check if table and type is present
# if not, create table
if self.database_name == ":memory:":
self._conn.execute(
f"""
if self.embed_dim is None:
_query = f"""
CREATE TABLE {self.table_name} (
node_id VARCHAR,
text TEXT,
embedding FLOAT[],
metadata_ JSON
);
"""
else:
_query = f"""
CREATE TABLE {self.table_name} (
node_id VARCHAR,
text TEXT,
......@@ -236,19 +240,13 @@ class DuckDBVectorStore(BasePydanticVectorStore):
metadata_ JSON
);
"""
)
if self.database_name == ":memory:":
self._conn.execute(_query)
else:
with DuckDBLocalContext(self._database_path) as _conn:
_conn.execute(
f"""
CREATE TABLE {self.table_name} (
node_id VARCHAR,
text TEXT,
embedding FLOAT[{self.embed_dim}],
metadata_ JSON
);
"""
)
_conn.execute(_query)
self._is_initialized = True
def _node_to_table_row(self, node: BaseNode) -> Any:
......
......@@ -28,12 +28,12 @@ license = "MIT"
maintainers = ["krish-adi"]
name = "llama-index-vector-stores-duckdb"
readme = "README.md"
version = "0.1.3"
version = "0.1.4"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
llama-index-core = "^0.10.0"
duckdb = "0.9.2"
duckdb = "^0.10.1"
[tool.poetry.group.dev.dependencies]
ipython = "8.10.0"
......
......@@ -82,7 +82,7 @@ def text_node_list() -> List[TextNode]:
@pytest.fixture(scope="module")
def vector_store() -> DuckDBVectorStore:
return DuckDBVectorStore()
return DuckDBVectorStore(embed_dim=3)
def test_instance_creation_from_memory(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment