Commit 78f9a113 authored by Massimiliano Pippi, committed by GitHub

v0.11.x (#15398)

parent ef9a21c7
Showing changes with 113 additions and 114 deletions
@@ -28,8 +28,6 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-          cache: "poetry"
-          cache-dependency-path: "**/poetry.lock"
       - name: Install deps
         shell: bash
         run: poetry install
......
@@ -15,3 +15,4 @@ credentials.json
 token.json
 .python-version
 .DS_Store
+/storage/
 # ChangeLog

+## [2024-08-22]
+
+### `llama-index-core` [0.11.0]
+
+- removed deprecated `ServiceContext` -- using this now will print an error with a link to the migration guide
+- removed deprecated `LLMPredictor` -- using this now will print an error, any existing LLM is a drop-in replacement
+- made `pandas` an optional dependency
+
+### `Everything Else`
+
+- bumped the minor version of every package to account for the new version of `llama-index-core`
+
 ## [2024-08-21]

 ### `llama-index-core` [0.10.68]
......
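The two removals in the changelog are the heart of this release, and the hunks below (RagCLI, base retriever, core `__init__`) show the mechanical fallout. For orientation, here is a minimal before/after sketch of the `ServiceContext` to `Settings` migration. It assumes the 0.11 package layout; `MockLLM`, `MockEmbedding`, and `SimpleVectorStore` are stand-ins so the snippet runs offline, real code would use actual LLM, embedding, and vector-store instances.

```python
from llama_index.core import Settings, VectorStoreIndex
from llama_index.core.embeddings import MockEmbedding
from llama_index.core.llms import MockLLM
from llama_index.core.vector_stores import SimpleVectorStore

# 0.10.x (now prints an error with a link to the migration guide):
# service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
# index = VectorStoreIndex.from_vector_store(vector_store, service_context=service_context)

# 0.11.x: configure once, globally; components read from Settings.
Settings.llm = MockLLM()                      # any LLM is a drop-in; LLMPredictor is gone
Settings.embed_model = MockEmbedding(embed_dim=8)

index = VectorStoreIndex.from_vector_store(SimpleVectorStore())
retriever = index.as_retriever(similarity_top_k=8)
```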
python_sources()

poetry_requirements(
    name="poetry",
)

python_requirements(
    name="reqs",
)
@@ -7,6 +7,7 @@ from pathlib import Path
 from typing import Any, Callable, Dict, Optional, Union, cast

 from llama_index.core import (
+    Settings,
     SimpleDirectoryReader,
     VectorStoreIndex,
 )
@@ -16,9 +17,8 @@ from llama_index.core.base.response.schema import (
     StreamingResponse,
     Response,
 )
-from llama_index.core.bridge.pydantic import BaseModel, Field, validator
+from llama_index.core.bridge.pydantic import BaseModel, Field, field_validator
 from llama_index.core.chat_engine import CondenseQuestionChatEngine
-from llama_index.core.indices.service_context import ServiceContext
 from llama_index.core.ingestion import IngestionPipeline
 from llama_index.core.llms import LLM
 from llama_index.core.query_engine import CustomQueryEngine
@@ -100,7 +100,7 @@ class RagCLI(BaseModel):
     class Config:
         arbitrary_types_allowed = True

-    @validator("query_pipeline", always=True)
+    @field_validator("query_pipeline", mode="before")
     def query_pipeline_from_ingestion_pipeline(
         cls, query_pipeline: Any, values: Dict[str, Any]
     ) -> Optional[QueryPipeline]:
@@ -127,15 +127,13 @@ class RagCLI(BaseModel):
                 embed_model = transformation
                 break

-        service_context = ServiceContext.from_defaults(
-            llm=llm, embed_model=embed_model or "default"
-        )
+        Settings.llm = llm
+        Settings.embed_model = embed_model

         retriever = VectorStoreIndex.from_vector_store(
-            ingestion_pipeline.vector_store, service_context=service_context
+            ingestion_pipeline.vector_store,
         ).as_retriever(similarity_top_k=8)
-        response_synthesizer = CompactAndRefine(
-            service_context=service_context, streaming=True, verbose=verbose
-        )
+        response_synthesizer = CompactAndRefine(streaming=True, verbose=verbose)

         # define query pipeline
         query_pipeline = QueryPipeline(verbose=verbose)
@@ -151,7 +149,7 @@ class RagCLI(BaseModel):
         query_pipeline.add_link("query", "summarizer", dest_key="query_str")
         return query_pipeline

-    @validator("chat_engine", always=True)
+    @field_validator("chat_engine", mode="before")
     def chat_engine_from_query_pipeline(
         cls, chat_engine: Any, values: Dict[str, Any]
     ) -> Optional[CondenseQuestionChatEngine]:
......
{
    "StorageContext": "llama_index.core",
    "ServiceContext": "llama_index.core",
    "ComposableGraph": "llama_index.core",
    # indices
    "SummaryIndex": "llama_index.core",
    "VectorStoreIndex": "llama_index.core",
......
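This mapping pairs moved or removed names with their expected package. The changelog says importing `ServiceContext` now surfaces an error with a migration link; one way such a mapping can drive that message is a module-level `__getattr__` (PEP 562). The sketch below is hypothetical, not the library's actual implementation, and the names `DEPRECATED_NAMES` and `default_tags` are invented:

```python
# Hypothetical shim, not llama-index's actual code.
DEPRECATED_NAMES = {
    "ServiceContext": "removed in 0.11.0; configure llama_index.core.Settings instead",
    "LLMPredictor": "removed in 0.11.0; pass any LLM directly",
}

def __getattr__(name: str):  # PEP 562: called for missing module attributes
    if name in DEPRECATED_NAMES:
        raise ImportError(f"{name} was {DEPRECATED_NAMES[name]} (see the migration guide)")
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```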
@@ -32,13 +32,13 @@ maintainers = [
 name = "llama-index-cli"
 packages = [{include = "llama_index/"}]
 readme = "README.md"
-version = "0.1.13"
+version = "0.3.0"

 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
-llama-index-core = "^0.10.11.post1"
-llama-index-embeddings-openai = "^0.1.1"
-llama-index-llms-openai = "^0.1.1"
+llama-index-core = "^0.11.0"
+llama-index-embeddings-openai = "^0.2.0"
+llama-index-llms-openai = "^0.2.0"

 [tool.poetry.group.dev.dependencies]
 black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"}
......
"""Init file of LlamaIndex."""
__version__ = "0.10.68.post1"
__version__ = "0.11.0"
import logging
from logging import NullHandler
@@ -152,8 +152,5 @@ global_handler: Optional[BaseCallbackHandler] = None
 # NOTE: keep for backwards compatibility
 SQLContextBuilder = SQLDocumentContextBuilder

-# global service context for ServiceContext.from_defaults()
-global_service_context: Optional[ServiceContext] = None
-
 # global tokenizer
 global_tokenizer: Optional[Callable[[str], list]] = None
@@ -15,7 +15,7 @@ from llama_index.core.agent.types import (
     TaskStepOutput,
 )
 from llama_index.core.base.query_pipeline.query import QueryComponent
-from llama_index.core.bridge.pydantic import BaseModel, Field
+from llama_index.core.bridge.pydantic import BaseModel, Field, ConfigDict
 from llama_index.core.callbacks import (
     CallbackManager,
     trace_method,
@@ -72,14 +72,12 @@ class QueryPipelineAgentWorker(BaseModel, BaseAgentWorker):
     """

+    model_config = ConfigDict(arbitrary_types_allowed=True)
     pipeline: QueryPipeline = Field(..., description="Query pipeline")
     callback_manager: CallbackManager = Field(..., exclude=True)
     task_key: str = Field("task", description="Key to store task in state")
     step_state_key: str = Field("step_state", description="Key to store step in state")

-    class Config:
-        arbitrary_types_allowed = True
-
     def __init__(
         self,
         pipeline: QueryPipeline,
......
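This hunk shows the commit's most common mechanical change: pydantic v1's inner `class Config` becomes a `model_config = ConfigDict(...)` class attribute in v2, repeated across every worker and component below. A standalone sketch of the pattern (`Worker` is an invented name):

```python
from pydantic import BaseModel, ConfigDict

class Worker(BaseModel):
    # v1 equivalent, removed throughout this commit:
    #     class Config:
    #         arbitrary_types_allowed = True
    model_config = ConfigDict(arbitrary_types_allowed=True)
```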
@@ -19,7 +19,7 @@ from llama_index.core.agent.types import (
     TaskStep,
     TaskStepOutput,
 )
-from llama_index.core.bridge.pydantic import BaseModel, Field, PrivateAttr
+from llama_index.core.bridge.pydantic import BaseModel, Field, PrivateAttr, ConfigDict
 from llama_index.core.callbacks import (
     CallbackManager,
     trace_method,
@@ -55,6 +55,7 @@ class CustomSimpleAgentWorker(BaseModel, BaseAgentWorker):
     """

+    model_config = ConfigDict(arbitrary_types_allowed=True)
     tools: Sequence[BaseTool] = Field(..., description="Tools to use for reasoning")
     llm: LLM = Field(..., description="LLM to use")
     callback_manager: CallbackManager = Field(
@@ -67,9 +68,6 @@ class CustomSimpleAgentWorker(BaseModel, BaseAgentWorker):
     _get_tools: Callable[[str], Sequence[BaseTool]] = PrivateAttr()

-    class Config:
-        arbitrary_types_allowed = True
-
     def __init__(
         self,
         tools: Sequence[BaseTool],
@@ -79,18 +77,7 @@ class CustomSimpleAgentWorker(BaseModel, BaseAgentWorker):
         tool_retriever: Optional[ObjectRetriever[BaseTool]] = None,
         **kwargs: Any,
     ) -> None:
-        if len(tools) > 0 and tool_retriever is not None:
-            raise ValueError("Cannot specify both tools and tool_retriever")
-        elif len(tools) > 0:
-            self._get_tools = lambda _: tools
-        elif tool_retriever is not None:
-            tool_retriever_c = cast(ObjectRetriever[BaseTool], tool_retriever)
-            self._get_tools = lambda message: tool_retriever_c.retrieve(message)
-        else:
-            self._get_tools = lambda _: []
-
         callback_manager = callback_manager or CallbackManager([])
         super().__init__(
             tools=tools,
             llm=llm,
@@ -100,6 +87,16 @@ class CustomSimpleAgentWorker(BaseModel, BaseAgentWorker):
             **kwargs,
         )

+        if len(tools) > 0 and tool_retriever is not None:
+            raise ValueError("Cannot specify both tools and tool_retriever")
+        elif len(tools) > 0:
+            self._get_tools = lambda _: tools
+        elif tool_retriever is not None:
+            tool_retriever_c = cast(ObjectRetriever[BaseTool], tool_retriever)
+            self._get_tools = lambda message: tool_retriever_c.retrieve(message)
+        else:
+            self._get_tools = lambda _: []
+
     @classmethod
     def from_tools(
         cls,
......
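Note that the `_get_tools` block did not move for style: in pydantic v2 a model's private-attribute storage (`__pydantic_private__`) is only created during `BaseModel.__init__`, so assigning `self._get_tools` before `super().__init__()` fails. A minimal illustration of the constraint, assuming pydantic v2 semantics (`Worker` is an invented class):

```python
from typing import Any, Callable
from pydantic import BaseModel, PrivateAttr

class Worker(BaseModel):
    name: str
    _get_tools: Callable[[str], list] = PrivateAttr()

    def __init__(self, name: str, **kwargs: Any) -> None:
        # self._get_tools = lambda _: []   # would raise here: private storage not ready yet
        super().__init__(name=name, **kwargs)
        self._get_tools = lambda _: []     # fine once the model is initialized
```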
@@ -16,7 +16,7 @@ from llama_index.core.agent.types import (
     TaskStep,
     TaskStepOutput,
 )
-from llama_index.core.bridge.pydantic import BaseModel, Field
+from llama_index.core.bridge.pydantic import BaseModel, Field, ConfigDict
 from llama_index.core.callbacks import (
     CallbackManager,
     trace_method,
@@ -44,6 +44,7 @@ class FnAgentWorker(BaseModel, BaseAgentWorker):
     """

+    model_config = ConfigDict(arbitrary_types_allowed=True)
     fn: Callable = Field(..., description="Function to run.")
     async_fn: Optional[Callable] = Field(
         None, description="Async function to run. If not provided, will run `fn`."
@@ -56,9 +57,6 @@ class FnAgentWorker(BaseModel, BaseAgentWorker):
     verbose: bool = Field(False, description="Verbose mode.")

-    class Config:
-        arbitrary_types_allowed = True
-
     def __init__(
         self,
         fn: Callable,
......
@@ -13,7 +13,7 @@ from llama_index.core.agent.react.types import (
     ObservationReasoningStep,
 )
 from llama_index.core.base.llms.types import ChatMessage, MessageRole
-from llama_index.core.bridge.pydantic import BaseModel
+from llama_index.core.bridge.pydantic import BaseModel, ConfigDict
 from llama_index.core.tools import BaseTool

 logger = logging.getLogger(__name__)
@@ -36,8 +36,7 @@ def get_react_tool_descriptions(tools: Sequence[BaseTool]) -> List[str]:
 class BaseAgentChatFormatter(BaseModel):
     """Base chat formatter."""

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)

     @abstractmethod
     def format(
......
@@ -7,7 +7,12 @@ from typing import Any, Dict, List, Optional, TYPE_CHECKING
 from llama_index.core.base.base_query_engine import BaseQueryEngine
 from llama_index.core.base.llms.types import ChatMessage
 from llama_index.core.base.response.schema import RESPONSE_TYPE, Response
-from llama_index.core.bridge.pydantic import BaseModel, Field
+from llama_index.core.bridge.pydantic import (
+    BaseModel,
+    Field,
+    SerializeAsAny,
+    ConfigDict,
+)
 from llama_index.core.callbacks import CallbackManager, trace_method
 from llama_index.core.chat_engine.types import (
     BaseChatEngine,
@@ -80,11 +85,11 @@ class TaskStep(BaseModel):
     """

-    task_id: str = Field(..., diescription="Task ID")
+    task_id: str = Field(..., description="Task ID")
     step_id: str = Field(..., description="Step ID")
     input: Optional[str] = Field(default=None, description="User input")
     # memory: BaseMemory = Field(
-    #     ..., type=BaseMemory, description="Conversational Memory"
+    #     ..., description="Conversational Memory"
     # )
     step_state: Dict[str, Any] = Field(
         default_factory=dict, description="Additional state for a given step."
@@ -155,25 +160,22 @@ class Task(BaseModel):
     """

-    class Config:
-        arbitrary_types_allowed = True
-
+    model_config = ConfigDict(arbitrary_types_allowed=True)
     task_id: str = Field(
-        default_factory=lambda: str(uuid.uuid4()), type=str, description="Task ID"
+        default_factory=lambda: str(uuid.uuid4()), description="Task ID"
     )
-    input: str = Field(..., type=str, description="User input")
+    input: str = Field(..., description="User input")

     # NOTE: this is state that may be modified throughout the course of execution of the task
-    memory: BaseMemory = Field(
+    memory: SerializeAsAny[BaseMemory] = Field(
         ...,
-        type=BaseMemory,
         description=(
             "Conversational Memory. Maintains state before execution of this task."
         ),
     )

     callback_manager: CallbackManager = Field(
-        default_factory=CallbackManager,
+        default_factory=lambda: CallbackManager([]),
         exclude=True,
         description="Callback manager for the task.",
     )
@@ -190,8 +192,7 @@ class Task(BaseModel):
 class BaseAgentWorker(PromptMixin, DispatcherSpanMixin):
     """Base agent worker."""

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)

     def _get_prompts(self) -> PromptDictType:
         """Get prompts."""
......
@@ -12,7 +12,7 @@ from llama_index.core.base.query_pipeline.query import (
     validate_and_convert_stringable,
 )
 from llama_index.core.base.response.schema import RESPONSE_TYPE
-from llama_index.core.bridge.pydantic import Field
+from llama_index.core.bridge.pydantic import Field, ConfigDict, SerializeAsAny
 from llama_index.core.callbacks.base import CallbackManager
 from llama_index.core.prompts.mixin import PromptDictType, PromptMixin
 from llama_index.core.schema import NodeWithScore, QueryBundle, QueryType
@@ -108,10 +108,10 @@ class BaseQueryEngine(ChainableMixin, PromptMixin, DispatcherSpanMixin):
 class QueryEngineComponent(QueryComponent):
     """Query engine component."""

-    query_engine: BaseQueryEngine = Field(..., description="Query engine")
-
-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    query_engine: SerializeAsAny[BaseQueryEngine] = Field(
+        ..., description="Query engine"
+    )

     def set_callback_manager(self, callback_manager: CallbackManager) -> None:
         """Set callback manager."""
......
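The new `SerializeAsAny` wrappers compensate for a v1-to-v2 behavior change: pydantic v2 serializes a field by its declared type, so subclass-only fields of a `BaseQueryEngine`-annotated value would silently vanish from dumps. A self-contained demonstration with invented model names:

```python
from pydantic import BaseModel, SerializeAsAny

class Engine(BaseModel):
    name: str

class RetrieverEngine(Engine):
    top_k: int

class ByDeclaredType(BaseModel):
    engine: Engine

class ByRuntimeType(BaseModel):
    engine: SerializeAsAny[Engine]

e = RetrieverEngine(name="q", top_k=8)
print(ByDeclaredType(engine=e).model_dump())  # {'engine': {'name': 'q'}}  (top_k dropped)
print(ByRuntimeType(engine=e).model_dump())   # {'engine': {'name': 'q', 'top_k': 8}}
```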
@@ -11,7 +11,7 @@ from llama_index.core.base.query_pipeline.query import (
     QueryComponent,
     validate_and_convert_stringable,
 )
-from llama_index.core.bridge.pydantic import Field
+from llama_index.core.bridge.pydantic import Field, ConfigDict
 from llama_index.core.callbacks.base import CallbackManager
 from llama_index.core.callbacks.schema import CBEventType, EventPayload
 from llama_index.core.prompts.mixin import (
@@ -27,7 +27,6 @@ from llama_index.core.schema import (
     QueryType,
     TextNode,
 )
-from llama_index.core.service_context import ServiceContext
 from llama_index.core.settings import Settings
 from llama_index.core.utils import print_text
 from llama_index.core.instrumentation import DispatcherSpanMixin
@@ -213,7 +212,10 @@ class BaseRetriever(ChainableMixin, PromptMixin, DispatcherSpanMixin):
         return [
             n
             for n in retrieved_nodes
-            if not ((n.node.hash, n.node.ref_doc_id) in seen or seen.add((n.node.hash, n.node.ref_doc_id)))  # type: ignore[func-returns-value]
+            if not (
+                (n.node.hash, n.node.ref_doc_id) in seen
+                or seen.add((n.node.hash, n.node.ref_doc_id))
+            )  # type: ignore[func-returns-value]
         ]

     @dispatcher.span
@@ -304,19 +306,6 @@ class BaseRetriever(ChainableMixin, PromptMixin, DispatcherSpanMixin):
         """
         return self._retrieve(query_bundle)

-    def get_service_context(self) -> Optional[ServiceContext]:
-        """Attempts to resolve a service context.
-
-        Short-circuits at self.service_context, self._service_context,
-        or self._index.service_context.
-        """
-        if hasattr(self, "service_context"):
-            return self.service_context
-        if hasattr(self, "_service_context"):
-            return self._service_context
-        elif hasattr(self, "_index") and hasattr(self._index, "service_context"):
-            return self._index.service_context
-        return None
-
     def _as_query_component(self, **kwargs: Any) -> QueryComponent:
         """Return a query component."""
         return RetrieverComponent(retriever=self)
@@ -325,11 +314,9 @@ class BaseRetriever(ChainableMixin, PromptMixin, DispatcherSpanMixin):
 class RetrieverComponent(QueryComponent):
     """Retriever component."""

+    model_config = ConfigDict(arbitrary_types_allowed=True)
     retriever: BaseRetriever = Field(..., description="Retriever")

-    class Config:
-        arbitrary_types_allowed = True
-
     def set_callback_manager(self, callback_manager: CallbackManager) -> None:
         """Set callback manager."""
         self.retriever.callback_manager = callback_manager
......
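The reflowed comprehension above is a one-pass dedupe: `set.add` returns `None`, so `x in seen or seen.add(x)` is falsy exactly the first time a key appears, which is why the filter keeps only first occurrences (and why mypy needs the `func-returns-value` ignore). The same trick on plain data:

```python
items = [("h1", "doc1"), ("h2", "doc1"), ("h1", "doc1")]
seen: set = set()
deduped = [x for x in items if not (x in seen or seen.add(x))]  # add() -> None, falsy
print(deduped)  # [('h1', 'doc1'), ('h2', 'doc1')]
```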
@@ -6,7 +6,11 @@ from enum import Enum
 from typing import Any, Callable, Coroutine, List, Optional, Tuple

 import numpy as np
-from llama_index.core.bridge.pydantic import Field, validator
+from llama_index.core.bridge.pydantic import (
+    Field,
+    ConfigDict,
+    field_validator,
+)
 from llama_index.core.callbacks.base import CallbackManager
 from llama_index.core.callbacks.schema import CBEventType, EventPayload
 from llama_index.core.constants import (
@@ -63,6 +67,9 @@ def similarity(
 class BaseEmbedding(TransformComponent, DispatcherSpanMixin):
     """Base class for embeddings."""

+    model_config = ConfigDict(
+        protected_namespaces=("pydantic_model_",), arbitrary_types_allowed=True
+    )
     model_name: str = Field(
         default="unknown", description="The name of the embedding model."
     )
@@ -70,7 +77,7 @@ class BaseEmbedding(TransformComponent, DispatcherSpanMixin):
         default=DEFAULT_EMBED_BATCH_SIZE,
         description="The batch size for embedding calls.",
         gt=0,
-        lte=2048,
+        le=2048,
     )
     callback_manager: CallbackManager = Field(
         default_factory=lambda: CallbackManager([]), exclude=True
@@ -80,13 +87,9 @@ class BaseEmbedding(TransformComponent, DispatcherSpanMixin):
         description="The number of workers to use for async embedding calls.",
     )

-    class Config:
-        arbitrary_types_allowed = True
-
-    @validator("callback_manager", pre=True)
-    def _validate_callback_manager(
-        cls, v: Optional[CallbackManager]
-    ) -> CallbackManager:
+    @field_validator("callback_manager")
+    @classmethod
+    def check_callback_manager(cls, v: CallbackManager) -> CallbackManager:
         if v is None:
             return CallbackManager([])
         return v
......
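Besides the rename, pydantic v2's `@field_validator` expects an explicit `@classmethod`, and `pre=True` becomes `mode="before"`. The translation in miniature (field and method names here are invented):

```python
from pydantic import BaseModel, field_validator

class Component(BaseModel):
    tags: list = []

    # v1: @validator("tags", pre=True)
    @field_validator("tags", mode="before")  # "before" is the analogue of pre=True
    @classmethod
    def default_tags(cls, v: object) -> object:
        return [] if v is None else v
```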
@@ -17,7 +17,7 @@ from llama_index.core.base.llms.types import (
 from llama_index.core.base.query_pipeline.query import (
     ChainableMixin,
 )
-from llama_index.core.bridge.pydantic import Field, validator
+from llama_index.core.bridge.pydantic import Field, model_validator, ConfigDict
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.instrumentation import DispatcherSpanMixin
 from llama_index.core.schema import BaseComponent
@@ -26,18 +26,16 @@ from llama_index.core.schema import BaseComponent
 class BaseLLM(ChainableMixin, BaseComponent, DispatcherSpanMixin):
     """BaseLLM interface."""

+    model_config = ConfigDict(arbitrary_types_allowed=True)
     callback_manager: CallbackManager = Field(
-        default_factory=CallbackManager, exclude=True
+        default_factory=lambda: CallbackManager([]), exclude=True
     )

-    class Config:
-        arbitrary_types_allowed = True
-
-    @validator("callback_manager", pre=True)
-    def _validate_callback_manager(cls, v: CallbackManager) -> CallbackManager:
-        if v is None:
-            return CallbackManager([])
-        return v
+    @model_validator(mode="after")
+    def check_callback_manager(self) -> "BaseLLM":
+        if self.callback_manager is None:
+            self.callback_manager = CallbackManager([])
+        return self

     @property
     @abstractmethod
......
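Here the per-field validator becomes a v2 `@model_validator(mode="after")`, which receives the constructed instance rather than a raw value, hence the shift from `cls, v` to `self`. The shape of the pattern, with an invented `Client` model:

```python
from pydantic import BaseModel, model_validator

class Client(BaseModel):
    retries: int = 1

    @model_validator(mode="after")
    def clamp_retries(self) -> "Client":
        # runs on the fully built instance; return self (possibly mutated)
        if self.retries < 0:
            self.retries = 0
        return self
```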
 from enum import Enum
 from typing import Any, AsyncGenerator, Generator, Optional, Union, List, Any

-from llama_index.core.bridge.pydantic import BaseModel, Field
+from llama_index.core.bridge.pydantic import BaseModel, Field, ConfigDict
 from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS

 try:
@@ -49,7 +49,7 @@ class ChatMessage(BaseModel):
     def _recursive_serialization(self, value: Any) -> Any:
         if isinstance(value, (V1BaseModel, V2BaseModel)):
-            return value.dict()
+            return value.model_dump()
         if isinstance(value, dict):
             return {
                 key: self._recursive_serialization(value)
@@ -60,8 +60,11 @@ class ChatMessage(BaseModel):
         return value

     def dict(self, **kwargs: Any) -> dict:
+        return self.model_dump(**kwargs)
+
+    def model_dump(self, **kwargs: Any) -> dict:
         # ensure all additional_kwargs are serializable
-        msg = super().dict(**kwargs)
+        msg = super().model_dump(**kwargs)

         for key, value in msg.get("additional_kwargs", {}).items():
             value = self._recursive_serialization(value)
@@ -129,6 +132,9 @@ CompletionResponseAsyncGen = AsyncGenerator[CompletionResponse, None]

 class LLMMetadata(BaseModel):
+    model_config = ConfigDict(
+        protected_namespaces=("pydantic_model_",), arbitrary_types_allowed=True
+    )
     context_window: int = Field(
         default=DEFAULT_CONTEXT_WINDOW,
         description=(
......
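The `protected_namespaces` override in `LLMMetadata` and `BaseEmbedding` exists because pydantic v2 reserves the `model_` prefix and warns about fields such as `model_name`; remapping the namespace to an unused prefix keeps the field without the warning. In isolation (`Metadata` is an invented name):

```python
from pydantic import BaseModel, ConfigDict

class Metadata(BaseModel):
    # without this, pydantic v2 warns that "model_name" shadows
    # the protected "model_" namespace
    model_config = ConfigDict(protected_namespaces=("pydantic_model_",))
    model_name: str = "unknown"

print(Metadata(model_name="example-model").model_name)
```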
@@ -20,7 +20,7 @@ from llama_index.core.base.llms.types import (
     CompletionResponse,
 )
 from llama_index.core.base.response.schema import Response
-from llama_index.core.bridge.pydantic import BaseModel, Field
+from llama_index.core.bridge.pydantic import BaseModel, Field, ConfigDict
 from llama_index.core.callbacks.base import CallbackManager
 from llama_index.core.schema import NodeWithScore, QueryBundle, TextNode
@@ -243,13 +243,11 @@ class QueryComponent(BaseModel):
 class CustomQueryComponent(QueryComponent):
     """Custom query component."""

+    model_config = ConfigDict(arbitrary_types_allowed=True)
     callback_manager: CallbackManager = Field(
         default_factory=CallbackManager, description="Callback manager"
     )

-    class Config:
-        arbitrary_types_allowed = True
-
     def set_callback_manager(self, callback_manager: CallbackManager) -> None:
         """Set callback manager."""
         self.callback_manager = callback_manager
......
@@ -58,11 +58,11 @@ class PydanticResponse:

     def __str__(self) -> str:
         """Convert to string representation."""
-        return self.response.json() if self.response else "None"
+        return self.response.model_dump_json() if self.response else "None"

     def __getattr__(self, name: str) -> Any:
         """Get attribute, but prioritize the pydantic response object."""
-        if self.response is not None and name in self.response.dict():
+        if self.response is not None and name in self.response.model_dump():
             return getattr(self.response, name)
         else:
             return None
@@ -97,7 +97,7 @@ class PydanticResponse:
     def get_response(self) -> Response:
         """Get a standard response object."""
-        response_txt = self.response.json() if self.response else "None"
+        response_txt = self.response.model_dump_json() if self.response else "None"
         return Response(response_txt, self.source_nodes, self.metadata)
......
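These final hunks complete the serialization rename: pydantic v1's `.dict()` and `.json()` are deprecated in v2 in favor of `.model_dump()` and `.model_dump_json()`, same output, new names. A quick standalone check (`Answer` is an invented model):

```python
from pydantic import BaseModel

class Answer(BaseModel):
    text: str
    score: float

a = Answer(text="42", score=0.9)
print(a.model_dump())       # {'text': '42', 'score': 0.9}   (v1: a.dict())
print(a.model_dump_json())  # {"text":"42","score":0.9}      (v1: a.json())
```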