Skip to content
Snippets Groups Projects
Unverified Commit c2272b57 authored by Andrei Fajardo, committed by GitHub
Browse files

Adds new LabelledSimpleDataset (llama-dataset) (#11805)

parent ea87cd47
No related branches found
No related tags found
No related merge requests found
......@@ -8,12 +8,13 @@ from typing import Generator, Generic, List, Optional, Type, TypeVar, Union
import tqdm
from llama_index.core.async_utils import asyncio_module
from llama_index.core.base.base_query_engine import BaseQueryEngine
from llama_index.core.llms import LLM
from llama_index.core.bridge.pydantic import BaseModel, Field, PrivateAttr
from llama_index.core.evaluation import BaseEvaluator
from openai import RateLimitError
from pandas import DataFrame as PandasDataFrame
# Union of every predictor kind accepted by the dataset classes; `P` is the
# type variable bounded by it (e.g. a dataset parameterized as
# BaseLlamaDataset[LLM]).
PredictorType = Union[BaseQueryEngine, BaseEvaluator, LLM]
P = TypeVar("P", bound=PredictorType)
......
from typing import Optional, List
from llama_index.core.llama_dataset.base import (
BaseLlamaDataExample,
BaseLlamaDataset,
CreatedBy,
BaseLlamaExamplePrediction,
BaseLlamaPredictionDataset,
)
from llama_index.core.llms import LLM
from llama_index.core.bridge.pydantic import Field
from pandas import DataFrame as PandasDataFrame
class SimpleExamplePrediction(BaseLlamaExamplePrediction):
    """Prediction for a single labelled example.

    Args:
        label (str): The generated (predicted) label, to be compared against
            a reference (ground-truth) label. Defaults to an empty string.
    """

    label: str = Field(
        default_factory=str,
        description="The generated (predicted) label that can be compared to a reference (ground-truth) label.",
    )

    @property
    def class_name(self) -> str:
        """Name identifying this prediction class."""
        return "SimpleExamplePrediction"
class SimplePredictionDataset(BaseLlamaPredictionDataset):
    """Dataset of ``SimpleExamplePrediction`` objects (predicted labels)."""

    _prediction_type = SimpleExamplePrediction

    def to_pandas(self) -> PandasDataFrame:
        """Create pandas dataframe.

        Returns:
            PandasDataFrame: A frame with a single ``label`` column holding
                each prediction's label, or a frame with no columns when
                there are no predictions.
        """
        # No predictions: build the frame from an empty mapping so that no
        # "label" column is created.
        if not self.predictions:
            return PandasDataFrame({})
        return PandasDataFrame(
            {"label": [prediction.label for prediction in self.predictions]}
        )

    @property
    def class_name(self) -> str:
        """Name identifying this prediction dataset class."""
        return "SimplePredictionDataset"
class LabelledSimpleDataExample(BaseLlamaDataExample):
    """A text example paired with its reference (ground-truth) label.

    Args:
        reference_label (str): Class label of the example. Defaults to an
            empty string.
        text (str): Text body of the example. Defaults to an empty string.
        text_by (Optional[CreatedBy]): What generated the text. Defaults to
            None.
    """

    reference_label: str = Field(default_factory=str, description="Class label")
    text: str = Field(default_factory=str, description="Text body of example")
    text_by: Optional[CreatedBy] = Field(
        default=None, description="What generated the text."
    )

    @property
    def class_name(self) -> str:
        """Data example class name."""
        return "LabelledSimpleDataExample"
class LabelledSimpleDataset(BaseLlamaDataset[LLM]):
    """Dataset of ``LabelledSimpleDataExample`` objects, predicted with an LLM.

    Per-example prediction (sync and async) is not implemented yet; both
    hooks raise ``NotImplementedError``.
    """

    _example_type = LabelledSimpleDataExample

    def _construct_prediction_dataset(
        self, predictions: List[SimpleExamplePrediction]
    ) -> SimplePredictionDataset:
        """Construct the specific prediction dataset.

        Args:
            predictions (List[SimpleExamplePrediction]): the list of predictions.

        Returns:
            SimplePredictionDataset: A dataset of predictions.
        """
        return SimplePredictionDataset(predictions=predictions)

    def to_pandas(self) -> PandasDataFrame:
        """Create pandas dataframe.

        Returns:
            PandasDataFrame: One row per example with the columns
                ``reference_label``, ``text`` and ``text_by``.
        """
        data = {
            "reference_label": [t.reference_label for t in self.examples],
            "text": [t.text for t in self.examples],
            # text_by is passed through str(), so a missing value shows up
            # as the string "None" rather than a null.
            "text_by": [str(t.text_by) for t in self.examples],
        }
        return PandasDataFrame(data)

    async def _apredict_example(
        self,
        predictor: LLM,
        example: LabelledSimpleDataExample,
        sleep_time_in_seconds: int,
    ) -> SimpleExamplePrediction:
        """Asynchronously predict the label of one example with an LLM.

        Raises:
            NotImplementedError: Always; not implemented yet.
        """
        raise NotImplementedError("This method has not yet been implemented.")

    def _predict_example(
        self,
        predictor: LLM,
        example: BaseLlamaDataExample,
        sleep_time_in_seconds: int = 0,
    ) -> BaseLlamaExamplePrediction:
        """Predict the label of one example with an LLM.

        Raises:
            NotImplementedError: Always; not implemented yet.
        """
        raise NotImplementedError("This method has not yet been implemented.")

    @property
    def class_name(self) -> str:
        """Dataset class name."""
        return "LabelledSimpleDataset"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment