Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Llama Index
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
mirrored_repos
MachineLearning
run-llama
Llama Index
Commits
c2272b57
Unverified
Commit
c2272b57
authored
1 year ago
by
Andrei Fajardo
Committed by
GitHub
1 year ago
Browse files
Options
Downloads
Patches
Plain Diff
Adds new LabelledSimpleDataset (llama-dataset) (#11805)
parent
ea87cd47
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
llama-index-core/llama_index/core/llama_dataset/base.py
+2
-1
2 additions, 1 deletion
llama-index-core/llama_index/core/llama_dataset/base.py
llama-index-core/llama_index/core/llama_dataset/simple.py
+114
-0
114 additions, 0 deletions
llama-index-core/llama_index/core/llama_dataset/simple.py
with
116 additions
and
1 deletion
llama-index-core/llama_index/core/llama_dataset/base.py
+
2
−
1
View file @
c2272b57
...
...
@@ -8,12 +8,13 @@ from typing import Generator, Generic, List, Optional, Type, TypeVar, Union
import
tqdm
from
llama_index.core.async_utils
import
asyncio_module
from
llama_index.core.base.base_query_engine
import
BaseQueryEngine
from
llama_index.core.llms
import
LLM
from
llama_index.core.bridge.pydantic
import
BaseModel
,
Field
,
PrivateAttr
from
llama_index.core.evaluation
import
BaseEvaluator
from
openai
import
RateLimitError
from
pandas
import
DataFrame
as
PandasDataFrame
# Kinds of predictor a llama-dataset can be driven by. The union includes LLM
# in addition to query engines and evaluators; it serves as the upper bound of
# the generic parameter ``P`` declared below.
PredictorType = Union[BaseQueryEngine, BaseEvaluator, LLM]
P = TypeVar("P", bound=PredictorType)
...
...
This diff is collapsed.
Click to expand it.
llama-index-core/llama_index/core/llama_dataset/simple.py
0 → 100644
+
114
−
0
View file @
c2272b57
from
typing
import
Optional
,
List
from
llama_index.core.llama_dataset.base
import
(
BaseLlamaDataExample
,
BaseLlamaDataset
,
CreatedBy
,
BaseLlamaExamplePrediction
,
BaseLlamaPredictionDataset
,
)
from
llama_index.core.llms
import
LLM
from
llama_index.core.bridge.pydantic
import
Field
from
pandas
import
DataFrame
as
PandasDataFrame
class SimpleExamplePrediction(BaseLlamaExamplePrediction):
    """Prediction made for a single labelled example.

    Args:
        label (str): The generated (predicted) label, intended to be compared
            against a reference (ground-truth) label. Defaults to an empty
            string.
    """

    label: str = Field(
        default_factory=str,
        description="The generated (predicted) label that can be compared to a reference (ground-truth) label.",
    )

    @property
    def class_name(self) -> str:
        """Return the name identifying this prediction class."""
        return "SimpleExamplePrediction"
class SimplePredictionDataset(BaseLlamaPredictionDataset):
    """Prediction dataset whose entries are ``SimpleExamplePrediction`` objects."""

    _prediction_type = SimpleExamplePrediction

    def to_pandas(self) -> PandasDataFrame:
        """Build a dataframe holding the predicted labels.

        Returns:
            PandasDataFrame: A frame with a single ``label`` column, one row
            per prediction. When ``self.predictions`` is empty the frame is
            built from an empty dict and therefore has no columns at all.
        """
        # Guard clause: no predictions means a column-less frame, not an
        # empty "label" column.
        if not self.predictions:
            return PandasDataFrame({})

        predicted_labels = [prediction.label for prediction in self.predictions]
        return PandasDataFrame({"label": predicted_labels})

    @property
    def class_name(self) -> str:
        """Return the name identifying this prediction dataset class."""
        return "SimplePredictionDataset"
class LabelledSimpleDataExample(BaseLlamaDataExample):
    """A single text example paired with its reference (ground-truth) label.

    Args:
        reference_label (str): Class label of the example. Defaults to an
            empty string.
        text (str): Text body of the example. Defaults to an empty string.
        text_by (Optional[CreatedBy]): What generated the text. Defaults to
            ``None``.
    """

    reference_label: str = Field(default_factory=str, description="Class label")
    text: str = Field(default_factory=str, description="Text body of example")
    # The description used to read "What generated the query.", but this
    # example type has no query field; the attribute describes ``text``.
    text_by: Optional[CreatedBy] = Field(
        default=None, description="What generated the text."
    )

    @property
    def class_name(self) -> str:
        """Return the name identifying this data example class."""
        return "LabelledSimpleDataExample"
class LabelledSimpleDataset(BaseLlamaDataset[LLM]):
    """Dataset of ``LabelledSimpleDataExample`` items with an ``LLM`` predictor type.

    Prediction over individual examples is not implemented yet: both
    ``_predict_example`` and ``_apredict_example`` raise ``NotImplementedError``.
    """

    _example_type = LabelledSimpleDataExample

    def _construct_prediction_dataset(
        self, predictions: List[SimpleExamplePrediction]
    ) -> SimplePredictionDataset:
        """Wrap a list of predictions in the matching prediction dataset.

        Args:
            predictions (List[SimpleExamplePrediction]): the list of predictions.

        Returns:
            SimplePredictionDataset: A dataset holding the given predictions.
        """
        return SimplePredictionDataset(predictions=predictions)

    def to_pandas(self) -> PandasDataFrame:
        """Build a dataframe with one row per example.

        Returns:
            PandasDataFrame: A frame with the columns ``reference_label``,
            ``text`` and ``text_by`` (in that order).
        """
        reference_labels = []
        texts = []
        text_sources = []
        for example in self.examples:
            reference_labels.append(example.reference_label)
            texts.append(example.text)
            # str() is applied unconditionally, so a missing ``text_by``
            # (None) shows up as the string "None" in the frame.
            text_sources.append(str(example.text_by))

        return PandasDataFrame(
            {
                "reference_label": reference_labels,
                "text": texts,
                "text_by": text_sources,
            }
        )

    async def _apredict_example(
        self,
        predictor: LLM,
        example: LabelledSimpleDataExample,
        sleep_time_in_seconds: int,
    ) -> SimpleExamplePrediction:
        """Asynchronously predict a single example with an LLM.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("This method has not yet been implemented.")

    def _predict_example(
        self,
        predictor: LLM,
        example: BaseLlamaDataExample,
        sleep_time_in_seconds: int = 0,
    ) -> BaseLlamaExamplePrediction:
        """Predict a single example with an LLM.

        Raises:
            NotImplementedError: Always; this method is not implemented yet.
        """
        raise NotImplementedError("This method has not yet been implemented.")

    @property
    def class_name(self) -> str:
        """Return the name identifying this dataset class."""
        return "LabelledSimpleDataset"
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment