Source code for datasets.types

import uuid
from dataclasses import dataclass, field
from adalflow.core.base_data_class import DataClass


@dataclass
class BaseData(DataClass):
    __doc__ = """A common dataclass for representing examples in a dataset."""

    # Identifier of the example. When the caller does not pass one, a fresh
    # UUID4 string is generated for *each* instance. A plain
    # ``default=str(uuid.uuid4())`` would be evaluated only once, when the
    # class body runs, and every instance would then share that same value.
    id: str = field(
        metadata={"desc": "The unique identifier of the example", "type": "id"},
        default_factory=lambda: str(uuid.uuid4()),
    )
@dataclass
class Example(DataClass):
    __doc__ = """A common dataclass for representing examples in a dataset."""

    # Identifier of the example. A factory is used so that each instance
    # created without an explicit id receives its own UUID4 string; a plain
    # ``default=str(uuid.uuid4())`` is computed once at class-definition time
    # and would be shared by all instances.
    id: str = field(
        metadata={"desc": "The unique identifier of the example"},
        default_factory=lambda: str(uuid.uuid4()),
    )
    # Question text; stays None when not supplied.
    question: str = field(
        metadata={"desc": "The question to be answered"},
        default=None,
    )
    # Answer text; stays None when not supplied.
    answer: str = field(
        metadata={"desc": "The answer to the question"},
        default=None,
    )
@dataclass
class HotPotQAData(Example):
    # Extends ``Example`` with one extra field, ``gold_titles``.
    __doc__ = """A dataclass for representing examples in the HotPotQA dataset."""

    # Supporting titles for the answer; defaults to None (not an empty set)
    # when the caller does not provide a value.
    gold_titles: set = field(
        default=None,
        metadata={"desc": "The set of titles that support the answer"},
    )
@dataclass
class TrecData(BaseData):
    # Extends ``BaseData`` with the question text and its class label, stored
    # both as a name and as an integer index.
    __doc__ = """A dataclass for representing examples in the TREC dataset."""

    # Text of the question; None when not supplied.
    question: str = field(
        default=None,
        metadata={"desc": "The question to be classified"},
    )
    # Label name; None when not supplied.
    class_name: str = field(
        default=None,
        metadata={"desc": "One of {ABBR, ENTY, DESC, HUM, LOC, NUM}"},
    )
    # Label index; -1 is the default used when no label has been set.
    class_index: int = field(
        default=-1,
        metadata={"desc": "The class label, in range [0, 5]"},
    )

    # NOTE(review): presumably read by the DataClass base to decide which
    # fields are inputs vs. outputs -- confirm against adalflow.
    __input_fields__ = ["question"]  # follow this order too.
    __output_fields__ = ["class_name", "class_index"]