"""The most commonly used output parsers for the Generator.
Includes:
- YamlOutputParser: YAML output parser using dataclass for schema extraction.
- JsonOutputParser: JSON output parser using dataclass for schema extraction.
- ListOutputParser: List output parser to parse list of objects from the string.
- BooleanOutputParser: Boolean output parser to parse boolean values from the string.
"""
from dataclasses import is_dataclass
from typing import Dict, Any, Optional, List
import logging
from adalflow.core.component import Component
from adalflow.core.prompt_builder import Prompt
from adalflow.core.string_parser import YamlParser, ListParser, JsonParser
from adalflow.core.base_data_class import DataClass, DataClassFormatType
from adalflow.core.base_data_class import ExcludeType, IncludeType
# Public API of this module: the abstract base parser plus the four concrete
# parsers defined below.
__all__ = [
"OutputParser",
"YamlOutputParser",
"JsonOutputParser",
"ListOutputParser",
"BooleanOutputParser",
]
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
# TODO: delete examples here
# Prompt template that instructs the model to answer in JSON. It exposes a
# ``schema`` placeholder and an optional ``example`` section that is emitted
# only when ``example`` is truthy. JsonOutputParser wraps it in a ``Prompt``.
# NOTE(review): the ``{{...}}`` / ``{% if %}`` syntax looks Jinja-style and is
# presumably rendered by ``Prompt`` -- confirm against prompt_builder.
JSON_OUTPUT_FORMAT = r"""Your output should be formatted as a standard JSON instance with the following schema:
```
{{schema}}
```
{% if example %}
Examples:
```
{{example}}
```
{% endif %}
-Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!
-Use double quotes for the keys and string values.
-DO NOT mistaken the "properties" and "type" in the schema as the actual fields in the JSON output.
-Follow the JSON formatting conventions."""
# YAML counterpart of the template above, with the same ``schema`` and optional
# ``example`` placeholders. YamlOutputParser wraps it in a ``Prompt``.
YAML_OUTPUT_FORMAT = r"""Your output should be formatted as a standard YAML instance with the following schema:
```
{{schema}}
```
{% if example %}
Examples:
```
{{example}}
```
{% endif %}
-Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid YAML output!
-Follow the YAML formatting conventions with an indent of 2 spaces.
-DO NOT mistaken the "properties" and "type" in the schema as the actual fields in the YAML output.
-Quote the string values properly."""
# Static instructions for list output; this template has no placeholders. It is
# the default value of ListOutputParser's ``list_output_format_template``.
LIST_OUTPUT_FORMAT = r"""Your output should be formatted as a standard Python list.
- Start the list with '[' and end with ']'
- DO NOT mistaken the "properties" and "type" in the schema as the actual fields in the list output.
"""
# Type alias used as the return annotation of ``YamlOutputParser.call``.
YAML_OUTPUT_PARSER_OUTPUT_TYPE = Dict[str, Any]
class OutputParser(Component):
    __doc__ = r"""The abstract base class for all output parsers.

    This interface helps users customize output parsers with a consistent
    interface for the Generator, even though you don't always need to
    subclass it.

    The concrete parsers in this module are built from two core components:

    1. a ``Prompt`` that formats the output instruction, and
    2. a string parser component from ``core.string_parser`` that parses the
       response.

    Subclasses must override :meth:`call`.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Positional and keyword arguments are accepted so subclasses can use
        # any signature, but they are intentionally not forwarded to Component.
        super().__init__()

    def call(self, input: str) -> Any:
        r"""Parse the output string to the desired format and return the parsed output.

        Args:
            input (str): The raw string to parse.

        Returns:
            Any: The parsed value; the concrete type is defined by the subclass.

        Raises:
            NotImplementedError: Always, because this base method is abstract.
        """
        raise NotImplementedError("This is an abstract method.")
class YamlOutputParser(OutputParser):
    __doc__ = r"""YAML output parser using dataclass for schema extraction.

    .. note::
        Only use yaml for simple dataclass objects. For complex objects, use JSON.

    Args:
        data_class (DataClass): The dataclass type describing the expected output.
            It must be a dataclass and a subclass of ``DataClass``.
        examples (List[DataClass], optional): Instances of ``data_class`` kept on
            ``self.examples``. Defaults to None.
        include_fields (IncludeType, optional): Field selection stored on the parser
            as ``_include_fields``. Defaults to None.
        exclude_fields (ExcludeType, optional): Field selection stored on the parser
            as ``_exclude_fields``. Defaults to None.
        return_data_class (bool, optional): If True, :meth:`call` converts the parsed
            dict with ``data_class.from_dict``; otherwise the dict is returned as is.
            Defaults to False.

    Raises:
        TypeError: If ``data_class`` is not a dataclass, is not a subclass of
            ``DataClass``, or if any item of ``examples`` is not an instance of
            ``data_class``.

    Examples:

        >>> from dataclasses import dataclass, field
        >>>
        >>> @dataclass
        >>> class ThoughtAction(DataClass):
        >>>     thought: str = field(metadata={"description": "Reasoning behind the answer"})  # required field
        >>>     answer: str = field(metadata={"description": "Your answer to the question"}, default=None)  # optional field
        >>>
        >>> yaml_parser = YamlOutputParser(data_class=ThoughtAction, return_data_class=True)
        >>> # use it as the output processor of a generator
        >>> generator = Generator(output_processors=yaml_parser, ...)
        >>> generator("Should i be a doctor?")
    """

    def __init__(
        self,
        data_class: DataClass,
        examples: Optional[List[DataClass]] = None,
        include_fields: IncludeType = None,
        exclude_fields: ExcludeType = None,
        return_data_class: bool = False,
    ):
        super().__init__()
        if not is_dataclass(data_class):
            raise TypeError(f"Provided class is not a dataclass: {data_class}")
        # is_dataclass() is also true for dataclass *instances*; guard with
        # isinstance(..., type) so issubclass() cannot fail with an unrelated message.
        if not isinstance(data_class, type) or not issubclass(data_class, DataClass):
            raise TypeError(
                f"Provided class is not a subclass of DataClass: {data_class}"
            )
        # Validate every example, not just the first one. An empty list is
        # accepted instead of crashing with IndexError.
        if examples is not None and not all(
            isinstance(example, data_class) for example in examples
        ):
            raise TypeError(
                f"Provided example is not an instance of the data class: {data_class}"
            )

        self._return_data_class = return_data_class
        # NOTE(review): the include/exclude selections and the prompt below are
        # only stored here; nothing in this class as written reads them apart
        # from _extra_repr -- presumably used when rendering format instructions.
        self._exclude_fields = exclude_fields
        self._include_fields = include_fields
        self.data_class: DataClass = data_class
        self.output_format_prompt = Prompt(template=YAML_OUTPUT_FORMAT)
        self.output_processors = YamlParser()
        self.examples = examples

    def call(self, input: str) -> YAML_OUTPUT_PARSER_OUTPUT_TYPE:
        r"""Parse the YAML string to JSON object and return the JSON object.

        Args:
            input (str): The raw string handed to the ``YamlParser``.

        Returns:
            The parsed output of ``YamlParser``; when ``return_data_class`` is
            True, the result of ``data_class.from_dict`` on that output instead.

        Raises:
            Exception: Any error raised while parsing or converting is logged
                and re-raised unchanged.
        """
        try:
            output_dict = self.output_processors(input)
            if self._return_data_class:
                return self.data_class.from_dict(output_dict)
            return output_dict
        except Exception as e:
            log.error(f"Error in parsing YAML to JSON: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    def _extra_repr(self) -> str:
        r"""Return the parser configuration as a single-line string."""
        # Built from adjacent f-strings: the previous single literal contained
        # ",\return_data_class", where "\r" is a carriage-return escape.
        return (
            f"data_class={self.data_class.__name__}, examples={self.examples}, "
            f"exclude_fields={self._exclude_fields}, "
            f"include_fields={self._include_fields}, "
            f"return_data_class={self._return_data_class}"
        )
class JsonOutputParser(OutputParser):
    __doc__ = r"""JSON output parser using dataclass for schema extraction.

    Args:
        data_class (DataClass): The dataclass type describing the expected output.
            It must be a dataclass and a subclass of ``DataClass``.
        examples (List[DataClass], optional): Instances of ``data_class`` kept on
            ``self.examples``. Defaults to None.
        include_fields (IncludeType, optional): Field selection stored on the parser
            as ``_include_fields``. Defaults to None.
        exclude_fields (ExcludeType, optional): Field selection stored on the parser
            as ``_exclude_fields``. Defaults to None.
        return_data_class (bool, optional): If True, :meth:`call` converts the parsed
            dict with ``data_class.from_dict``; otherwise the dict is returned as is.
            Defaults to False.

    Raises:
        TypeError: If ``data_class`` is not a dataclass, is not a subclass of
            ``DataClass``, or if any item of ``examples`` is not an instance of
            ``data_class``.
    """

    def __init__(
        self,
        data_class: DataClass,
        examples: Optional[List[DataClass]] = None,
        include_fields: IncludeType = None,
        exclude_fields: ExcludeType = None,
        return_data_class: bool = False,
    ):
        super().__init__()
        if not is_dataclass(data_class):
            raise TypeError(f"Provided class is not a dataclass: {data_class}")
        # is_dataclass() is also true for dataclass *instances*; guard with
        # isinstance(..., type) so issubclass() cannot fail with an unrelated message.
        if not isinstance(data_class, type) or not issubclass(data_class, DataClass):
            raise TypeError(
                f"Provided class is not a subclass of DataClass: {data_class}"
            )
        # Validate every example, not just the first one. An empty list is
        # accepted instead of crashing with IndexError.
        if examples is not None and not all(
            isinstance(example, data_class) for example in examples
        ):
            raise TypeError(
                f"Provided example is not an instance of the data class: {data_class}"
            )

        self._return_data_class = return_data_class
        # NOTE(review): the include/exclude selections and the prompt below are
        # only stored here; nothing in this class as written reads them apart
        # from _extra_repr -- presumably used when rendering format instructions.
        self._exclude_fields = exclude_fields
        self._include_fields = include_fields
        self.data_class: DataClass = data_class
        self.output_format_prompt = Prompt(template=JSON_OUTPUT_FORMAT)
        self.output_processors = JsonParser()
        self.examples = examples

    def call(self, input: str) -> Any:
        r"""Parse the JSON string and return the parsed object.

        Args:
            input (str): The raw string handed to the ``JsonParser``.

        Returns:
            Any: The parsed output of ``JsonParser``; when ``return_data_class``
            is True, the result of ``data_class.from_dict`` on that output instead.

        Raises:
            Exception: Any error raised while parsing or while converting to the
                data class is logged and re-raised unchanged.
        """
        # Step 1: string -> parsed object.
        try:
            output_dict = self.output_processors(input)
            log.debug(f"{__class__.__name__} output_dict: {output_dict}")
        except Exception as e:
            log.error(f"Error in parsing JSON to JSON: {e}")
            raise

        if not self._return_data_class:
            return output_dict

        # Step 2 (optional): parsed object -> data class instance. Kept in its
        # own try block so the log tells parsing and conversion failures apart.
        try:
            return self.data_class.from_dict(output_dict)
        except Exception as e:
            log.error(f"Error in converting dict to data class: {e}")
            raise

    def _extra_repr(self) -> str:
        r"""Return the parser configuration as a single-line string."""
        return (
            f"data_class={self.data_class.__name__}, examples={self.examples}, "
            f"exclude_fields={self._exclude_fields}, "
            f"include_fields={self._include_fields}, "
            f"return_data_class={self._return_data_class}"
        )
class ListOutputParser(OutputParser):
    __doc__ = r"""List output parser to parse list of objects from the string.

    Args:
        list_output_format_template (str, optional): Template for the list output
            instructions, wrapped in a ``Prompt`` and stored as
            ``list_output_format_prompt``. Defaults to LIST_OUTPUT_FORMAT.
    """

    def __init__(self, list_output_format_template: str = LIST_OUTPUT_FORMAT):
        super().__init__()
        self.list_output_format_prompt = Prompt(template=list_output_format_template)
        self.output_processors = ListParser()

    def call(self, input: str) -> list:
        r"""Parse the string with ``ListParser`` and return its result.

        Args:
            input (str): The raw string to parse.

        Returns:
            list: Whatever ``ListParser`` returns for ``input``.
        """
        return self.output_processors(input)
def _parse_boolean_from_str(input: str) -> Optional[bool]:
input = input.strip()
if "true" in input.lower():
return True
elif "false" in input.lower():
return False
else:
return None
class BooleanOutputParser(OutputParser):
    __doc__ = r"""Boolean output parser to parse boolean values from the string.

    The input is first interpreted as a Python literal; if that does not yield a
    ``bool``, the parser falls back to searching the text for "true" / "false".

    .. note::
        The literal is evaluated with ``ast.literal_eval`` rather than ``eval``:
        the text typically comes from a model response and must never be
        executed as code. Arbitrary expressions (e.g. ``"1 == 1"``) are therefore
        no longer evaluated and go through the text fallback instead.
    """

    def __init__(self):
        super().__init__()
        # No string-parser component is needed; parsing happens in call().
        self.output_processors = None

    def call(self, input: str) -> Optional[bool]:
        r"""Parse a boolean value from the string.

        Args:
            input (str): The raw string to parse.

        Returns:
            Optional[bool]: The parsed boolean, or None when neither a boolean
            literal nor the words "true" / "false" are found.
        """
        import ast  # local import: keeps this block self-contained

        text = input.strip()
        try:
            # SECURITY: literal_eval only accepts Python literals, so untrusted
            # text cannot trigger code execution the way eval() could.
            value = ast.literal_eval(text)
        except (
            ValueError,
            SyntaxError,
            TypeError,
            MemoryError,
            RecursionError,
        ) as e:
            log.info(f"Error: {e}")
        else:
            if isinstance(value, bool):
                return value
        # Not a boolean literal: fall back to substring matching on the text.
        return _parse_boolean_from_str(text)