# Source code for components.output_parsers.outputs

"""The most commonly used output parsers for the Generator.

Includes:
- YamlOutputParser: YAML output parser using dataclass for schema extraction.
- JsonOutputParser: JSON output parser using dataclass for schema extraction.
- ListOutputParser: List output parser to parse list of objects from the string.
- BooleanOutputParser: Boolean output parser to parse boolean values from the string.
"""

from dataclasses import is_dataclass
from typing import Dict, Any, Optional, List
import logging

from adalflow.core.component import DataComponent
from adalflow.core.prompt_builder import Prompt
from adalflow.core.string_parser import YamlParser, ListParser, JsonParser
from adalflow.core.base_data_class import DataClass, DataClassFormatType
from adalflow.core.base_data_class import ExcludeType, IncludeType


# Names exported by this module: the base parser plus the four concrete parsers.
__all__ = [
    "OutputParser",
    "YamlOutputParser",
    "JsonOutputParser",
    "ListOutputParser",
    "BooleanOutputParser",
]

# Module-level logger named after this module; used by the parsers for debug/error output.
log = logging.getLogger(__name__)

# TODO: delete examples here
# Prompt template for JSON output instructions. ``{{schema}}`` and the optional
# ``{{example}}`` section are template placeholders filled in at render time.
# NOTE: this is a RAW string, so backslashes reach the model exactly as typed.
# The escapes below are therefore written the way they must appear in valid JSON
# (``\n``, ``\"``, ``\\``); doubling them would instruct the model to emit
# over-escaped or invalid JSON.
JSON_OUTPUT_FORMAT = r"""Your output should be formatted as a standard JSON instance with the following schema:
```
{{schema}}
```
{% if example %}
Examples:
```
{{example}}
```
{% endif %}
- Output ONLY valid JSON without any markdown formatting or backticks
- Use double quotes for all keys and string values
- Ensure proper JSON syntax with correct comma placement
- Do not include any text before or after the JSON object
- When including string values with newlines, use \n instead of actual line breaks
- Properly escape special characters: use \" for quotes, \\ for backslashes
- For multiline strings, keep them on a single line with \n characters
"""

# Prompt template for YAML output instructions. ``{{schema}}`` and the optional
# ``{{example}}`` section are template placeholders filled in at render time.
# NOTE(review): "DO NOT mistaken" reads like a typo for "DO NOT mistake"; it is
# prompt text sent to the model, so it is left untouched here — confirm before editing.
YAML_OUTPUT_FORMAT = r"""Your output should be formatted as a standard YAML instance with the following schema:
```
{{schema}}
```
{% if example %}
Examples:
```
{{example}}
```
{% endif %}

-Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid YAML output!
-Follow the YAML formatting conventions with an indent of 2 spaces.
-DO NOT mistaken the "properties" and "type" in the schema as the actual fields in the YAML output.
-Quote the string values properly."""

# Default prompt template for list output instructions; it has no placeholders.
# NOTE(review): the second bullet refers to a "schema" although this template
# never shows one — presumably copied from the YAML template; confirm intent.
LIST_OUTPUT_FORMAT = r"""Your output should be formatted as a standard Python list.
- Start the list with '[' and end with ']'
- DO NOT mistaken the "properties" and "type" in the schema as the actual fields in the list output.
"""


# Type alias for a parsed YAML result: a mapping from string keys to arbitrary values.
YAML_OUTPUT_PARSER_OUTPUT_TYPE = Dict[str, Any]



[docs]
class OutputParser(DataComponent):
    __doc__ = r"""The abstract class for all output parsers.

    On top of the basic string Parser, it handles structured data interaction:
    1. format_instructions: Return the formatted instructions to use in prompt for the output format.
    2. call: Parse the output string to the desired format and return the parsed output via yaml or json.

    This interface helps users customize output parsers with consistent interfaces for the Generator.
    Even though you don't always need to subclass it.

    AdalFlow uses two core classes:
    1. the Prompt to format output instruction
    2. A string parser from core.string_parser for response parsing.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Extra positional/keyword arguments are accepted but not forwarded:
        # the base initializer is always invoked without arguments.
        super().__init__()

    def format_instructions(self) -> str:
        r"""Produce the output-format instructions to embed in the prompt.

        The base implementation always raises; concrete parsers override it.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("This is an abstract method.")

    def call(self, input: str) -> Any:
        r"""Turn the raw output string into its parsed representation.

        The base implementation always raises; concrete parsers override it.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("This is an abstract method.")





[docs]
class YamlOutputParser(OutputParser):
    __doc__ = r"""YAML output parser using dataclass for schema extraction.

    .. note::
        Only use yaml for simple dataclass objects. For complex objects, use JSON.

    Args:
        data_class (Type[DataClass]): A dataclass that subclasses ``DataClass``; its schema is shown in the prompt.
        examples (List[DataClass], optional): Instances of ``data_class`` rendered as examples in the prompt.
            Only the first entry is type-checked. Defaults to None.
        include_fields (IncludeType, optional): Forwarded as ``include`` when formatting the schema and examples. Defaults to None.
        exclude_fields (ExcludeType, optional): Forwarded as ``exclude`` when formatting the schema and examples. Defaults to None.
        return_data_class (bool, optional): When True, ``call`` returns ``data_class.from_dict(parsed)``
            instead of the parsed dict. Defaults to False.

    Raises:
        TypeError: If ``data_class`` is not a dataclass, is not a subclass of ``DataClass``,
            or the first entry of a non-empty ``examples`` is not an instance of ``data_class``.

    Examples:

    >>> from dataclasses import dataclass, field
    >>>
    >>> @dataclass
    >>> class ThoughtAction(DataClass):
    >>>     thought: str = field(metadata={"description": "Reasoning behind the answer"}) # required field
    >>>     answer: str = field(metadata={"description": "Your answer to the question"}, default=None) # optional field
    >>>
    >>> yaml_parser = YamlOutputParser(data_class=ThoughtAction)
    >>> yaml_format_instructions = yaml_parser.format_instructions()
    >>> print(yaml_format_instructions)
    >>> # use it in the generator
    >>> task_desc_str = "You are a helpful assistant who answers user query. "+yaml_format_instructions
    >>> generator = Generator(output_processors=yaml_parser, ..., preset_prompt_kwargs={"task_desc_str": task_desc_str})
    >>> generator("Should i be a doctor?")
    """

    def __init__(
        self,
        data_class: DataClass,
        examples: Optional[List[DataClass]] = None,
        include_fields: IncludeType = None,
        exclude_fields: ExcludeType = None,
        return_data_class: bool = False,
    ):

        super().__init__()
        if not is_dataclass(data_class):
            raise TypeError(f"Provided class is not a dataclass: {data_class}")

        if not issubclass(data_class, DataClass):
            raise TypeError(
                f"Provided class is not a subclass of DataClass: {data_class}"
            )

        # Validate the first example only; an empty list is treated like "no
        # examples" instead of raising IndexError (same rule as JsonOutputParser).
        if examples and not isinstance(examples[0], data_class):
            raise TypeError(
                f"Provided example is not an instance of the data class: {data_class}"
            )
        self._return_data_class = return_data_class
        self._exclude_fields = exclude_fields
        self._include_fields = include_fields
        self.data_class: DataClass = data_class
        self.output_format_prompt = Prompt(template=YAML_OUTPUT_FORMAT)
        self.output_processors = YamlParser()
        self.examples = examples

    def format_instructions(
        self,
        format_type: Optional[DataClassFormatType] = None,
    ) -> str:
        r"""Return the formatted instructions to use in prompt for the YAML output format.

        The include/exclude field filters given at construction time are applied
        to both the schema and the examples.

        Args:
            format_type (DataClassFormatType, optional): The format type to show in the prompt.
                Defaults to DataClassFormatType.SIGNATURE_YAML for less token usage.
                Options: DataClassFormatType.SIGNATURE_YAML, DataClassFormatType.SIGNATURE_JSON, DataClassFormatType.SCHEMA.

        Returns:
            str: The rendered YAML output-format prompt.
        """
        format_type = format_type or DataClassFormatType.SIGNATURE_YAML
        schema = self.data_class.format_class_str(
            format_type=format_type,
            exclude=self._exclude_fields,
            include=self._include_fields,
        )
        # Render every example as YAML, each followed by a separator line.
        example_str = ""
        try:
            if self.examples:
                example_str = "".join(
                    "{}\n________\n".format(
                        example.format_example_str(
                            format_type=DataClassFormatType.EXAMPLE_YAML,
                            exclude=self._exclude_fields,
                            include=self._include_fields,
                        )
                    )
                    for example in self.examples
                )
                # remove the last new line
                example_str = example_str[:-1]
                log.debug(f"{__class__.__name__} example_str: {example_str}")

        except Exception as e:
            # Best effort: a failure to format examples only drops the examples
            # section from the prompt; the schema instructions are still returned.
            log.error(f"Error in formatting example for {__class__.__name__}, {e}")
            example_str = None

        return self.output_format_prompt(schema=schema, example=example_str)

    def call(self, input: str) -> Any:
        r"""Parse the YAML string and return the parsed object.

        Args:
            input (str): The raw string to parse.

        Returns:
            The value produced by the YAML string parser, or, when
            ``return_data_class`` is True, ``data_class.from_dict`` of that value.

        Raises:
            Exception: Any error raised while parsing or converting is logged and re-raised.
        """
        try:
            output_dict = self.output_processors(input)
            if self._return_data_class:
                return self.data_class.from_dict(output_dict)
            return output_dict
        except Exception as e:
            log.error(f"Error in parsing YAML to JSON: {e}")
            raise

    def _extra_repr(self) -> str:
        r"""Return the constructor settings as a single-line summary string."""
        # Built from adjacent literals so that no source indentation and no
        # accidental escape sequence (the former "\r" before "eturn_data_class")
        # ends up inside the text.
        return (
            f"data_class={self.data_class.__name__}, examples={self.examples}, "
            f"exclude_fields={self._exclude_fields}, "
            f"include_fields={self._include_fields}, "
            f"return_data_class={self._return_data_class}"
        )




[docs]
class JsonOutputParser(OutputParser):
    __doc__ = r"""JSON output parser using dataclass for schema extraction.

    Args:
        data_class (Type[DataClass]): A dataclass that subclasses ``DataClass``; its schema is shown in the prompt.
        examples (List[DataClass], optional): Instances of ``data_class`` rendered as examples in the prompt.
            Only the first entry is type-checked. Defaults to None.
        include_fields (IncludeType, optional): Forwarded as ``include`` when formatting the schema and examples. Defaults to None.
        exclude_fields (ExcludeType, optional): Forwarded as ``exclude`` when formatting the schema and examples. Defaults to None.
        return_data_class (bool, optional): When True, ``call`` returns ``data_class.from_dict(parsed)``
            instead of the parsed dict. Defaults to False.

    Raises:
        TypeError: If ``data_class`` is not a dataclass, is not a subclass of ``DataClass``,
            or the first entry of a non-empty ``examples`` is not an instance of ``data_class``.
    """

    def __init__(
        self,
        data_class: DataClass,
        examples: Optional[List[DataClass]] = None,
        include_fields: IncludeType = None,
        exclude_fields: ExcludeType = None,
        return_data_class: bool = False,
    ):
        super().__init__()
        if not is_dataclass(data_class):
            raise TypeError(f"Provided class is not a dataclass: {data_class}")

        if not issubclass(data_class, DataClass):
            raise TypeError(
                f"Provided class is not a subclass of DataClass: {data_class}"
            )

        # Only the first example is type-checked; None and empty lists are accepted.
        if (
            examples is not None
            and len(examples) > 0
            and not isinstance(examples[0], data_class)
        ):
            raise TypeError(
                f"Provided example is not an instance of the data class: {data_class}"
            )
        self._return_data_class = return_data_class
        self._exclude_fields = exclude_fields
        self._include_fields = include_fields
        template = JSON_OUTPUT_FORMAT
        self.data_class: DataClass = data_class
        self.output_format_prompt = Prompt(template=template)
        self.output_processors = JsonParser()
        self.examples = examples

    def format_instructions(
        self,
        format_type: Optional[DataClassFormatType] = None,
    ) -> str:
        r"""Return the formatted instructions to use in prompt for the JSON output format.

        The include/exclude field filters given at construction time are applied
        to both the schema and the examples.

        Args:
            format_type (DataClassFormatType, optional): The format type to show in the prompt.
                Defaults to DataClassFormatType.SIGNATURE_JSON for less token usage compared with DataClassFormatType.SCHEMA.
                Options: DataClassFormatType.SIGNATURE_YAML, DataClassFormatType.SIGNATURE_JSON, DataClassFormatType.SCHEMA.

        Returns:
            str: The rendered JSON output-format prompt.
        """
        format_type = format_type or DataClassFormatType.SIGNATURE_JSON
        schema = self.data_class.format_class_str(
            format_type=format_type,
            exclude=self._exclude_fields,
            include=self._include_fields,
        )
        # Render every example as JSON, each followed by a separator line.
        example_str = ""
        try:
            if self.examples and len(self.examples) > 0:
                for example in self.examples:
                    per_example_str = example.format_example_str(
                        format_type=DataClassFormatType.EXAMPLE_JSON,
                        exclude=self._exclude_fields,
                        include=self._include_fields,
                    )
                    example_str += f"{per_example_str}\n________\n"
                # remove the last new line
                example_str = example_str[:-1]
                log.debug(f"{__class__.__name__} example_str: {example_str}")

        except Exception as e:
            # Best effort: a failure to format examples only drops the examples
            # section from the prompt; the schema instructions are still returned.
            log.error(f"Error in formatting example for {__class__.__name__}, {e}")
            example_str = None
        return self.output_format_prompt(schema=schema, example=example_str)

    def call(self, input: str) -> Any:
        r"""Parse the JSON string and return the parsed object.

        Args:
            input (str): The raw string to parse.

        Returns:
            The value produced by the JSON string parser, or, when
            ``return_data_class`` is True, ``data_class.from_dict`` of that value.

        Raises:
            Exception: Errors from parsing or from the data-class conversion are
                logged (with distinct messages) and re-raised.
        """
        try:
            output_dict = self.output_processors(input)
            log.debug(f"{__class__.__name__} output_dict: {output_dict}")

        except Exception as e:
            log.error(f"Error in parsing JSON to JSON: {e}")
            raise
        try:
            if self._return_data_class:
                return self.data_class.from_dict(output_dict)
            return output_dict
        except Exception as e:
            log.error(f"Error in converting dict to data class: {e}")
            raise

    def _extra_repr(self) -> str:
        r"""Return the constructor settings as a single-line summary string."""
        # Built from adjacent literals: a backslash continuation inside a
        # triple-quoted string would keep the next line's indentation in the text.
        return (
            f"data_class={self.data_class.__name__}, examples={self.examples}, "
            f"exclude_fields={self._exclude_fields}, "
            f"include_fields={self._include_fields}, "
            f"return_data_class={self._return_data_class}"
        )




[docs]
class ListOutputParser(OutputParser):
    __doc__ = r"""List output parser to parse list of objects from the string."""

    def __init__(self, list_output_format_template: str = LIST_OUTPUT_FORMAT):
        super().__init__()
        # Prompt that renders the list-format instructions from the given template.
        instruction_prompt = Prompt(template=list_output_format_template)
        self.list_output_format_prompt = instruction_prompt
        # String parser that does the actual list extraction in ``call``.
        self.output_processors = ListParser()

    def format_instructions(self) -> str:
        r"""Render the list-format prompt (called without arguments) and return it."""
        rendered = self.list_output_format_prompt()
        return rendered

    def call(self, input: str) -> list:
        r"""Hand ``input`` to the list string parser and return its result."""
        parsed = self.output_processors(input)
        return parsed




def _parse_boolean_from_str(input: str) -> Optional[bool]:
    input = input.strip()
    if "true" in input.lower():
        return True
    elif "false" in input.lower():
        return False
    else:
        return None



[docs]
class BooleanOutputParser(OutputParser):
    __doc__ = r"""Boolean output parser to parse boolean values from the string."""

    def __init__(self):
        super().__init__()
        # No separate string parser is used; ``call`` does the parsing itself.
        self.output_processors = None

    def format_instructions(self) -> str:
        r"""Return the fixed instruction sentence asking for a boolean answer."""
        return "The output should be a boolean value. True or False."

    def call(self, input: str) -> Optional[bool]:
        r"""Parse a boolean out of the given string.

        The stripped text is evaluated first; if that yields an actual ``bool``
        it is returned. In every other case (evaluation fails, or yields a
        non-bool) the text is scanned for "true"/"false" instead.

        Args:
            input (str): The raw string to parse.

        Returns:
            Optional[bool]: The parsed boolean, or None when neither evaluation
            nor the substring scan finds one.
        """
        text = input.strip()
        try:
            # SECURITY NOTE(review): eval() executes arbitrary Python. If `input`
            # can come from an untrusted source (e.g. raw model output), this is a
            # code-execution risk. It is kept to preserve existing behaviour;
            # consider ast.literal_eval or dropping evaluation entirely.
            evaluated = eval(text)
        except Exception as e:
            # Not a valid expression: fall through to the substring scan.
            log.info(f"Error: {e}")
        else:
            if isinstance(evaluated, bool):
                return evaluated
        # Single fallback for both "eval failed" and "eval gave a non-bool".
        return _parse_boolean_from_str(text)