Source code for components.output_parsers.dataclass_parser
"""DataClassParser will help users interact with LLMs even better than JsonOutputParser and YamlOutputParser with DataClass."""

from dataclasses import is_dataclass
from typing import Any, Literal, List, Optional
import logging

from adalflow.core.prompt_builder import Prompt
from adalflow.core.component import DataComponent
from adalflow.core.string_parser import YamlParser, JsonParser
from adalflow.core.base_data_class import DataClass, DataClassFormatType
from adalflow.core.base_data_class import ExcludeType, IncludeType

# Only the parser class is part of the public API of this module.
__all__ = ["DataClassParser"]

log = logging.getLogger(__name__)

# NOTE(review): the three templates below are reproduced exactly as they appear
# in this copy of the file. The text looks like it lost its internal line
# breaks (e.g. around the ``` fences and before each "-" bullet) -- confirm
# against the upstream source before relying on the rendered prompt layout.

# Output-format instructions for JSON. ``{{schema}}`` is the only placeholder;
# DataClassParser.get_output_format_str fills it with the data class's JSON
# signature.
JSON_OUTPUT_FORMAT = r"""Your output should be formatted as a standard JSON instance with the following schema:```{{schema}}```-Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!-Use double quotes for the keys and string values.-DO NOT mistaken the "properties" and "type" in the schema as the actual fields in the JSON output.-Follow the JSON formatting conventions."""

# Output-format instructions for YAML. Same single ``{{schema}}`` placeholder,
# filled with the data class's YAML signature.
YAML_OUTPUT_FORMAT = r"""Your output should be formatted as a standard YAML instance with the following schema:```{{schema}}```-Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid YAML output!-Follow the YAML formatting conventions with an indent of 2 spaces.-DO NOT mistaken the "properties" and "type" in the schema as the actual fields in the YAML output.-Quote the string values properly."""

# Template that iterates over ``examples`` and emits each one followed by a
# ``__________`` separator; renders nothing for an empty/absent list.
# Used by DataClassParser.get_examples_str.
EXAMPLES_FORMAT = r"""{% if examples %}{% for example in examples %}{{example}}__________{% endfor %}{% endif %}"""
class DataClassParser(DataComponent):
    r"""Structured-output helper built around a single ``DataClass``.

    Compared with JsonOutputParser and YamlOutputParser:

    1. It reads the input and output fields from the DataClass itself
       (``get_input_fields()`` / ``get_output_fields()``), so there is no
       need to pass include/exclude to decide which fields are used.
    2. The task description can be stored on the DataClass and retrieved
       with :meth:`get_task_desc_str` for use in the prompt.

    Example:

    .. code-block:: python

        @dataclass
        class BasicQAOutput(adal.DataClass):
            explanation: str = field(
                metadata={"desc": "A brief explanation of the concept in one sentence."}
            )
            example: str = field(
                metadata={"desc": "An example of the concept in a sentence."}
            )
            # Control output fields order
            __output_fields__ = ["explanation", "example"]

        # Define the template using jinja2 syntax
        qa_template = (
            "<SYS> You are a helpful assistant. "
            "<OUTPUT_FORMAT> {{output_format_str}} </OUTPUT_FORMAT> </SYS> "
            "<USER> {{input_str}} </USER>"
        )

        parser = adal.DataClassParser(data_class=BasicQAOutput, return_data_class=True)

        # Set up the generator with model, template, and parser
        self.generator = adal.Generator(
            model_client=model_client,
            model_kwargs=model_kwargs,
            template=qa_template,
            prompt_kwargs={"output_format_str": parser.get_output_format_str()},
            output_processors=parser,
        )
    """

    def __init__(
        self,
        data_class: DataClass,
        return_data_class: bool = False,
        format_type: Literal["yaml", "json"] = "json",
    ):
        r"""Validate the arguments and set up the format-specific helpers.

        Args:
            data_class: The dataclass *type* (not an instance); it must be a
                subclass of ``DataClass``.
            return_data_class: If True, :meth:`call` converts the parsed
                output with ``data_class.from_dict``; otherwise the parsed
                value is returned as is.
            format_type: ``"yaml"`` or ``"json"``; selects the string parser
                and the output-format template.

        Raises:
            ValueError: If ``data_class`` is not a dataclass, is not a
                subclass of ``DataClass``, or ``format_type`` is invalid.
        """
        super().__init__()
        if not is_dataclass(data_class):
            raise ValueError("data_class must be a dataclass.")
        # is_dataclass() is also true for dataclass *instances*, and
        # issubclass() raises TypeError for non-classes, so check for a type
        # first to keep the failure a ValueError.
        if not (isinstance(data_class, type) and issubclass(data_class, DataClass)):
            raise ValueError("data_class must be a subclass of DataClass.")

        self._return_data_class = return_data_class
        self._input_fields = data_class.get_input_fields()
        self._output_fields = data_class.get_output_fields()

        if format_type not in ["yaml", "json"]:
            raise ValueError("Invalid format type.")
        self._format_type = format_type
        self._data_class: DataClass = data_class

        use_yaml = format_type == "yaml"
        self._output_processor = YamlParser() if use_yaml else JsonParser()
        self.output_format_prompt = Prompt(
            template=YAML_OUTPUT_FORMAT if use_yaml else JSON_OUTPUT_FORMAT
        )

    def get_input_format_str(self) -> str:
        r"""Return the signature of the input fields in the configured format."""
        if self._format_type == "yaml":
            return self._data_class.to_yaml_signature(include=self._input_fields)
        return self._data_class.to_json_signature(include=self._input_fields)

    def get_output_format_str(self) -> str:
        r"""Return the output-format instructions to place in the prompt.

        The signature of the output fields is rendered into the YAML or JSON
        output-format template as its ``schema`` variable.
        """
        if self._format_type == "yaml":
            schema = self._data_class.to_yaml_signature(include=self._output_fields)
            template = YAML_OUTPUT_FORMAT
        else:
            schema = self._data_class.to_json_signature(include=self._output_fields)
            template = JSON_OUTPUT_FORMAT
        return Prompt(template=template)(schema=schema)

    def get_input_str(self, input: DataClass) -> str:
        r"""Serialize the input fields of ``input`` in the configured format.

        Raises:
            ValueError: If ``input`` is not an instance of the data class
                this parser was created with.
        """
        if not isinstance(input, self._data_class):
            raise ValueError("input must be an instance of the data_class.")
        if self._format_type == "yaml":
            return input.to_yaml(include=self._input_fields)
        return input.to_json(include=self._input_fields)

    def get_task_desc_str(self) -> str:
        r"""Return the task description provided by the data class."""
        return self._data_class.get_task_desc()

    def get_examples_str(
        self,
        examples: List[DataClass],
        include: Optional[IncludeType] = None,
        exclude: Optional[ExcludeType] = None,
    ) -> str:
        r"""Render ``examples`` with the examples template.

        Args:
            examples: Instances to format; ``None`` or an empty list renders
                the template with no examples.
            include: Forwarded to each example's ``format_example_str``.
            exclude: Forwarded to each example's ``format_example_str``.
        """
        # The example format depends only on the parser, not on the example.
        example_format = (
            DataClassFormatType.EXAMPLE_YAML
            if self._format_type == "yaml"
            else DataClassFormatType.EXAMPLE_JSON
        )
        str_examples = [
            example.format_example_str(
                format_type=example_format,
                exclude=exclude,
                include=include,
            )
            for example in (examples or [])
        ]
        return Prompt(template=EXAMPLES_FORMAT)(examples=str_examples)

    def call(self, input: str) -> Any:
        r"""Parse the model output string into the desired structure.

        Returns:
            The value produced by the YAML/JSON string parser, or the result
            of ``data_class.from_dict`` on it when ``return_data_class`` was
            set.

        Raises:
            ValueError: If parsing or the data-class conversion fails; the
                original exception is attached as ``__cause__``.
        """
        try:
            output_dict = self._output_processor(input)
            if self._return_data_class:
                return self._data_class.from_dict(output_dict)
            return output_dict
        except Exception as e:
            log.error("Error at parsing output: %s", e)
            raise ValueError(f"Error: {e}") from e