"""Extract and convert common string to Python objects.From simple data types like boolean, integer, and float to more complex data types like JSON, YAML, and list strings."""fromtypingimportDict,List,Unionimportloggingfromadalflow.core.componentimportDataComponentimportadalflow.core.functionalasFlog=logging.getLogger(__name__)BOOLEAN_PARSER_OUTPUT_TYPE=bool# class Parser:# __doc__ = r"""Base class for all string parsers."""# def __init__(self):# super().__init__()# def __call__(self, input: str) -> object:# return self.call(input)# def call(self, input: str) -> object:# raise NotImplementedError(# "Parser subclasses must implement the __call__ method"# )
class BooleanParser(DataComponent):
    __doc__ = r"""Extracts boolean values from text.

    Examples:

    .. code-block:: python

        boolean_parser = BooleanParser()
        test_input_1 = "True"  # or "true" or "...true..."
        print(boolean_parser(test_input_1))  # Expected to extract True
    """

    # NOTE(review): no `call` override is visible in this section, although the
    # usage example above relies on one -- confirm it is defined in the full file.

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted but not forwarded to
        # the base-class initializer.
        super().__init__()
class IntParser(DataComponent):
    __doc__ = r"""Extracts integer values from text.

    Returns:
        int: Extracted integer value.

    Raises:
        ValueError: If the input text does not contain an integer

    Examples:

    .. code-block:: python

        int_parser = IntParser()
        test_input_2 = "123"  # or "...123..."
        print(int_parser(test_input_2))  # Expected to extract 123
    """

    # NOTE(review): no `call` override is visible in this section, although the
    # usage example above relies on one -- confirm it is defined in the full file.

    def __init__(self):
        super().__init__()
class FloatParser(DataComponent):
    __doc__ = r"""Extracts float values from text.

    Returns:
        float: Extracted float value.

    Raises:
        ValueError: If the input text does not contain a float

    Examples:

    .. code-block:: python

        float_parser = FloatParser()
        test_input_3 = "123.45"  # or "...123.45..."
        print(float_parser(test_input_3))  # Expected to extract 123.45
    """

    # NOTE(review): no `call` override is visible in this section, although the
    # usage example above relies on one -- confirm it is defined in the full file.

    def __init__(self):
        super().__init__()
class ListParser(DataComponent):
    __doc__ = r"""Extracts list `[...]` strings from text and parses them into a list object.

    Args:
        add_missing_right_bracket (bool, optional): Add a missing right bracket to the list string.
            Defaults to True.

    Returns:
        List[object]: Extracted list object.

    Raises:
        ValueError: If the input text does not contain a list

    Examples:

    .. code-block:: python

        list_parser = ListParser()
        test_input_4 = 'Some random text before ["item1", "item2"] and more after'
        print(list_parser(test_input_4))  # Expected to extract ["item1", "item2"]
    """

    def __init__(self, add_missing_right_bracket: bool = True):
        super().__init__()
        # Forwarded to F.extract_list_str on every call.
        self.add_missing_right_bracket = add_missing_right_bracket

    def call(self, input: str) -> LIST_PARSER_OUTPUT_TYPE:
        """Extract the list substring from ``input`` and parse it into an object.

        Args:
            input: Text expected to contain a list string; it is stripped of
                surrounding whitespace before extraction.

        Returns:
            The object produced by ``F.parse_json_str_to_obj`` for the
            extracted list string.

        Raises:
            ValueError: If extracting the list string or parsing it fails.
                The underlying exception is chained as ``__cause__``.
        """
        input = input.strip()

        # Step 1: extract the list string from the surrounding text.
        try:
            list_str = F.extract_list_str(input, self.add_missing_right_bracket)
        except Exception as e:
            # Chain the cause so the original traceback stays attached.
            raise ValueError(f"Error at extracting list string: {e}") from e

        # Step 2: parse the extracted string (the original comment notes the
        # helper relies on json.loads and yaml.safe_load).
        try:
            return F.parse_json_str_to_obj(list_str)
        except Exception as e:
            # Lazy %-formatting: the message text is unchanged.
            log.error("Error at parsing list string with json.loads: %s", e)
            raise ValueError(f"Error: {e}") from e
class JsonParser(DataComponent):
    __doc__ = r"""Extracts JSON strings `{...}` or `[...]` from text and parses them into a JSON object.

    It can output either a dictionary or a list as they are both valid JSON objects.

    Args:
        add_missing_right_brace (bool, optional): Add a missing right brace to the JSON string.
            Defaults to True.

    Returns:
        Union[Dict[str, object], List[object]]: Extracted JSON object.

    Raises:
        ValueError: If the input text does not contain a JSON object

    Examples:

    .. code-block:: python

        json_parser = JsonParser()
        json_str = "```json\n{\"key\": \"value\"}\n```"
        json_obj = json_parser(json_str)
        print(json_obj)  # Expected to extract {"key": "value"}
    """

    def __init__(self, add_missing_right_brace: bool = True):
        super().__init__()
        # Forwarded to F.extract_json_str on every call.
        self.add_missing_right_brace = add_missing_right_brace

    def call(self, input: str) -> JSON_PARSER_OUTPUT_TYPE:
        """Extract the JSON substring from ``input`` and parse it into an object.

        Args:
            input: Text expected to contain a JSON string; it is stripped of
                surrounding whitespace before extraction.

        Returns:
            The object produced by ``F.parse_json_str_to_obj`` for the
            extracted JSON string.

        Raises:
            ValueError: If extracting the JSON string or parsing it fails.
                The underlying exception is chained as ``__cause__``.
        """
        input = input.strip()

        # Step 1: extract the JSON string from the surrounding text.
        try:
            json_str = F.extract_json_str(input, self.add_missing_right_brace)
            # Lazy %-formatting avoids rendering the payload when DEBUG is off.
            log.debug("json_str: %s", json_str)
        except Exception as e:
            # Chain the cause so the original traceback stays attached.
            raise ValueError(f"Error: {e}") from e

        # Step 2: parse the extracted string (the original comment notes the
        # helper relies on json.loads and yaml.safe_load).
        try:
            json_obj = F.parse_json_str_to_obj(json_str)
            log.debug("json_obj: %s", json_obj)
            return json_obj
        except Exception as e:
            log.error("Error at parsing JSON string: %s", e)
            raise ValueError(f"Error: {e}") from e
# The YAML parser's output type is an alias of the JSON parser's output type.
YAML_PARSER_OUTPUT_TYPE = JSON_PARSER_OUTPUT_TYPE


# TODO: Make the YAML parser more robust; JSON parsing currently works much
# better than YAML parsing.
class YamlParser(DataComponent):
    __doc__ = r"""To extract YAML strings from text and parse them into a YAML object.

    Returns:
        JSON_PARSER_OUTPUT_TYPE: Extracted YAML object.

    Raises:
        ValueError: If the input text does not contain a YAML object

    Examples:

    .. code-block:: python

        yaml_parser = YamlParser()
        yaml_str = "```yaml\nkey: value\n```"
        yaml_obj = yaml_parser(yaml_str)
        print(yaml_obj)  # Expected to extract {"key": "value"}
    """

    def __init__(self):
        super().__init__()

    def call(self, input: str) -> YAML_PARSER_OUTPUT_TYPE:
        """Extract the YAML substring from ``input`` and parse it into an object.

        Args:
            input: Text expected to contain a YAML string; it is stripped of
                surrounding whitespace before extraction.

        Returns:
            The object produced by ``F.parse_yaml_str_to_obj`` for the
            extracted YAML string.

        Raises:
            ValueError: If extracting the YAML string or parsing it fails.
                The underlying exception is chained as ``__cause__``.
        """
        input = input.strip()

        # Extraction and parsing share one handler, so a failure in either
        # step surfaces as the same ValueError.
        try:
            yaml_str = F.extract_yaml_str(input)
            return F.parse_yaml_str_to_obj(yaml_str)
        except Exception as e:
            # Chain the cause so the original traceback stays attached.
            raise ValueError(f"Error: {e}") from e