Source code for optim.text_grad.text_loss_with_eval_fn

"""Adapted from text_grad's String Based Function"""

from typing import Callable, Dict, Union, TYPE_CHECKING, Optional
import logging
import json
from adalflow.optim.loss_component import LossComponent
from adalflow.optim.function import BackwardContext


if TYPE_CHECKING:
    from adalflow.core import ModelClient
    from adalflow.core.generator import BackwardEngine
from adalflow.core.types import GeneratorOutput
from adalflow.optim.parameter import Parameter, GradientContext
from adalflow.optim.types import ParameterType

from adalflow.core.prompt_builder import Prompt
from adalflow.eval.base import BaseEvaluator


log = logging.getLogger(__name__)

###  Loss/Score Information  ###
CONVERSATION_TEMPLATE_STRING = r"""
The variable is passed to the eval function and compared with a target/ground truth value.

<EVAL_FUNC_DESCRIPTION>: {{eval_fn_desc}}
<INPUTS>: {{input_str}}
<OUTPUTS/SCORE>: {{response_value}}
{% if metadata %}
Note: {{metadata}}
{% endif %}"""


# Does not have a gradient on the output: this is the loss function at the end of the backpropagation chain.
# CONVERSATION_START_INSTRUCTION_STRING_FN_BASE = r"""You will give feedback to a variable with the following role:
# <ROLE> {{variable_desc}} </ROLE>.
# Here is an evaluation of the variable using the eval function:
# {{conversation}}"""

# Has a gradient on the output: an intermediate layer in the backpropagation chain.
# The conversation will be provided differently.

### Variable Information ###
CONVERSATION_START_INSTRUCTION_STRING_FN = r"""
TARGET VARIABLE:
<NAME>: {{variable_name}}
<ROLE> {{variable_desc}} </ROLE>
<VARIABLE> {{variable_value}} </VARIABLE>
{{conversation_str}}
"""

# Third part of the user prompt
OBJECTIVE_INSTRUCTION_BASE = r"""<OBJECTIVE_FUNCTION>
Your only goal is to clearly state how it obtained the "<OUTPUTS/SCORE>".
Especially when the score is low.
Be CONCISE.
If you have enough context, add more specific feedback on how it failed.
</OBJECTIVE_FUNCTION>"""


OBJECTIVE_INSTRUCTION_CHAIN = r"""This conversation is part of a larger system. The <OUTPUTS/SCORE> was later used as "{{response_name}}: {{response_desc}}".
<OBJECTIVE_FUNCTION>
Your only goal is to clearly state how it obtained the "Eval output/score": {{response_gradient}}.
Especially when the score is low.
Be CONCISE.
If you have enough context, add more specific feedback on how it failed.
</OBJECTIVE_FUNCTION>"""


class EvalFnToTextLoss(LossComponent):
    __doc__ = """Convert an evaluation function to a text loss.

    LossComponent will take an eval function and output a score
    (usually a float in range [0, 1], and the higher the better, unlike
    the loss function in model training).

    In math:

    score/loss = eval_fn(y_pred, y_gt)

    The gradient/feedback = d(score)/d(y_pred) will be computed using a backward engine.

    Gradient_context = GradientContext(
        context=conversation_str,
        response_desc=response.role_desc,
        variable_desc=role_desc,
    )

    Args:
        eval_fn: The evaluation function that takes a pair of y and y_gt and returns a score.
        eval_fn_desc: Description of the evaluation function.
        backward_engine: The backward engine to use for the text prompt optimization.
        model_client: The model client to use for the backward engine if backward_engine is not provided.
        model_kwargs: The model kwargs to use for the backward engine if backward_engine is not provided.
    """

    def __init__(
        self,
        eval_fn: Union[Callable, BaseEvaluator],
        eval_fn_desc: str,
        backward_engine: Optional["BackwardEngine"] = None,
        model_client: "ModelClient" = None,
        model_kwargs: Dict[str, object] = None,
    ):
        from adalflow.core.generator import BackwardEngine

        super().__init__()
        self.eval_fn = eval_fn
        self.eval_fn_desc = eval_fn_desc
        self.name = f"{self.__class__.__name__}"

        self.backward_engine = None

        if backward_engine is None:
            log.info(
                "EvalFnToTextLoss: No backward engine provided. Creating one using model_client and model_kwargs."
            )
            if model_client and model_kwargs:
                self.set_backward_engine(backward_engine, model_client, model_kwargs)
        else:
            if not isinstance(backward_engine, BackwardEngine):
                raise TypeError(
                    "EvalFnToTextLoss: backward_engine must be an instance of BackwardEngine."
                )
            self.backward_engine = backward_engine

    # def __call__(self, *args, **kwargs) -> Parameter:
    #     r"""Different from default GradComponent __call__.
    #     Only Parameter as this is not needed in a normal task pipeline, but only
    #     for training generator."""
    #     return self.forward(*args, **kwargs)

    def forward(
        self,
        kwargs: Dict[str, Parameter],
        response_desc: str = None,
        metadata: Dict[str, str] = None,  # additional notes on the input kwargs
    ) -> Parameter:
        if response_desc is None:
            response_desc = "Output of EvalFnToTextLoss."

        # validate the type of kwargs
        predecessors = []
        for k, v in kwargs.items():
            if not isinstance(v, Parameter):
                raise TypeError(
                    f"EvalFnToTextLoss: All inputs must be Parameters. Got {type(v)} for {k}."
                )
            if isinstance(v, Parameter):
                predecessors.append(v)
        eval_inputs = {}
        for k, v in kwargs.items():
            eval_inputs[k] = v.eval_input

        score: float = self.eval_fn(**eval_inputs)

        # Create a parameter
        # TODO: improve the readability of the input and response
        eval_param: Parameter = Parameter(
            name=self.name + "_output",
            data=score,
            requires_opt=True,
            role_desc=response_desc,
            score=score,
            param_type=ParameterType.LOSS_OUTPUT,
        )
        eval_param.set_predecessors(predecessors)

        log.info(f"EvalFnToTextLoss: Input: {kwargs}, Output: {eval_param}")
        eval_param.set_grad_fn(
            BackwardContext(
                backward_fn=self.backward,
                backward_engine=self.backward_engine,
                response=eval_param,
                eval_fn_desc=self.eval_fn_desc,
                kwargs=kwargs,
                metadata=metadata,
            )
        )
        return eval_param

    def set_backward_engine(
        self,
        backward_engine: "BackwardEngine" = None,
        model_client: "ModelClient" = None,
        model_kwargs: Dict[str, object] = None,
    ):
        from adalflow.core.generator import BackwardEngine

        self.backward_engine = backward_engine
        if not backward_engine:
            log.info(
                "EvalFnToTextLoss: No backward engine provided. Creating one using model_client and model_kwargs."
            )
            self.backward_engine = BackwardEngine(model_client, model_kwargs)
        else:
            if type(backward_engine) is not BackwardEngine:
                raise TypeError(
                    f"EvalFnToTextLoss: backward_engine must be an instance of BackwardEngine. Got {type(backward_engine)}."
                )

    @staticmethod
    def _backward_through_one_predecessor(
        pred: Parameter,
        inputs_string: str,
        response: Parameter,
        eval_fn_desc: str,
        backward_engine: "BackwardEngine",
        is_chain: bool = False,
        metadata: Dict[str, str] = None,
    ):
        if not pred.requires_opt:
            log.debug(
                f"EvalFnToTextLoss: Skipping {pred} as it does not require optimization."
            )
            return
        log.debug(f"EvalFnToTextLoss: Backward through {pred}, is_chain: {is_chain}")

        if pred.check_if_already_computed_gradient_respect_to(response.id):
            log.info(
                f"EvalFnToTextLoss: Gradient already computed for {pred.role_desc} with respect to {response.role_desc}"
            )
            # print(
            #     f"Gradient already computed for {pred.role_desc} with respect to {response.role_desc}"
            # )
            return

        if backward_engine is None:
            log.error(
                "EvalFnToTextLoss: backward_engine is required for text prompt optimization."
            )
            raise ValueError(
                "EvalFnToTextLoss: backward_engine is required for text prompt optimization."
            )

        instruction_str, objective_str = None, None

        # response information
        conversation_str = Prompt(
            CONVERSATION_TEMPLATE_STRING,
            prompt_kwargs={
                "input_str": inputs_string,
                "eval_fn_desc": eval_fn_desc,
                "response_value": response.data,
                "metadata": json.dumps(metadata) if metadata else None,
            },
        )()

        conv_ins_template = CONVERSATION_START_INSTRUCTION_STRING_FN
        obj_ins_template = OBJECTIVE_INSTRUCTION_BASE

        if is_chain:
            # conv_ins_template = CONVERSATION_START_INSTRUCTION_STRING_FN_CHAIN
            obj_ins_template = OBJECTIVE_INSTRUCTION_CHAIN

        instruction_str = Prompt(
            conv_ins_template,
            prompt_kwargs={
                "variable_desc": pred.role_desc,
                "variable_name": pred.name,
                "variable_value": pred.data,
                "conversation_str": conversation_str,
            },
        )()
        objective_str = Prompt(
            obj_ins_template,
            prompt_kwargs={
                "response_name": response.name,
                "response_desc": response.role_desc,
                "response_gradient": response.data,
            },
        )()

        log.info(f"EvalFnToTextLoss: Instruction: {instruction_str}")
        log.info(f"EvalFnToTextLoss: Objective: {objective_str}")
        log.info(f"EvalFnToTextLoss: Conversation: {conversation_str}")

        # Compute the gradient
        backward_engine_prompt_kwargs = {
            "conversation_sec": instruction_str,
            "objective_instruction_sec": objective_str,
            # "evaluate_variable_instruction_sec": eval_str,
        }
        gradient_value: GeneratorOutput = backward_engine(
            prompt_kwargs=backward_engine_prompt_kwargs
        )
        # gradient_prompt = backward_engine.get_prompt(**backward_engine_prompt_kwargs)
        gradient_value_data = (
            gradient_value.data
            or backward_engine.failure_message_to_optimizer(
                gradient_response=gradient_value
            )
        )
        # print(f"gradient_prompt: {gradient_prompt}")
        # gradient_value_data = response.data.to_yaml()

        log.debug(f"EvalFnToTextLoss: Gradient for {pred}: {gradient_value_data}")

        # score should be passed to grad
        gradient_param = Parameter(
            name=f"{response.name}_to_{pred.name}_grad",
            data=gradient_value_data,
            requires_opt=True,
            # gradient_prompt=gradient_prompt,
            role_desc=f"Feedback for {pred.role_desc}",
            score=response.data,
            from_response_id=response.id,
            param_type=ParameterType.GRADIENT,
        )
        pred.add_gradient(gradient_param)
        pred.gradients_context[gradient_param] = GradientContext(
            context=conversation_str,
            response_desc=response.role_desc,
            variable_desc=pred.role_desc,
        )

        # backward the end-to-end score
        pred.set_score(response.data)
        print(f"setting pred name {pred.name} score to {response.data}")

        # TODO: reduce meta

    def backward(
        self,
        response: Parameter,
        eval_fn_desc: str,
        kwargs: Dict[str, Parameter],
        backward_engine: Optional[
            "BackwardEngine"
        ] = None,  # only needed for text prompt optimization
        metadata: Dict[str, str] = None,
    ):
        r"""Ensure backward_engine is set for text prompt optimization.

        It can be None if you are only doing demo optimization; in that case no
        textual gradients are produced and only the score is backpropagated.
        """
        log.info(f"EvalFnToTextLoss: Backward: {response}")
        children_params = response.predecessors
        is_chain = True
        response_gradient_context = response.get_gradient_and_context_text().strip()
        if response_gradient_context == "":
            log.info(f"EvalFnToTextLoss: Backward: No gradient found for {response}.")
            is_chain = False
        log.info(f"response_gradient_context: {response_gradient_context}")

        # go through all child parameters
        if backward_engine and not response.backward_engine_disabled:
            # Convert all input arguments to string
            inputs_string = "\n\n".join(
                [
                    f"({k}) (role: {v.role_desc}), data: {v.data}, input_to_eval_fn: {v.eval_input}, data_type: {type(v.eval_input)}"
                    for k, v in kwargs.items()
                ]
            )
            for pred in children_params:
                if not pred.requires_opt:
                    log.debug(
                        f"EvalFnToTextLoss: Skipping {pred} as it does not require optimization."
                    )
                    continue
                self._backward_through_one_predecessor(
                    pred,
                    inputs_string,
                    response,
                    eval_fn_desc,
                    backward_engine,
                    is_chain,
                    metadata,
                )
        # backward the score for the demo
        for pred in children_params:
            if not pred.requires_opt:
                log.debug(
                    f"EvalFnToTextLoss: Skipping {pred} as it does not require optimization."
                )
                continue
            pred._score = float(response.data)

            log.info(f"setting pred name {pred.name} score to {response.data}")
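
# A minimal usage sketch (assumptions: an OpenAI API key is configured via
# setup_env, and the names below mirror the runnable example in the
# __main__ block further down; y_param and y_gt_param are hypothetical Parameters):
#
#   from adalflow.eval.answer_match_acc import AnswerMatchAcc
#   from adalflow.components.model_client import OpenAIClient
#   from adalflow.core.generator import BackwardEngine
#
#   loss_fn = EvalFnToTextLoss(
#       eval_fn=AnswerMatchAcc().compute_single_item,
#       eval_fn_desc="Answer Match Accuracy",
#       backward_engine=BackwardEngine(
#           model_client=OpenAIClient(), model_kwargs={"model": "gpt-4o"}
#       ),
#   )
#   loss = loss_fn({"y": y_param, "y_gt": y_gt_param})  # returns a LOSS_OUTPUT Parameter
#   loss.backward()  # adds textual gradients to predecessors that require optimization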


if __name__ == "__main__":
    # Example of using EvalFnToTextLoss
    from adalflow.utils import setup_env, get_logger
    from adalflow.eval.answer_match_acc import AnswerMatchAcc
    from adalflow.components.model_client import OpenAIClient  # dir: model_client
    from adalflow.core.generator import Generator, BackwardEngine
    from adalflow.core.component import fun_to_component

    logger = get_logger(level="DEBUG", filename="lib_text_grad.log")
    setup_env()

    gpt_3_model = {
        "model_client": OpenAIClient(),
        "model_kwargs": {
            "model": "gpt-3.5-turbo",
        },
    }
    gpt_4o_model = {
        "model_client": OpenAIClient(),
        "model_kwargs": {
            "model": "gpt-4o",
        },
    }

    @fun_to_component
    def parse_integer_answer(answer: str, only_first_line: bool = False):
        try:
            if only_first_line:
                answer = answer.strip().split("\n")[0]
            answer = answer.strip()
            # find the last token that has a number in it
            answer = [
                token for token in answer.split() if any(c.isdigit() for c in token)
            ][-1]
            answer = answer.split(".")[0]
            answer = "".join([c for c in answer if c.isdigit()])
            answer = int(answer)
        except (ValueError, IndexError):
            answer = 0
        return answer

    evaluator = AnswerMatchAcc()
    eval_fn_desc = "Answer Match Accuracy"
    single_compute_eval = AnswerMatchAcc().compute_single_item

    backward_engine = BackwardEngine(**gpt_4o_model)
    # backward_engine.set_mock_output(mock_output_data="1")

    eval_fn_to_text_loss = EvalFnToTextLoss(
        eval_fn=single_compute_eval,
        eval_fn_desc=eval_fn_desc,
        backward_engine=backward_engine,
    )
    x = Parameter(
        name="x",
        data="I have a cauliflower, a stalk of celery, a cabbage, and a garlic. How many vegetables do I have?",
        requires_opt=False,
        role_desc="The question to the language model",
    )

    system_prompt = Parameter(
        name="system_prompt",
        data="You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.",
        requires_opt=True,
        role_desc="structured system prompt to a somewhat capable language model that specifies the behavior and strategies for the QA task",
    )

    model = Generator(
        prompt_kwargs={"task_desc_str": system_prompt},
        **gpt_3_model,
        output_processors=parse_integer_answer,
    )
    # model.set_mock_output(mock_output_data="4")
    model.train()
    print(f"model.train: {model.training}")

    y: Parameter = model(prompt_kwargs={"input_str": x})
    print(f"y: {y}")

    loss = eval_fn_to_text_loss(
        {
            "y": y,
            "y_gt": Parameter(
                name="y_gt",
                data="4",
                requires_opt=False,
                role_desc="Correct answer",
            ),
        }
    )
    print(f"loss: {loss}")
    loss.backward()
    print(loss.to_dict())
    assert len(loss.predecessors) == 2
    assert len(y.predecessors) == 2
    dot = loss.draw_graph(add_grads=True, filepath="real_data")
    # print("dot: ", dot)

    # Variable(data=1, requires_opt=True, role_desc=Output of the string-based function with purpose:
    # The runtime of string-based function that checks if the prediction is correct.,
    # predecessors={Variable(data=4, requires_opt=False, role_desc=correct answer for the query,
    # predecessors=set(), gradients=set()),
    # Variable(data=To determine the number of vegetables,
    # we need to count each individual item.
    # The cauliflower, celery, cabbage,
    # and garlic are all separate vegetables.
    # Therefore, you have 4 vegetables in total.
    # Answer: 4, requires_opt=True, role_desc=response from the language model,
    # predecessors={Variable(data=I have a cauliflower, a stalk of celery, a cabbage, and a garlic.
    # How many vegetables do I have?, requires_opt=False, role_desc=query to the language model,
    # predecessors=set(), gradients=set()),
    # Variable(data=You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.,
    # requires_opt=True, role_desc=structured system prompt to a somewhat capable language model that specifies the behavior and strategies for the QA task,
    # predecessors=set(), gradients=set())}, gradients=set())}, gradients=set())

    # loss: Parameter(alias=None, data=1.0, requires_opt=True, role_desc=Output of EvalFnToTextLoss with eval_fn_desc: Answer Match Accuracy,
    # predecessors={Parameter(alias=None, data=1, requires_opt=False, role_desc=Predicted answer, predecessors=set(), gradients=set()),
    # Parameter(alias=None, data=1, requires_opt=False, role_desc=Correct answer, predecessors=set(), gradients=set())}, gradients=set())

    # {'alias': None, 'data': 1.0, 'requires_opt': True, 'role_desc': 'Output of EvalFnToTextLoss with eval_fn_desc: Answer Match Accuracy', 'predecessors': [{'alias': None, 'data': '1', 'requires_opt': False, 'role_desc': 'Predicted answer', 'predecessors': [], 'gradients': [], 'proposed_data': None, 'gradients_context': [], 'grad_fn': 'None'}, {'alias': None, 'data': '1', 'requires_opt': False, 'role_desc': 'Correct answer', 'predecessors': [], 'gradients': [], 'proposed_data': None, 'gradients_context': [], 'grad_fn': 'None'}], 'gradients': [], 'proposed_data': None, 'gradients_context': [], 'grad_fn': '__main__.EvalFnToTextLoss.backward'}