Source code for optim.text_grad.text_loss_with_eval_fn

"""Adapted from text_grad's String Based Function"""

from typing import Callable, Dict, Union, TYPE_CHECKING, Optional
import logging
import json
from adalflow.optim.loss_component import LossComponent
from adalflow.optim.function import BackwardContext


if TYPE_CHECKING:
    from adalflow.core import ModelClient
    from adalflow.core.generator import BackwardEngine
from adalflow.core.types import GeneratorOutput
from adalflow.optim.parameter import (
    Parameter,
    OutputParameter,
)
from adalflow.optim.gradient import GradientContext, Gradient
from adalflow.optim.types import ParameterType

from adalflow.core.prompt_builder import Prompt
from adalflow.eval.base import BaseEvaluator
from adalflow.optim.text_grad.backend_engine_prompt import (
    LOSS_CONVERSATION_TEMPLATE_STRING,
    LOSS_CONVERSATION_START_INSTRUCTION_STRING_FN,
    OBJECTIVE_INSTRUCTION_BASE,
    OBJECTIVE_INSTRUCTION_CHAIN,  # often not used
)

# from adalflow.utils import printc


log = logging.getLogger(__name__)


class EvalFnToTextLoss(LossComponent):
    __doc__ = """Convert an evaluation function to a text loss.

    LossComponent takes an eval function and outputs a score (usually a float in
    [0, 1]; higher is better, unlike a loss in model training).

    In math:

    score/loss = eval_fn(y_pred, y_gt)

    The gradient/feedback = d(score)/d(y_pred) will be computed using a backward engine.

    Gradient_context = GradientContext(
        input_output=conversation_str,
        response_desc=response.role_desc,
        variable_desc=role_desc,
    )

    Args:
        eval_fn: The evaluation function that takes a pair of y and y_gt and returns a score.
        eval_fn_desc: Description of the evaluation function.
        backward_engine: The backward engine to use for the text prompt optimization.
        model_client: The model client to use for the backward engine if backward_engine is not provided.
        model_kwargs: The model kwargs to use for the backward engine if backward_engine is not provided.
    """

    def __init__(
        self,
        eval_fn: Union[Callable, BaseEvaluator],
        eval_fn_desc: str,
        backward_engine: Optional["BackwardEngine"] = None,
        model_client: "ModelClient" = None,
        model_kwargs: Dict[str, object] = None,
    ):
        from adalflow.core.generator import BackwardEngine

        super().__init__()
        self.eval_fn = eval_fn
        self.eval_fn_desc = eval_fn_desc
        self.name = f"{self.__class__.__name__}"
        self.backward_engine = None

        if backward_engine is None:
            log.info(
                "EvalFnToTextLoss: No backward engine provided. Creating one using model_client and model_kwargs."
            )
            if model_client and model_kwargs:
                self.set_backward_engine(backward_engine, model_client, model_kwargs)
        else:
            if not isinstance(backward_engine, BackwardEngine):
                raise TypeError(
                    "EvalFnToTextLoss: backward_engine must be an instance of BackwardEngine."
                )
            self.backward_engine = backward_engine
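    # A minimal sketch of the eval_fn contract this class expects (the
    # ``exact_match`` helper below is hypothetical, not part of this module):
    # it receives the same keyword arguments passed to ``forward`` (after each
    # Parameter is unwrapped via ``eval_input``) and returns a plain float score:
    #
    #     def exact_match(y: str, y_gt: str) -> float:
    #         return 1.0 if y.strip() == y_gt.strip() else 0.0
    #
    #     loss_fn = EvalFnToTextLoss(
    #         eval_fn=exact_match,
    #         eval_fn_desc="1.0 if the prediction matches the ground truth exactly, else 0.0",
    #         backward_engine=backward_engine,  # or model_client=..., model_kwargs=...
    #     )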
    def forward(
        self,
        kwargs: Dict[str, Parameter],
        response_desc: str = None,
        metadata: Dict[str, str] = None,  # additional notes on the input kwargs
        id: str = None,
        gt: object = None,
        input: Dict[str, object] = None,
    ) -> Parameter:
        r"""
        Args:
            kwargs: The inputs to the eval_fn.
            response_desc: Description of the output.
            metadata: Additional notes on the input kwargs.
            id: The unique identifier for the data point.
            gt: The ground truth for the evaluation function.
        """
        if response_desc is None:
            response_desc = "Output of EvalFnToTextLoss."

        # validate the type of kwargs
        predecessors = []
        for k, v in kwargs.items():
            if not isinstance(v, Parameter):
                raise TypeError(
                    f"EvalFnToTextLoss: All inputs must be Parameters. Got {type(v)} for {k}."
                )
            if isinstance(v, Parameter):
                predecessors.append(v)

        eval_inputs = {}
        for k, v in kwargs.items():
            eval_inputs[k] = v.eval_input

        score: float = self.eval_fn(**eval_inputs)

        eval_param: Parameter = OutputParameter(
            name=self.name + "_output",
            data=score,
            requires_opt=True,
            role_desc=response_desc,
            score=score,
            param_type=ParameterType.LOSS_OUTPUT,
            data_id=id,
        )
        eval_param.set_gt(gt)
        eval_param.set_predecessors(predecessors)
        eval_param.trace_forward_pass(
            input_args=kwargs,
            full_response=score,
            id=self.id,
            name=self.name,
        )

        log.info(f"EvalFnToTextLoss: Input: {kwargs}, Output: {eval_param}")
        # extract ground truth from eval_inputs, anything
        eval_param.set_grad_fn(
            BackwardContext(
                backward_fn=self.backward,
                backward_engine=self.backward_engine,
                response=eval_param,
                eval_fn_desc=self.eval_fn_desc,
                kwargs=kwargs,
                metadata=metadata,
                ground_truth=gt,
                input=input,
                disable_backward_engine=self._disable_backward_engine,
            )
        )
        return eval_param
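    # Usage sketch for ``forward`` (the parameter names below are illustrative):
    # the component is called with a dict of Parameters keyed by the eval_fn's
    # argument names and returns a LOSS_OUTPUT Parameter carrying the float score,
    # mirroring the ``__main__`` example at the bottom of this file:
    #
    #     loss = loss_fn({"y": y_pred_param, "y_gt": y_gt_param}, id="sample-1")
    #     loss.backward()  # propagates the score and, if a backward engine is set, textual gradients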
    def set_backward_engine(
        self,
        backward_engine: "BackwardEngine" = None,
        model_client: "ModelClient" = None,
        model_kwargs: Dict[str, object] = None,
    ):
        from adalflow.core.generator import BackwardEngine

        self.backward_engine = backward_engine
        if not backward_engine:
            log.info(
                "EvalFnToTextLoss: No backward engine provided. Creating one using model_client and model_kwargs."
            )
            self.backward_engine = BackwardEngine(model_client, model_kwargs)
        else:
            if type(backward_engine) is not BackwardEngine:
                raise TypeError(
                    f"EvalFnToTextLoss: backward_engine must be an instance of BackwardEngine. Got {type(backward_engine)}."
                )
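    # ``set_backward_engine`` can also be called after construction, either with a
    # ready BackwardEngine or with a model_client/model_kwargs pair from which one
    # is built. A sketch (the model name is an example, not a requirement):
    #
    #     loss_fn.set_backward_engine(
    #         model_client=OpenAIClient(),
    #         model_kwargs={"model": "gpt-4o"},
    #     )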
    @staticmethod
    def _backward_through_one_predecessor(
        pred: Parameter,
        kwargs: Dict[str, Parameter],
        response: Parameter,
        eval_fn_desc: str,
        backward_engine: "BackwardEngine",
        ground_truth: object = None,
        is_intermediate_node: bool = False,  # if the node is an intermediate node in the backpropagation chain
        metadata: Dict[str, str] = None,
        input: Dict[str, object] = None,  # system input
        disable_backward_engine: bool = False,
    ):
        if not pred.requires_opt:
            if response.score is not None:
                pred.set_score(response.score)
            log.debug(
                f"EvalFnToTextLoss: Skipping {pred} as it does not require optimization."
            )
            return
        log.debug(
            f"EvalFnToTextLoss: Backward through {pred}, is_intermediate_node: {is_intermediate_node}"
        )

        if pred.check_if_already_computed_gradient_respect_to(response.id):
            log.info(
                f"EvalFnToTextLoss: Gradient already computed for {pred.role_desc} with respect to {response.role_desc}"
            )
            return

        if backward_engine is None:
            log.error(
                "EvalFnToTextLoss: backward_engine is required for text prompt optimization."
            )
            raise ValueError(
                "EvalFnToTextLoss: backward_engine is required for text prompt optimization."
            )

        instruction_str, objective_str = None, None

        # convert kwargs to key, (value, type(eval_input))
        inputs = {}
        for k, v in kwargs.items():
            inputs[k] = (v.get_param_info(), str(type(v.eval_input)))

        # response information
        conversation_str = Prompt(
            LOSS_CONVERSATION_TEMPLATE_STRING,
            prompt_kwargs={
                "system_question": input,
                "inputs": inputs,
                "eval_fn_desc": eval_fn_desc,
                "response_value": response.get_prompt_data(),
                "metadata": json.dumps(metadata) if metadata else None,
            },
        )()

        conv_ins_template = LOSS_CONVERSATION_START_INSTRUCTION_STRING_FN
        obj_ins_template = OBJECTIVE_INSTRUCTION_BASE
        if is_intermediate_node:
            obj_ins_template = OBJECTIVE_INSTRUCTION_CHAIN

        instruction_str = Prompt(
            conv_ins_template,
            prompt_kwargs={
                "variable": pred.get_param_info(),
                "conversation_str": conversation_str,
            },
        )()
        objective_str = Prompt(
            obj_ins_template,
            prompt_kwargs={
                "response_name": response.name,
                "response_desc": response.role_desc,
                "response_gradient": response.data,
            },
        )()

        log.info(f"EvalFnToTextLoss: Instruction: {instruction_str}")
        log.info(f"EvalFnToTextLoss: Objective: {objective_str}")
        log.info(f"EvalFnToTextLoss: Conversation: {conversation_str}")

        # Compute the gradient
        backward_engine_prompt_kwargs = {
            "conversation_sec": instruction_str,
            "objective_instruction_sec": objective_str,
            # "evaluate_variable_instruction_sec": eval_str,
        }
        gradient_value_data = None
        gradient_prompt = None
        if not disable_backward_engine:
            gradient_value: GeneratorOutput = backward_engine(
                prompt_kwargs=backward_engine_prompt_kwargs
            )
            gradient_prompt = backward_engine.get_prompt(
                **backward_engine_prompt_kwargs
            )
            # print(f"Backward engine prompt: {gradient_prompt}")
            gradient_value_data = (
                gradient_value.data
                or backward_engine.failure_message_to_optimizer(
                    gradient_response=gradient_value
                )
            )

            gradient_value_data = (
                f"expected answer: {ground_truth},\n Feedback: {gradient_value_data}"
            )
            # print(f"gradient_value_data: {gradient_value_data}")

            log.debug(f"EvalFnToTextLoss: Gradient for {pred}: {gradient_value_data}")

        # score should be passed to grad
        gradient_param = Gradient(
            data=gradient_value_data,
            data_id=response.data_id,
            score=response.data,
            from_response=response,
            to_pred=pred,
        )
        gradient_param.add_prompt(gradient_prompt)
        gradient_param.add_context(
            GradientContext(
                input_output=conversation_str,
                response_desc=response.role_desc,
                variable_desc=pred.role_desc,
                # input=input,
                # ground_truth=ground_truth,
            )
        )
        pred.add_gradient(gradient_param)

        # backward the end to end score
        # TODO: not really useful
        if response.score is not None:
            pred.set_score(response.score)
        pred.set_gt(ground_truth)
        log.debug(f"pred: {pred.eval_input}, gt: {ground_truth}")
    def backward(
        self,
        response: Parameter,
        eval_fn_desc: str,
        kwargs: Dict[str, Parameter],
        ground_truth: object = None,
        backward_engine: Optional[
            "BackwardEngine"
        ] = None,  # only needed for text prompt optimization
        metadata: Dict[str, str] = None,
        input: Dict[str, object] = None,
        disable_backward_engine: bool = False,
    ):
        r"""Make sure to set ``backward_engine`` for text prompt optimization.
        It can be None if you are only doing demo optimization; in that case no
        gradients are generated and only the score is backpropagated.
        """
        log.info(f"EvalFnToTextLoss: Backward: {response}")
        children_params = response.predecessors
        is_intermediate_node = False
        response_gradient_context = response.get_gradient_and_context_text().strip()
        if response_gradient_context != "":
            log.info("EvalFnToTextLoss is an intermediate node.")
            is_intermediate_node = True
        log.info(f"response_gradient_context: {response_gradient_context}")

        # go through all child parameters
        if backward_engine:
            for pred in children_params:
                if not pred.requires_opt:
                    log.debug(
                        f"EvalFnToTextLoss: Skipping {pred} as it does not require optimization."
                    )
                    continue

                self._backward_through_one_predecessor(
                    pred,
                    kwargs,
                    response,
                    eval_fn_desc,
                    backward_engine,
                    ground_truth=ground_truth,
                    is_intermediate_node=is_intermediate_node,
                    metadata=metadata,
                    input=input,
                    disable_backward_engine=disable_backward_engine,
                )
        # else:  # recursively disable backward for all children
        #     for pred in children_params:
        #         pred.backward_engine_disabled = True

        # backward for the score for the demo
        for pred in children_params:
            if not (
                isinstance(response.data, float) or isinstance(response.data, int)
            ):
                raise TypeError(
                    f"EvalFnToTextLoss: response.data must be a float or int. Got {type(response.data)}."
                )
            pred.score = response.data
            log.debug(
                f"EvalFnToTextLoss: {pred.name} set_score: {response.data}, {response.name}",
            )
            log.info(f"setting pred name {pred.name} score to {response.data}")
if __name__ == "__main__":
    # Example of using EvalFnToTextLoss
    from adalflow.utils import setup_env, get_logger
    from adalflow.eval.answer_match_acc import AnswerMatchAcc
    from adalflow.components.model_client import OpenAIClient  # dir: model_client
    from adalflow.core.generator import Generator, BackwardEngine
    from adalflow.core.component import func_to_data_component

    logger = get_logger(level="DEBUG", filename="lib_text_grad.log")
    setup_env()

    gpt_3_model = {
        "model_client": OpenAIClient(),
        "model_kwargs": {
            "model": "gpt-3.5-turbo",
        },
    }
    gpt_4o_model = {
        "model_client": OpenAIClient(),
        "model_kwargs": {
            "model": "gpt-4o",
        },
    }

    @func_to_data_component
    def parse_integer_answer(answer: str, only_first_line: bool = False):
        try:
            if only_first_line:
                answer = answer.strip().split("\n")[0]
            answer = answer.strip()
            # find the last token that has a number in it
            answer = [
                token for token in answer.split() if any(c.isdigit() for c in token)
            ][-1]
            answer = answer.split(".")[0]
            answer = "".join([c for c in answer if c.isdigit()])
            answer = int(answer)
        except (ValueError, IndexError):
            answer = 0
        return answer

    evaluator = AnswerMatchAcc()
    eval_fn_desc = "Answer Match Accuracy"
    single_compute_eval = AnswerMatchAcc().compute_single_item

    backward_engine = BackwardEngine(**gpt_4o_model)
    # backward_engine.set_mock_output(mock_output_data="1")

    eval_fn_to_text_loss = EvalFnToTextLoss(
        eval_fn=single_compute_eval,
        eval_fn_desc=eval_fn_desc,
        backward_engine=backward_engine,
    )
    x = Parameter(
        name="x",
        data="I have a cauliflower, a stalk of celery, a cabbage, and a garlic. How many vegetables do I have?",
        requires_opt=False,
        role_desc="The question to the language model",
    )

    system_prompt = Parameter(
        name="system_prompt",
        data="You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.",
        requires_opt=True,
        role_desc="structured system prompt to a somewhat capable language model that specifies the behavior and strategies for the QA task",
    )

    model = Generator(
        prompt_kwargs={"task_desc_str": system_prompt},
        **gpt_3_model,
        output_processors=parse_integer_answer,
    )
    # model.set_mock_output(mock_output_data="4")
    model.train()

    y: Parameter = model(prompt_kwargs={"input_str": x})

    loss = eval_fn_to_text_loss(
        {
            "y": y,
            "y_gt": Parameter(
                name="y_gt",
                data="4",
                requires_opt=False,
                role_desc="Correct answer",
            ),
        }
    )
    loss.backward()
    assert len(loss.predecessors) == 2
    assert len(y.predecessors) == 2
    dot = loss.draw_graph(add_grads=True, filepath="real_data")
    # print("dot: ", dot)

    # Example of the resulting parameter graph (debug output):
    # Variable(data=1, requires_opt=True, role_desc=Output of the string-based function with purpose:
    # The runtime of string-based function that checks if the prediction is correct.,
    # predecessors={Variable(data=4, requires_opt=False, role_desc=correct answer for the query,
    # predecessors=set(), gradients=set()),
    # Variable(data=To determine the number of vegetables,
    # we need to count each individual item.
    # The cauliflower, celery, cabbage,
    # and garlic are all separate vegetables.
    # Therefore, you have 4 vegetables in total.
    # Answer: 4, requires_opt=True, role_desc=response from the language model,
    # predecessors={Variable(data=I have a cauliflower, a stalk of celery, a cabbage, and a garlic. How many vegetables do I have?, requires_opt=False, role_desc=query to the language model, predecessors=set(), gradients=set()), Variable(data=You will answer a reasoning question. Think step by step.
    # The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value., requires_opt=True, role_desc=structured system prompt to a somewhat capable language model that specifies the behavior and strategies for the QA task, predecessors=set(), gradients=set())}, gradients=set())}, gradients=set())

    # loss: Parameter(alias=None, data=1.0, requires_opt=True, role_desc=Output of EvalFnToTextLoss with eval_fn_desc: Answer Match Accuracy,
    # predecessors={Parameter(alias=None, data=1, requires_opt=False, role_desc=Predicted answer, predecessors=set(), gradients=set()),
    # Parameter(alias=None, data=1, requires_opt=False, role_desc=Correct answer, predecessors=set(), gradients=set())}, gradients=set())

    # {'alias': None, 'data': 1.0, 'requires_opt': True, 'role_desc': 'Output of EvalFnToTextLoss with eval_fn_desc: Answer Match Accuracy', 'predecessors': [{'alias': None, 'data': '1', 'requires_opt': False, 'role_desc': 'Predicted answer', 'predecessors': [], 'gradients': [], 'proposed_data': None, 'gradients_context': [], 'grad_fn': 'None'}, {'alias': None, 'data': '1', 'requires_opt': False, 'role_desc': 'Correct answer', 'predecessors': [], 'gradients': [], 'proposed_data': None, 'gradients_context': [], 'grad_fn': 'None'}], 'gradients': [], 'proposed_data': None, 'gradients_context': [], 'grad_fn': '__main__.EvalFnToTextLoss.backward'}
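# Demo-optimization-only sketch (no textual gradients): per ``backward``'s
# docstring, the backward engine may be omitted, in which case ``backward``
# only propagates the score to the predecessors. Names below are illustrative:
#
#     demo_loss_fn = EvalFnToTextLoss(
#         eval_fn=AnswerMatchAcc().compute_single_item,
#         eval_fn_desc="Answer Match Accuracy",
#     )
#     loss = demo_loss_fn({"y": y, "y_gt": y_gt})
#     loss.backward()  # no textual gradients; scores are set on y and y_gt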