Source code for optim.few_shot.bootstrap_optimizer

"""Adapted and optimized boostrap fewshot optimizer:

DSPy: Com-piling declarative language model calls into state-of-the-art pipelines."""

from typing import List, Optional, Dict
import logging

from adalflow.core.base_data_class import DataClass

from adalflow.optim.parameter import Parameter

from adalflow.core.functional import random_sample
from adalflow.optim.optimizer import DemoOptimizer
from adalflow.optim.types import ParameterType
from adalflow.utils import printc

log = logging.getLogger(__name__)

[docs] class BootstrapFewShot(DemoOptimizer): __doc__ = r"""BootstrapFewShot performs few-shot sampling used in few-shot ICL. It will be used to optimize paramters of demos. Based on research from AdalFlow team and DsPy library. Compared with Dspy's version: 1. we added weighted sampling for both the raw and augmented demos to prioritize failed demos but successful in augmented demos based on the evaluation score while we backpropagate the demo samples. 2. In default, we exclude the input fields from the augmented demos. Our reserch finds that using the reasoning demostrations from teacher model can be more effective in some cases than taking both inputs and output samples and be more token efficient. Reference: - DsPy: Com-piling declarative language model calls into state-of-the-art pipelines. """ exclude_input_fields_from_bootstrap_demos: bool def __init__( self, params: List[Parameter], raw_shots: Optional[int] = None, bootstrap_shots: Optional[int] = None, dataset: Optional[List[DataClass]] = None, weighted: bool = True, exclude_input_fields_from_bootstrap_demos: bool = False, ): super().__init__(weighted=weighted, dataset=dataset) self.params = [ param for param in params if param.requires_opt and param.param_type == ParameterType.DEMOS ]"BootstrapFewShot: {[ for p in self.params]}") print(f"BootstrapFewShot: {[ for p in self.params]}") self._raw_shots = raw_shots self._bootstrap_shots = bootstrap_shots self.proposing = False self._teacher_scores: Dict[str, float] = {} # data id to score self._student_scores: Dict[str, float] = {} # data id to score self.exclude_input_fields_from_bootstrap_demos = ( exclude_input_fields_from_bootstrap_demos ) # TODO: use the scores from the backward engine (optionally) on the demo parameters # needs to make a decision on which # this score does not make sense for multiple demo parameters
[docs] def add_scores(self, ids: List[str], scores: List[float], is_teacher: bool = True): r"""Add scores for each demo via _teacher_scores or _student_scores.""" if len(ids) != len(scores): raise ValueError( f"ids and scores must have the same length, got ids: {ids}, scores: {scores}" ) for score in scores: if not isinstance(score, float): raise ValueError( f"score must be a float, got {type(score)}, score: {score}" ) if score < 0 or score > 1: raise ValueError("score must be in range [0, 1]") target = self._teacher_scores if is_teacher else self._student_scores for i, id in enumerate(ids): target[id] = scores[i]
[docs] def config_shots(self, raw_shots: int, bootstrap_shots: int): self._raw_shots = raw_shots self._bootstrap_shots = bootstrap_shots
[docs] def config_dataset(self, dataset: List[DataClass]): self.dataset = dataset
def _pre_check(self): if not self.dataset: raise ValueError("dataset must be provided") if self._raw_shots is None or self._bootstrap_shots is None: raise ValueError("raw_shots and bootstrap_shots must be provided") @property def num_shots(self) -> int: return ( self._raw_shots + self._bootstrap_shots if self._raw_shots and self._bootstrap_shots else 0 )
[docs] def sample( self, augmented_demos: Dict[str, DataClass], demos: Dict[str, DataClass], dataset: List[DataClass], raw_shots: int, bootstrap_shots: int, weighted: bool = True, ): r"""Performs weighted sampling, ensure the score is in range [0, 1]. The higher score means better accuracy.""" # 1. sample from augmented demos (from teacher) # set weights to be score # add 1 to all score to avoid negative weights augmented_options = list(augmented_demos.values()) # get the teacher scores length and the augmented demos length len_teacher_scores = len(self._teacher_scores) len_augmented_options = len(augmented_options) print(f"len_teacher_scores: {len_teacher_scores}") print(f"len_augmented_options: {len_augmented_options}") weights = None if weighted: weights: List[float] = [] for demo in augmented_options: demo_score = self._teacher_scores.get(, None) if demo_score is None: raise ValueError( f"score must be provided for each demo, id: {}, all scores: {self._teacher_scores}" ) if demo_score < 0 or demo_score > 1: raise ValueError(f"score must be in range [0, 1], got {demo_score}") w = demo_score student_demo_score = self._student_scores.get(, None) if student_demo_score is not None: if student_demo_score < 0 or student_demo_score > 1: raise ValueError( f"score must be in range [0, 1], got {student_demo_score}" ) # if in demos and demos[].score is not None: w = ( w - student_demo_score # w - demos[].score ) # assign higher weights to failed demos but successful in augmented if w < 0: w = 0 weights.append(w) # print(f"augs: {augmented_options}") sampled_augmented_demos = ( random_sample( augmented_options, bootstrap_shots, replace=False, weights=weights ) if len(augmented_options) > 0 else [] ) # 2. sample from raw demos # exclude the sampled augmented demos # TODO: ensure all data points has unique ids filtered_dataset = list( filter( lambda x: not in set([ for demo in sampled_augmented_demos]), dataset, ) ) if len(filtered_dataset) == 0: # If no demos left we will get raw_weights [], sum to 0 return sampled_augmented_demos, [] # assigne weights 0 to all options raw_weights = None if weighted: raw_weights = [0.0] * len(filtered_dataset) # for those exist in the demos, assign higher score with failed demos for i, demo in enumerate(filtered_dataset): student_demo_score = self._student_scores.get(, None) if student_demo_score is not None: # ensure the score is in range [0, 1] if student_demo_score < 0 or student_demo_score > 1: raise ValueError( f"score must be in range [0, 1], got {student_demo_score}" ) raw_weights[i] = 1 - student_demo_score sampled_raw_demos = random_sample( filtered_dataset, raw_shots, replace=False, weights=raw_weights ) return sampled_augmented_demos, sampled_raw_demos
[docs] @staticmethod def samples_to_str( samples: List[DataClass], augmented: bool = False, exclude_inputs: bool = False ) -> str: sample_strs = [] for sample in samples: try: # process the input fields if augmented: exclude_fields = ["id", "score"] if exclude_inputs: exclude_fields.extend(sample.get_input_fields()) yaml_str = sample.to_yaml(exclude=exclude_fields) else: yaml_str = sample.to_yaml( include=sample.get_input_fields() + sample.get_output_fields() ) printc(f"yaml_str: {yaml_str}") sample_strs.append(yaml_str + "\n") except Exception as e: print(f"Error: {e} to yaml for {sample}") sample_strs.append(str(sample)) return "\n".join(sample_strs)
[docs] def propose(self): r"""Proposing a value while keeping previous value saved on parameter.""" self._pre_check() if self.proposing: raise ValueError("Already proposing a value.") for demo_param in self.params: if demo_param.requires_opt: augmented_demos = demo_param._traces demos = demo_param._student_traces if len(augmented_demos) != len(demos): log.warning( f"augmented and raw demos must have the same length, got {len(augmented_demos)} and {len(demos)} \n {augmented_demos} \n and student demos {demos}" ) try: sampled_augmented_demos, sampled_raw_demos = self.sample( augmented_demos=augmented_demos, demos=demos, dataset=self.dataset, raw_shots=self._raw_shots, bootstrap_shots=self._bootstrap_shots, weighted=self._weighted, ) print( f"sampled_augmented_demos: {[ for demo in sampled_augmented_demos]}" ) samples = sampled_augmented_demos + sampled_raw_demos demo_str = "" if len(sampled_augmented_demos) > 0: demo_str = self.samples_to_str( samples=sampled_augmented_demos, augmented=True, exclude_inputs=self.exclude_input_fields_from_bootstrap_demos, ) if len(sampled_raw_demos) > 0: demo_str += "\n" + self.samples_to_str( samples=sampled_raw_demos, augmented=False ) demo_str = demo_str.strip() demo_param.propose_data(demo_str, samples) except Exception as e: print(f"Error: {e} for {}") raise e self.proposing = True
[docs] def revert(self): """Revert to the previous value when the evaluation is worse.""" self._pre_check() if not self.proposing: raise ValueError("Not proposing a value.") for param in self.params: param.revert_data(include_demos=True) self.proposing = False
[docs] def step(self): """Discard the previous value and keep the proposed value.""" self._pre_check() if not self.proposing: raise ValueError("Not proposing a value.") for param in self.params: param.step_data(include_demos=True) # TODO: track all past history self.proposing = False
def __str__(self) -> str: s = f"BootstrapFewShot(raw_shots={self._raw_shots}, bootstrap_shots={self._bootstrap_shots}, \ params={[ for p in self.params]}, dataset={len(self.dataset) if self.dataset else 0})" return s def __repr__(self) -> str: return self.__str__()