def f1_score(y: str, y_gt: str) -> float:
    """Compute the token-level F1 score between a prediction and a reference.

    Both strings are passed through ``normalize_answer`` and split on
    whitespace. The overlap is the multiset intersection of the two token
    lists, so a repeated token only counts as many times as it appears in
    both. F1 is the harmonic mean of precision (overlap / predicted tokens)
    and recall (overlap / reference tokens).

    Args:
        y: The predicted answer.
        y_gt: The ground-truth answer.

    Returns:
        The F1 score in ``[0.0, 1.0]``. Returns ``0.0`` when the two answers
        share no tokens, including when both normalize to no tokens at all.

    Raises:
        ValueError: If either ``y`` or ``y_gt`` is not a string.
    """
    if not isinstance(y, str) or not isinstance(y_gt, str):
        raise ValueError(f"y: {y},{type(y)}, y_gt: {y_gt},{type(y_gt)} must be string.")

    # NOTE(review): normalize_answer is defined outside this block; its exact
    # normalization rules are not visible here.
    prediction_tokens = normalize_answer(y).split()
    ground_truth_tokens = normalize_answer(y_gt).split()

    # Counter & Counter keeps the minimum count per token (multiset overlap).
    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
    num_same = sum(common.values())

    if len(prediction_tokens) == len(ground_truth_tokens) == 0:
        # Unlike most tasks, QReCC and SQuAD-2.0 assign 1.0 in this edge case. We don't for uniformity.
        print("\n#> F1 Metric: Rare edge case of len(prediction_tokens) == len(ground_truth_tokens) == 0.\n")

    # No overlap: return early. This also guards the divisions below against
    # an empty prediction or reference token list.
    if num_same == 0:
        return 0.0

    precision = 1.0 * num_same / len(prediction_tokens)
    recall = 1.0 * num_same / len(ground_truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)

    return f1