Skip to content

Commit 6c8dff8

Browse files
committed
merge error
1 parent b73b5aa commit 6c8dff8

File tree

1 file changed

+0
-23
lines changed

1 file changed

+0
-23
lines changed

eval_protocol/pytest/evaluation_test.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -304,13 +304,6 @@ def wrapper_body(**kwargs):
304304

305305
cohort_id = generate_id()
306306

307-
def _log_eval_error(
308-
status: Literal["finished", "error"], rows: Optional[List[EvaluationRow]] | None, passed: bool
309-
) -> None:
310-
log_eval_status_and_rows(eval_metadata, rows, status, passed, default_logger)
311-
312-
cohort_id = generate_id()
313-
314307
def _log_eval_error(
315308
status: Literal["finished", "error"], rows: Optional[List[EvaluationRow]] | None, passed: bool
316309
) -> None:
@@ -470,25 +463,9 @@ def _log_eval_error(
470463
sum([r.evaluation_result.score for r in result if r.evaluation_result]) / len(result)
471464
for result in all_results
472465
]
473-
print(f"SCORES: {scores}")
474466
agg_score = aggregate(scores, aggregation_method)
475467
score_std = statistics.stdev(scores) if len(scores) > 1 else 0.0
476468

477-
# Compute 95% confidence interval for the fixed-set mean μ (by-question, using repeats)
478-
ci_low: float | None = None
479-
ci_high: float | None = None
480-
if aggregation_method == "mean":
481-
try:
482-
result_ci = compute_fixed_set_mu_ci([item for sublist in all_results for item in sublist])
483-
mu_ci_low, mu_ci_high = result_ci[1], result_ci[2]
484-
if mu_ci_low is not None and mu_ci_high is not None:
485-
ci_low = float(mu_ci_low)
486-
ci_high = float(mu_ci_high)
487-
# Keep agg_score as-is (mean over scores). For equal repeats per question these match.
488-
except Exception:
489-
ci_low = None
490-
ci_high = None
491-
492469
# Compute 95% confidence interval for the fixed-set mean μ (by-question, using repeats)
493470
ci_low: float | None = None
494471
ci_high: float | None = None

0 commit comments

Comments
 (0)