Skip to content
This repository was archived by the owner on Jul 18, 2025. It is now read-only.

Commit 43c26ea

Browse files
add support for partial credit (#70)
1 parent 2c0322b commit 43c26ea

File tree

1 file changed

+19
-6
lines changed

1 file changed

+19
-6
lines changed

swelancer.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,9 @@ async def grade(self, computer: ComputerInterface) -> SWELancerGrade:
263263
"pytest_logs": pytest_outputs,
264264
"variant": self.variant,
265265
"model_patch": model_patch.decode("utf-8"),
266+
267+
# Placeholder: "partial_credit" currently just mirrors `correct`. A finer-grained score could be derived from, e.g., the percentage of tests passed or how far the e2e test progressed.
268+
"partial_credit": correct
266269
}
267270
),
268271
patch_path="",
@@ -411,6 +414,9 @@ async def get_summary(self, results: list[tuple[ComputerTask, FinalResult]]) ->
411414
"swe_manager_hard": {"correct": 0, "total": 0},
412415
}
413416

417+
partial_credit_sum = 0.0
418+
partial_credit_count = 0
419+
414420
for task, result in results:
415421
try:
416422
grader_log = json.loads(result.grade.grader_log)
@@ -438,19 +444,24 @@ async def get_summary(self, results: list[tuple[ComputerTask, FinalResult]]) ->
438444
hard_variant = f"{variant}_hard"
439445
if hard_variant in original_summary["metadata_variants"]["accuracy"]:
440446
# Update earnings and available for hard variants
441-
original_summary["metadata_variants"]["earnings"][hard_variant] += (
442-
earned
443-
)
444-
original_summary["metadata_variants"]["available"][hard_variant] += (
445-
available
446-
)
447+
original_summary["metadata_variants"]["earnings"][hard_variant] += earned
448+
original_summary["metadata_variants"]["available"][hard_variant] += available
447449

448450
# Update accuracy counters for hard variants
449451
variant_counts[hard_variant]["total"] += 1
450452
if earned > 0:
451453
variant_counts[hard_variant]["correct"] += 1
452454

453455
original_summary["pytest_log"] = grader_log.get("pytest_log", "No logs found")
456+
457+
# Compute partial credit for each task. Use provided partial_credit if available,
458+
# otherwise derive it: if earned equals available (and available > 0), then full credit, else 0.
459+
pc = grader_log.get("partial_credit")
460+
if pc is None:
461+
pc = 1.0 if (available > 0 and earned == available) else 0.0
462+
partial_credit_sum += pc
463+
partial_credit_count += 1
464+
454465
except Exception as e:
455466
print(str(e))
456467

@@ -461,6 +472,8 @@ async def get_summary(self, results: list[tuple[ComputerTask, FinalResult]]) ->
461472
if total > 0:
462473
original_summary["metadata_variants"]["accuracy"][variant] = correct / total
463474

475+
original_summary["average_partial_credit"] = partial_credit_sum / partial_credit_count if partial_credit_count else 0.0
476+
464477
return original_summary
465478

466479
except Exception as e:

0 commit comments

Comments
 (0)