Skip to content

Commit a55ca46

Browse files
committed
Add READMEs to ablations
1 parent beb66dd commit a55ca46

15 files changed

+73
-22
lines changed

codeclash/agents/minisweagent.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
from codeclash.utils.environment import copy_to_container
1717

1818
os.environ["MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT"] = "90"
19-
os.environ["LITELLM_MODEL_REGISTRY_PATH"] = str((REPO_DIR / "configs" / "litellm_custom_model_config.yaml").resolve())
19+
os.environ["LITELLM_MODEL_REGISTRY_PATH"] = str(
20+
(REPO_DIR / "configs" / "mini" / "litellm_custom_model_config.yaml").resolve()
21+
)
2022

2123

2224
class ClashAgent(DefaultAgent):

codeclash/arenas/robotrumble/robotrumble.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from codeclash.arenas.arena import CodeArena, RoundStats
99
from codeclash.constants import RESULT_TIE
1010

11+
DEFAULT_SIMS = 100
1112
MAP_EXT_TO_HEADER = {
1213
"js": "function robot(state, unit) {",
1314
"py": "def robot(state, unit):",
@@ -66,15 +67,16 @@ def execute_round(self, agents: list[Player]):
6667
# Submit all simulations to the thread pool
6768
futures = [
6869
executor.submit(self._run_single_simulation, agents, idx, cmd)
69-
for idx in range(self.game_config.get("sims_per_round", 100))
70+
for idx in range(self.game_config.get("sims_per_round", DEFAULT_SIMS))
7071
]
7172

7273
# Collect results as they complete
7374
i_completed = 0
7475
for future in as_completed(futures):
7576
future.result()
7677
i_completed += 1
77-
self.logger.info(f"Completed {i_completed} of {len(futures)} simulations")
78+
if i_completed % 10 == 0:
79+
self.logger.info(f"Completed {i_completed} of {len(futures)} simulations")
7880

7981
def _get_winner_txt(self, output_file: str, agents: list[Player]) -> str:
8082
try:
@@ -114,7 +116,7 @@ def _get_winner_json(self, output_file: str, agents: list[Player]) -> str:
114116

115117
def get_results(self, agents: list[Player], round_num: int, stats: RoundStats):
116118
winners = []
117-
for idx in range(self.game_config.get("sims_per_round", 100)):
119+
for idx in range(self.game_config.get("sims_per_round", DEFAULT_SIMS)):
118120
output_file = self.log_round(round_num) / f"sim_{idx}.{self.sim_ext}"
119121
if not output_file.exists():
120122
self.logger.warning(f"Simulation {idx} not found, skipping")

configs/ablations/multi/README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Multi-player competitions
2+
3+
The default CodeClash tournament setting pits players head to head.
4+
5+
In these configurations, we explore how competitive dynamics change when 3+ players are competing. Specifically, we run Core War tournaments lasting 15 rounds with 6 players. Our findings can be found in the original paper in Section 4.1, specifically *Multi-agent competitions (3+ players) reflect similar rankings*.
6+
7+
To enable multi-player competitions, simply add more players under the `players` field in your configuration, such as:
8+
9+
```yaml
10+
players:
11+
- agent: mini
12+
...
13+
- agent: mini
14+
...
15+
- agent: mini
16+
...
17+
```
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Transparent Codebases
2+
3+
This set of configurations corresponds to Section 4.1 of the original paper, specifically *Models have limited capacity for opponent analysis even with transparent codebases*.
4+
5+
Under normal CodeClash circumstances, models' codebases are not made available to one another. One of CodeClash's challenges is to see whether models are capable of discerning opponent behavior via logs.
6+
7+
In this ablation, we explore lifting this restriction. Each round, in addition to the competition logs, opponents' codebases are also made available to each player. All that's required to enable this feature is to set:
8+
9+
```yaml
10+
tournament:
11+
...
12+
transparent: true
13+
```
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# vs. Human
2+
3+
This set of configurations corresponds to Section 4.1 of the original paper, specifically the subsection *On RobotRumble, models trail substantially behind expert human programmers*.
4+
5+
Each configuration pits a model against an open source codebase written by a human expert for a particular arena. Across a tournament spanning 15 rounds, the model is allowed to evolve the codebase as it sees fit to beat the human expert's solution. The human's solution does *not* change for the duration of the tournament.
6+
7+
To make models compete against static human solutions, do the following two steps.
8+
9+
1. Make sure the human solution is working and pushed as a branch to the corresponding arena. E.g. [gigachad](https://github.com/CodeClash-ai/RobotRumble/tree/human/entropicdrifter/gigachad) for RobotRumble.
10+
2. Then, in your configuration, simply specify one of the players as a `dummy` agent, with `branch_init` set to the branch name, such as:
11+
12+
```yaml
13+
players:
14+
- agent: dummy
15+
branch_init: human/entropicdrifter/gigachad
16+
name: gigachad
17+
```

configs/ablations/vs_human/RobotRumble__claude-sonnet-4-20250514__vs__seven-of-nine__r15__s250.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ game:
88
players:
99
- agent: mini
1010
name: claude-sonnet-4-20250514
11-
branch_init: python
11+
branch_init: starter/python
1212
config:
1313
agent: !include mini/default.yaml
1414
model:
@@ -19,7 +19,7 @@ players:
1919
max_tokens: 4096
2020
- agent: dummy
2121
name: seven-of-nine
22-
branch_init: human/seven-of-nine
22+
branch_init: human/entropicdrifter/seven-of-nine
2323
prompts:
2424
game_description: |
2525
You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.

configs/ablations/vs_human/RobotRumble_gigachad.yaml renamed to configs/ablations/vs_human/RobotRumble__claude-sonnet-4-5-20250929__vs__gigachad__r15__s250.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ game:
66
players:
77
- agent: mini
88
name: claude-sonnet-4-5-20250929
9-
branch_init: python
9+
branch_init: starter/python
1010
config:
1111
agent: !include mini/default.yaml
1212
model:
@@ -17,7 +17,7 @@ players:
1717
max_tokens: 4096
1818
- agent: dummy
1919
name: gigachad
20-
branch_init: human/gigachad
20+
branch_init: human/entropicdrifter/gigachad
2121
prompts:
2222
game_description: |
2323
You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.

configs/ablations/vs_human/RobotRumble__claude-sonnet-4-5-20250929__vs__seven-of-nine__r15__s250.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ game:
88
players:
99
- agent: mini
1010
name: claude-sonnet-4-5-20250929
11-
branch_init: python
11+
branch_init: starter/python
1212
config:
1313
agent: !include mini/default.yaml
1414
model:
@@ -19,7 +19,7 @@ players:
1919
max_tokens: 4096
2020
- agent: dummy
2121
name: seven-of-nine
22-
branch_init: human/seven-of-nine
22+
branch_init: human/entropicdrifter/seven-of-nine
2323
prompts:
2424
game_description: |
2525
You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.

configs/ablations/vs_human/RobotRumble__gemini-2.5-pro__vs__seven-of-nine__r15__s250.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ game:
88
players:
99
- agent: mini
1010
name: gemini-2.5-pro
11-
branch_init: python
11+
branch_init: starter/python
1212
config:
1313
agent: !include mini/default.yaml
1414
model:
@@ -18,7 +18,7 @@ players:
1818
temperature: 0.2
1919
- agent: dummy
2020
name: seven-of-nine
21-
branch_init: human/seven-of-nine
21+
branch_init: human/entropicdrifter/seven-of-nine
2222
prompts:
2323
game_description: |
2424
You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.

configs/ablations/vs_human/RobotRumble__gpt-5-mini__vs__seven-of-nine__r15__s250.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@ game:
88
players:
99
- agent: mini
1010
name: gpt-5-mini
11-
branch_init: python
11+
branch_init: starter/python
1212
config:
1313
agent: !include mini/default.yaml
1414
model:
1515
model_name: '@openai/gpt-5-mini'
1616
model_class: portkey
1717
- agent: dummy
1818
name: seven-of-nine
19-
branch_init: human/seven-of-nine
19+
branch_init: human/entropicdrifter/seven-of-nine
2020
prompts:
2121
game_description: |
2222
You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.

0 commit comments

Comments
 (0)