Add READMEs to ablations

john-b-yang · john-b-yang · commit a55ca46278b0 · 2025-12-11T17:17:23.000Z
diff --git a/codeclash/agents/minisweagent.py b/codeclash/agents/minisweagent.py
@@ -16,7 +16,9 @@
 from codeclash.utils.environment import copy_to_container
 
 os.environ["MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT"] = "90"
-os.environ["LITELLM_MODEL_REGISTRY_PATH"] = str((REPO_DIR / "configs" / "litellm_custom_model_config.yaml").resolve())
+os.environ["LITELLM_MODEL_REGISTRY_PATH"] = str(
+    (REPO_DIR / "configs" / "mini" / "litellm_custom_model_config.yaml").resolve()
+)
 
 
 class ClashAgent(DefaultAgent):
diff --git a/codeclash/arenas/robotrumble/robotrumble.py b/codeclash/arenas/robotrumble/robotrumble.py
@@ -8,6 +8,7 @@
 from codeclash.arenas.arena import CodeArena, RoundStats
 from codeclash.constants import RESULT_TIE
 
+DEFAULT_SIMS = 100
 MAP_EXT_TO_HEADER = {
     "js": "function robot(state, unit) {",
     "py": "def robot(state, unit):",
@@ -66,15 +67,16 @@ def execute_round(self, agents: list[Player]):
             # Submit all simulations to the thread pool
             futures = [
                 executor.submit(self._run_single_simulation, agents, idx, cmd)
-                for idx in range(self.game_config.get("sims_per_round", 100))
+                for idx in range(self.game_config.get("sims_per_round", DEFAULT_SIMS))
             ]
 
             # Collect results as they complete
             i_completed = 0
             for future in as_completed(futures):
                 future.result()
                 i_completed += 1
-                self.logger.info(f"Completed {i_completed} of {len(futures)} simulations")
+                if i_completed % 10 == 0:
+                    self.logger.info(f"Completed {i_completed} of {len(futures)} simulations")
 
     def _get_winner_txt(self, output_file: str, agents: list[Player]) -> str:
         try:
@@ -114,7 +116,7 @@ def _get_winner_json(self, output_file: str, agents: list[Player]) -> str:
 
     def get_results(self, agents: list[Player], round_num: int, stats: RoundStats):
         winners = []
-        for idx in range(self.game_config.get("sims_per_round", 100)):
+        for idx in range(self.game_config.get("sims_per_round", DEFAULT_SIMS)):
             output_file = self.log_round(round_num) / f"sim_{idx}.{self.sim_ext}"
             if not output_file.exists():
                 self.logger.warning(f"Simulation {idx} not found, skipping")
diff --git a/configs/ablations/multi/README.md b/configs/ablations/multi/README.md
@@ -0,0 +1,17 @@
+# Multi-player competitions
+
+The default CodeClash tournament setting pits players head to head.
+
+In these configurations, we explore how competitive dynamics change when 3+ players are competing. Specifically, we run Core War tournaments lasting 15 rounds with 6 players. Our findings can be found in the original paper in Section 4.1, specifically *Multi-agent competitions (3+ players) reflect similar rankings*.
+
+To enable multi-player competitions, simply add more players under the `players` field in your configuration, such as:
+
+```yaml
+players:
+- agent: mini
+  ...
+- agent: mini
+  ...
+- agent: mini
+  ...
+```
diff --git a/configs/ablations/transparent/README.md b/configs/ablations/transparent/README.md
@@ -0,0 +1,13 @@
+# Transparent Codebases
+
+This set of configurations corresponds to Section 4.1 of the original paper, specifically *Models have limited capacity for opponent analysis even with transparent codebases*.
+
+Under normal CodeClash circumstances, models' codebases are not made available to one another. One of CodeClash's challenges is to see whether models are capable of discerning opponent behavior via logs.
+
+In this ablation, we explore lifting this restriction. Each round, in addition to the competition logs, opponents' codebases are also made available to each player. All that's required to enable this feature is to set:
+
+```yaml
+tournament:
+    ...
+    transparent: true
+```
diff --git a/configs/ablations/vs_human/README.md b/configs/ablations/vs_human/README.md
@@ -0,0 +1,17 @@
+# vs. Human
+
+This set of configurations corresponds to Section 4.1 of the original paper, specifically the subsection *On RobotRumble, models trail substantially behind expert human programmers*.
+
+Each configuration pits a model against an open-source codebase written by a human expert for a particular arena. Across a tournament spanning 15 rounds, the model is allowed to evolve the codebase as it sees fit to beat the human expert's solution. The human's solution does *not* change for the duration of the tournament.
+
+To make models compete against static human solutions, do the following two steps.
+
+1. Make sure the human solution is working and pushed as a branch to the corresponding arena. E.g. [gigachad](https://github.com/CodeClash-ai/RobotRumble/tree/human/entropicdrifter/gigachad) for RobotRumble.
+2. Then, in your configuration, simply specify one of the players as a `dummy` agent, with `branch_init` set to the branch name, such as:
+
+```yaml
+players:
+- agent: dummy
+  branch_init: human/entropicdrifter/gigachad
+  name: gigachad
+```
diff --git a/configs/ablations/vs_human/RobotRumble__claude-sonnet-4-20250514__vs__seven-of-nine__r15__s250.yaml b/configs/ablations/vs_human/RobotRumble__claude-sonnet-4-20250514__vs__seven-of-nine__r15__s250.yaml
@@ -8,7 +8,7 @@ game:
 players:
 - agent: mini
   name: claude-sonnet-4-20250514
-  branch_init: python
+  branch_init: starter/python
   config:
     agent: !include mini/default.yaml
     model:
@@ -19,7 +19,7 @@ players:
         max_tokens: 4096
 - agent: dummy
   name: seven-of-nine
-  branch_init: human/seven-of-nine
+  branch_init: human/entropicdrifter/seven-of-nine
 prompts:
   game_description: |
     You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.
diff --git a/configs/ablations/vs_human/RobotRumble__claude-sonnet-4-5-20250929__vs__gigachad__r15__s250.yaml b/configs/ablations/vs_human/RobotRumble__claude-sonnet-4-5-20250929__vs__gigachad__r15__s250.yaml
@@ -6,7 +6,7 @@ game:
 players:
 - agent: mini
   name: claude-sonnet-4-5-20250929
-  branch_init: python
+  branch_init: starter/python
   config:
     agent: !include mini/default.yaml
     model:
@@ -17,7 +17,7 @@ players:
         max_tokens: 4096
 - agent: dummy
   name: gigachad
-  branch_init: human/gigachad
+  branch_init: human/entropicdrifter/gigachad
 prompts:
   game_description: |
     You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.
diff --git a/configs/ablations/vs_human/RobotRumble__claude-sonnet-4-5-20250929__vs__seven-of-nine__r15__s250.yaml b/configs/ablations/vs_human/RobotRumble__claude-sonnet-4-5-20250929__vs__seven-of-nine__r15__s250.yaml
@@ -8,7 +8,7 @@ game:
 players:
 - agent: mini
   name: claude-sonnet-4-5-20250929
-  branch_init: python
+  branch_init: starter/python
   config:
     agent: !include mini/default.yaml
     model:
@@ -19,7 +19,7 @@ players:
         max_tokens: 4096
 - agent: dummy
   name: seven-of-nine
-  branch_init: human/seven-of-nine
+  branch_init: human/entropicdrifter/seven-of-nine
 prompts:
   game_description: |
     You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.
diff --git a/configs/ablations/vs_human/RobotRumble__gemini-2.5-pro__vs__seven-of-nine__r15__s250.yaml b/configs/ablations/vs_human/RobotRumble__gemini-2.5-pro__vs__seven-of-nine__r15__s250.yaml
@@ -8,7 +8,7 @@ game:
 players:
 - agent: mini
   name: gemini-2.5-pro
-  branch_init: python
+  branch_init: starter/python
   config:
     agent: !include mini/default.yaml
     model:
@@ -18,7 +18,7 @@ players:
         temperature: 0.2
 - agent: dummy
   name: seven-of-nine
-  branch_init: human/seven-of-nine
+  branch_init: human/entropicdrifter/seven-of-nine
 prompts:
   game_description: |
     You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.
diff --git a/configs/ablations/vs_human/RobotRumble__gpt-5-mini__vs__seven-of-nine__r15__s250.yaml b/configs/ablations/vs_human/RobotRumble__gpt-5-mini__vs__seven-of-nine__r15__s250.yaml
@@ -8,15 +8,15 @@ game:
 players:
 - agent: mini
   name: gpt-5-mini
-  branch_init: python
+  branch_init: starter/python
   config:
     agent: !include mini/default.yaml
     model:
       model_name: '@openai/gpt-5-mini'
       model_class: portkey
 - agent: dummy
   name: seven-of-nine
-  branch_init: human/seven-of-nine
+  branch_init: human/entropicdrifter/seven-of-nine
 prompts:
   game_description: |
     You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.
diff --git a/configs/ablations/vs_human/RobotRumble__gpt-5__vs__seven-of-nine__r15__s250.yaml b/configs/ablations/vs_human/RobotRumble__gpt-5__vs__seven-of-nine__r15__s250.yaml
@@ -8,15 +8,15 @@ game:
 players:
 - agent: mini
   name: gpt-5
-  branch_init: python
+  branch_init: starter/python
   config:
     agent: !include mini/default.yaml
     model:
       model_name: '@openai/gpt-5'
       model_class: portkey
 - agent: dummy
   name: seven-of-nine
-  branch_init: human/seven-of-nine
+  branch_init: human/entropicdrifter/seven-of-nine
 prompts:
   game_description: |
     You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.
diff --git a/configs/ablations/vs_human/RobotRumble__grok-code-fast-1__vs__seven-of-nine__r15__s250.yaml b/configs/ablations/vs_human/RobotRumble__grok-code-fast-1__vs__seven-of-nine__r15__s250.yaml
@@ -8,7 +8,7 @@ game:
 players:
 - agent: mini
   name: grok-code-fast-1
-  branch_init: python
+  branch_init: starter/python
   config:
     agent: !include mini/default.yaml
     model:
@@ -19,7 +19,7 @@ players:
         temperature: 0.2
 - agent: dummy
   name: seven-of-nine
-  branch_init: human/seven-of-nine
+  branch_init: human/entropicdrifter/seven-of-nine
 prompts:
   game_description: |
     You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.
diff --git a/configs/ablations/vs_human/RobotRumble__o3__vs__seven-of-nine__r15__s250.yaml b/configs/ablations/vs_human/RobotRumble__o3__vs__seven-of-nine__r15__s250.yaml
@@ -8,15 +8,15 @@ game:
 players:
 - agent: mini
   name: o3
-  branch_init: python
+  branch_init: starter/python
   config:
     agent: !include mini/default.yaml
     model:
       model_name: '@openai/o3'
       model_class: portkey
 - agent: dummy
   name: seven-of-nine
-  branch_init: human/seven-of-nine
+  branch_init: human/entropicdrifter/seven-of-nine
 prompts:
   game_description: |
     You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.
diff --git a/configs/ablations/vs_human/RobotRumble__qwen3-coder-plus-2025-09-23__vs__seven-of-nine__r15__s250.yaml b/configs/ablations/vs_human/RobotRumble__qwen3-coder-plus-2025-09-23__vs__seven-of-nine__r15__s250.yaml
@@ -8,7 +8,7 @@ game:
 players:
 - agent: mini
   name: qwen3-coder-plus-2025-09-23
-  branch_init: python
+  branch_init: starter/python
   config:
     agent: !include mini/default.yaml
     model:
@@ -17,7 +17,7 @@ players:
         temperature: 0.2
 - agent: dummy
   name: seven-of-nine
-  branch_init: human/seven-of-nine
+  branch_init: human/entropicdrifter/seven-of-nine
 prompts:
   game_description: |
     You are a software developer ({{player_id}}) competing in a coding game called RobotRumble.
diff --git a/configs/mini/litellm_custom_model_config.yaml b/configs/mini/litellm_custom_model_config.yaml