|
| 1 | +"""Bridge Arena for CodeClash.""" |
| 2 | + |
| 3 | +import json |
| 4 | +import shlex |
| 5 | +import subprocess |
| 6 | +from collections import Counter |
| 7 | +from concurrent.futures import ThreadPoolExecutor, as_completed |
| 8 | + |
| 9 | +from tqdm.auto import tqdm |
| 10 | + |
| 11 | +from codeclash.agents.player import Player |
| 12 | +from codeclash.arenas.arena import CodeArena, RoundStats |
| 13 | +from codeclash.constants import RESULT_TIE |
| 14 | + |
| 15 | + |
class BridgeArena(CodeArena):
    """Bridge arena: four players in two teams, North/South vs East/West.

    Seats are assigned by position in the ``agents`` list: indices 0 and 2
    play North/South, indices 1 and 3 play East/West. A round launches
    ``sims_per_round`` simulations through ``run_game.py``; each one is asked
    to write its result to ``sim_<idx>.json``, and ``get_results`` averages
    the per-team ``normalized_score`` values found in those files.
    """

    name: str = "Bridge"
    submission: str = "bridge_agent.py"
    description: str = """Bridge is a 4-player trick-taking card game played in teams.

Teams: North/South (positions 0/2) vs East/West (positions 1/3)

Your bot (bridge_agent.py) must implement these functions:
- get_bid(game_state) -> str: Make bidding decisions, return bid string like "1H", "2NT", "PASS"
- play_card(game_state) -> str: Play a card, return card string like "AS", "7H"

game_state is a dict containing:
- position: Your position (0=North, 1=East, 2=South, 3=West)
- hand: List of cards in your hand (e.g., ["AS", "KH", "7D"])
- bids: List of previous bids
- legal_bids: List of legal bids you can make (during bidding)
- legal_cards: List of legal cards you can play (during playing)
- current_trick: Cards played so far in current trick
- contract: The current contract (if bidding is complete)
"""
    default_args: dict = {
        "sims_per_round": 10,
    }

    # Timeout (seconds) handed to the environment for one simulation.
    _SIM_TIMEOUT_SECONDS = 60
    # Upper bound on simulations running concurrently.
    _MAX_WORKERS = 8
    # Averaged team scores closer than this are reported as a tie.
    _TIE_THRESHOLD = 0.01

    def __init__(self, config, **kwargs):
        """Check the player count, then initialise the base arena.

        Raises:
            ValueError: if ``config`` does not list exactly 4 players.
        """
        # Validate player count before initializing (to avoid Docker build on invalid config).
        # ``or []`` keeps a present-but-None "players" entry on the ValueError path
        # instead of failing with a TypeError from len(None).
        num_players = len(config.get("players") or [])
        if num_players != 4:
            raise ValueError(f"Bridge requires exactly 4 players, got {num_players}")
        super().__init__(config, **kwargs)
        self.run_cmd = "python3 /workspace/run_game.py"

    def validate_code(self, agent: Player) -> tuple[bool, str | None]:
        """Check that the agent's submission exists and defines the required functions.

        This is a textual check only: the file is not imported or executed.

        Returns:
            ``(True, None)`` when valid, otherwise ``(False, reason)``.
        """
        if self.submission not in agent.environment.execute("ls")["output"]:
            return False, f"No {self.submission} file found in root directory"

        content = agent.environment.execute(f"cat {self.submission}")["output"]

        required_functions = ("def get_bid(", "def play_card(")
        missing = [func for func in required_functions if func not in content]
        if missing:
            return False, f"Missing required functions: {', '.join(missing)}"

        return True, None

    def _sims_per_round(self) -> int:
        """Return the configured number of simulations, falling back to ``default_args``."""
        return self.game_config.get("sims_per_round", self.default_args["sims_per_round"])

    def _run_single_simulation(self, agents: list[Player], idx: int, cmd: str):
        """Run one Bridge simulation and return the command output.

        Args:
            agents: Unused here; kept so the call signature stays stable.
            idx: Simulation index, used to name the ``sim_<idx>.json`` log.
            cmd: Command line to run, without the ``-o`` output argument.

        Returns:
            The command's output, or ``""`` if the simulation timed out.
        """
        log_path = self.log_env / f"sim_{idx}.json"
        # Quote the path: it ends up in a shell command string, like the
        # agent paths that are already joined with shlex.join.
        full_cmd = f"{cmd} -o {shlex.quote(str(log_path))}"

        try:
            response = self.environment.execute(full_cmd, timeout=self._SIM_TIMEOUT_SECONDS)
        except subprocess.TimeoutExpired:
            self.logger.warning(f"Bridge simulation {idx} timed out")
            return ""

        if response["returncode"] != 0:
            self.logger.warning(
                f"Bridge simulation {idx} failed with exit code {response['returncode']}:\n{response['output']}"
            )
        return response["output"]

    def execute_round(self, agents: list[Player]):
        """Run all simulations of a round in a thread pool.

        Simulation ``idx`` is started with ``--seed idx`` and
        ``--dealer idx % 4``. An exception raised by a simulation is
        re-raised here through ``future.result()``.
        """
        sims = self._sims_per_round()
        self.logger.info(f"Running {sims} Bridge simulations with 4 players")

        agent_paths = [f"/{agent.name}/{self.submission}" for agent in agents]
        cmd = f"{self.run_cmd} {shlex.join(agent_paths)}"

        with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
            futures = [
                executor.submit(
                    self._run_single_simulation,
                    agents,
                    idx,
                    f"{cmd} --seed {idx} --dealer {idx % 4}",
                )
                for idx in range(sims)
            ]
            for future in tqdm(as_completed(futures), total=len(futures), desc="Bridge simulations"):
                future.result()

    def _load_team_scores(self, log_file, idx: int) -> tuple[float, float] | None:
        """Read one simulation log and return ``(NS, EW)`` scores, or ``None`` if unusable.

        Every failure is logged as a warning and reported as ``None`` so that
        a single bad log cannot abort result collection for the round.
        """
        try:
            with open(log_file, encoding="utf-8") as f:
                result = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            self.logger.warning(f"Error parsing {log_file}: {e}")
            return None

        if not isinstance(result, dict):
            self.logger.warning(
                f"Error parsing {log_file}: expected a JSON object, got {type(result).__name__}"
            )
            return None

        if "error" in result:
            self.logger.warning(f"Simulation {idx} had error: {result['error']}")
            return None

        vp_scores = result.get("normalized_score", {})
        if not vp_scores:
            self.logger.warning(f"Simulation {idx} has no normalized_score, skipping")
            return None

        # Convert both values before returning so a malformed entry can never
        # leave one team's total updated and the other's not.
        try:
            return float(vp_scores.get("NS", 0.0)), float(vp_scores.get("EW", 0.0))
        except (AttributeError, TypeError, ValueError) as e:
            self.logger.warning(f"Error parsing {log_file}: {e}")
            return None

    def get_results(self, agents: list[Player], round_num: int, stats: RoundStats):
        """Average the team scores over all usable simulation logs and fill ``stats``.

        Sets ``stats.winner`` to ``RESULT_TIE`` or to the winning pair's names
        joined by ``/``, and writes each player's team average to
        ``stats.scores`` and ``stats.player_stats[...].score``. When no log is
        usable, the round is a tie and every player scores 0.0.
        """
        team_scores = {"NS": 0.0, "EW": 0.0}
        games_played = 0

        for idx in range(self._sims_per_round()):
            log_file = self.log_round(round_num) / f"sim_{idx}.json"

            if not log_file.exists():
                self.logger.warning(f"Log file {log_file} not found, skipping")
                continue

            scores = self._load_team_scores(log_file, idx)
            if scores is None:
                continue

            ns_score, ew_score = scores
            team_scores["NS"] += ns_score
            team_scores["EW"] += ew_score
            games_played += 1

        if games_played == 0:
            self.logger.error("No valid game results found")
            stats.winner = RESULT_TIE
            for agent in agents:
                stats.scores[agent.name] = 0.0
                stats.player_stats[agent.name].score = 0.0
            return

        # Average the scores
        team_scores["NS"] /= games_played
        team_scores["EW"] /= games_played

        # Determine winning team
        if abs(team_scores["NS"] - team_scores["EW"]) < self._TIE_THRESHOLD:
            stats.winner = RESULT_TIE
        elif team_scores["NS"] > team_scores["EW"]:
            stats.winner = f"{agents[0].name}/{agents[2].name}"
        else:
            stats.winner = f"{agents[1].name}/{agents[3].name}"

        # Even list positions sit North/South, odd positions East/West.
        for position, agent in enumerate(agents):
            team = "NS" if position % 2 == 0 else "EW"
            score = team_scores[team]
            stats.scores[agent.name] = score
            stats.player_stats[agent.name].score = score

        self.logger.info(
            f"Round {round_num} results - NS: {team_scores['NS']:.3f}, "
            f"EW: {team_scores['EW']:.3f}, Winner: {stats.winner}"
        )
0 commit comments