From 0cfcf47f8c69258b09c9a4e4c1e7c64a0940b1cf Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Thu, 17 Apr 2025 10:48:22 +0200
Subject: [PATCH 01/13] fixednolimitholdem first attempt

---
 rlcard/envs/__init__.py                     |   5 +
 rlcard/envs/env.py                          |   2 +-
 rlcard/envs/fixednolimitholdem.py           | 119 +++++++++
 rlcard/games/fixednolimitholdem/__init__.py |   7 +
 rlcard/games/fixednolimitholdem/dealer.py   |   5 +
 rlcard/games/fixednolimitholdem/game.py     | 252 ++++++++++++++++++++
 rlcard/games/fixednolimitholdem/judger.py   |   5 +
 rlcard/games/fixednolimitholdem/player.py   |  19 ++
 rlcard/games/fixednolimitholdem/round.py    | 183 ++++++++++++++
 9 files changed, 596 insertions(+), 1 deletion(-)
 create mode 100644 rlcard/envs/fixednolimitholdem.py
 create mode 100644 rlcard/games/fixednolimitholdem/__init__.py
 create mode 100644 rlcard/games/fixednolimitholdem/dealer.py
 create mode 100644 rlcard/games/fixednolimitholdem/game.py
 create mode 100644 rlcard/games/fixednolimitholdem/judger.py
 create mode 100644 rlcard/games/fixednolimitholdem/player.py
 create mode 100644 rlcard/games/fixednolimitholdem/round.py

diff --git a/rlcard/envs/__init__.py b/rlcard/envs/__init__.py
index de9dbb8c1..6cafd110f 100644
--- a/rlcard/envs/__init__.py
+++ b/rlcard/envs/__init__.py
@@ -23,6 +23,11 @@
     entry_point='rlcard.envs.nolimitholdem:NolimitholdemEnv',
 )
 
+register(
+    env_id='fixed-no-limit-holdem',
+    entry_point='rlcard.envs.fixednolimitholdem:FixedNolimitholdemEnv',
+)
+
 register(
     env_id='leduc-holdem',
     entry_point='rlcard.envs.leducholdem:LeducholdemEnv'
diff --git a/rlcard/envs/env.py b/rlcard/envs/env.py
index 93e239548..300eb37f1 100644
--- a/rlcard/envs/env.py
+++ b/rlcard/envs/env.py
@@ -30,7 +30,7 @@ def __init__(self, config):
         # Game specific configurations
         # Currently only support blackjack、limit-holdem、no-limit-holdem
         # TODO support game configurations for all the games
-        supported_envs = ['blackjack', 'leduc-holdem', 'limit-holdem', 'no-limit-holdem']
+        supported_envs = ['blackjack', 'leduc-holdem', 'limit-holdem', 'no-limit-holdem', 'fixed-no-limit-holdem']
         if self.name in supported_envs:
             _game_config = self.default_game_config.copy()
             for key in config:
diff --git a/rlcard/envs/fixednolimitholdem.py b/rlcard/envs/fixednolimitholdem.py
new file mode 100644
index 000000000..23cbde9f2
--- /dev/null
+++ b/rlcard/envs/fixednolimitholdem.py
@@ -0,0 +1,119 @@
+import json
+import os
+import numpy as np
+from collections import OrderedDict
+
+import rlcard
+from rlcard.envs import Env
+from rlcard.games.nolimitholdem import Game
+from rlcard.games.nolimitholdem.round import Action
+
+DEFAULT_GAME_CONFIG = {
+        'game_num_players': 2,
+        'chips_for_each': 200,
+        'dealer_id': None,
+        }
+
+class FixedNolimitholdemEnv(Env):
+    ''' NoLimitholdem Environment
+    '''
+
+    def __init__(self, config):
+        ''' Initialize the FixedNolimitholdem environment
+        '''
+        self.name = 'fixed-no-limit-holdem'
+        self.default_game_config = DEFAULT_GAME_CONFIG
+        self.game = Game()
+        super().__init__(config)
+        self.actions = Action
+        self.state_shape = [[54] for _ in range(self.num_players)]
+        self.action_shape = [None for _ in range(self.num_players)]
+        # for raise_amount in range(1, self.game.init_chips+1):
+        #     self.actions.append(raise_amount)
+
+        with open(os.path.join(rlcard.__path__[0], 'games/limitholdem/card2index.json'), 'r') as file:
+            self.card2index = json.load(file)
+
+    def _get_legal_actions(self):
+        ''' Get all legal actions
+
+        Returns:
+            legal_actions (list): legal actions as returned by self.game.get_legal_actions()
+        '''
+        return self.game.get_legal_actions()
+
+    def _extract_state(self, state):
+        ''' Extract the state representation from state dictionary for agent
+
+        Note: Currently uses the hand cards, the public cards and two chip counts. TODO: encode the states
+
+        Args:
+            state (dict): Original state from the game
+
+        Returns:
+            extracted_state (dict): 'legal_actions', 'obs' (card flags plus my_chips and max(all_chips)), 'raw_obs', 'raw_legal_actions' and 'action_record'
+        '''
+        extracted_state = {}
+
+        legal_actions = OrderedDict({action.value: None for action in state['legal_actions']})
+        extracted_state['legal_actions'] = legal_actions
+
+        public_cards = state['public_cards']
+        hand = state['hand']
+        my_chips = state['my_chips']
+        all_chips = state['all_chips']
+        cards = public_cards + hand
+        idx = [self.card2index[card] for card in cards]
+        obs = np.zeros(54)
+        obs[idx] = 1
+        obs[52] = float(my_chips)
+        obs[53] = float(max(all_chips))
+        extracted_state['obs'] = obs
+
+        extracted_state['raw_obs'] = state
+        extracted_state['raw_legal_actions'] = [a for a in state['legal_actions']]
+        extracted_state['action_record'] = self.action_recorder
+
+        return extracted_state
+
+    def get_payoffs(self):
+        ''' Get the payoff of a game
+
+        Returns:
+           payoffs (list): list of payoffs
+        '''
+        return np.array(self.game.get_payoffs())
+
+    def _decode_action(self, action_id):
+        ''' Decode the action for applying to the game
+
+        Args:
+            action_id (int): action id
+
+        Returns:
+            action (Action): action for the game
+        '''
+        legal_actions = self.game.get_legal_actions()
+        if self.actions(action_id) not in legal_actions:
+            if Action.CHECK in legal_actions:
+                return Action.CHECK
+            else:
+                print("Tried non legal action", action_id, self.actions(action_id), legal_actions)
+                return Action.FOLD
+        return self.actions(action_id)
+
+    def get_perfect_information(self):
+        ''' Get the perfect information of the current state
+
+        Returns:
+            (dict): A dictionary of all the perfect information of the current state
+        '''
+        state = {}
+        state['chips'] = [self.game.players[i].in_chips for i in range(self.num_players)]
+        state['public_card'] = [c.get_index() for c in self.game.public_cards] if self.game.public_cards else None
+        state['hand_cards'] = [[c.get_index() for c in self.game.players[i].hand] for i in range(self.num_players)]
+        state['current_player'] = self.game.game_pointer
+        state['legal_actions'] = self.game.get_legal_actions()
+        return state
+
+
diff --git a/rlcard/games/fixednolimitholdem/__init__.py b/rlcard/games/fixednolimitholdem/__init__.py
new file mode 100644
index 000000000..0c3951b37
--- /dev/null
+++ b/rlcard/games/fixednolimitholdem/__init__.py
@@ -0,0 +1,7 @@
+from rlcard.games.fixednolimitholdem.dealer import NolimitholdemDealer as Dealer
+from rlcard.games.fixednolimitholdem.judger import NolimitholdemJudger as Judger
+from rlcard.games.fixednolimitholdem.player import NolimitholdemPlayer as Player
+from rlcard.games.fixednolimitholdem.round import Action
+from rlcard.games.fixednolimitholdem.round import NolimitholdemRound as Round
+from rlcard.games.fixednolimitholdem.game import NolimitholdemGame as Game
+
diff --git a/rlcard/games/fixednolimitholdem/dealer.py b/rlcard/games/fixednolimitholdem/dealer.py
new file mode 100644
index 000000000..867d65572
--- /dev/null
+++ b/rlcard/games/fixednolimitholdem/dealer.py
@@ -0,0 +1,5 @@
+from rlcard.games.limitholdem import Dealer
+
+
+class NolimitholdemDealer(Dealer):
+    pass
diff --git a/rlcard/games/fixednolimitholdem/game.py b/rlcard/games/fixednolimitholdem/game.py
new file mode 100644
index 000000000..b4d653ed8
--- /dev/null
+++ b/rlcard/games/fixednolimitholdem/game.py
@@ -0,0 +1,252 @@
+from enum import Enum
+
+import numpy as np
+from copy import deepcopy
+from rlcard.games.limitholdem import Game
+from rlcard.games.limitholdem import PlayerStatus
+
+from rlcard.games.nolimitholdem import Dealer
+from rlcard.games.nolimitholdem import Player
+from rlcard.games.nolimitholdem import Judger
+from rlcard.games.nolimitholdem import Round, Action
+
+
+class Stage(Enum):
+    PREFLOP = 0
+    FLOP = 1
+    TURN = 2
+    RIVER = 3
+    END_HIDDEN = 4
+    SHOWDOWN = 5
+
+
+class NolimitholdemGame(Game):
+    def __init__(self, allow_step_back=False, num_players=2):
+        """Initialize the class no limit holdem Game"""
+        super().__init__(allow_step_back, num_players)
+
+        self.np_random = np.random.RandomState()
+
+        # small blind and big blind
+        self.small_blind = 1
+        self.big_blind = 2 * self.small_blind
+
+        # config players
+        self.init_chips = [self.big_blind * 100] * num_players
+
+        # If None, the dealer will be randomly chosen
+        self.dealer_id = None
+
+    def configure(self, game_config):
+        """
+        Specify some game specific parameters, such as number of players, initial chips, and dealer id.
+        If dealer_id is None, he will be randomly chosen
+        """
+        self.num_players = game_config['game_num_players']
+        # must have num_players length
+        self.init_chips = [game_config['chips_for_each']] * game_config["game_num_players"]
+        self.dealer_id = game_config['dealer_id']
+
+    def init_game(self):
+        """
+        Initialize the game of not limit holdem
+
+        This version supports two-player no limit texas holdem
+
+        Returns:
+            (tuple): Tuple containing:
+
+                (dict): The first state of the game
+                (int): Current player's id
+        """
+        if self.dealer_id is None:
+            self.dealer_id = self.np_random.randint(0, self.num_players)
+
+        # Initialize a dealer that can deal cards
+        self.dealer = Dealer(self.np_random)
+
+        # Initialize players to play the game
+        self.players = [Player(i, self.init_chips[i], self.np_random) for i in range(self.num_players)]
+
+        # Initialize a judger class which will decide who wins in the end
+        self.judger = Judger(self.np_random)
+
+        # Deal cards to each  player to prepare for the first round
+        for i in range(2 * self.num_players):
+            self.players[i % self.num_players].hand.append(self.dealer.deal_card())
+
+        # Initialize public cards
+        self.public_cards = []
+        self.stage = Stage.PREFLOP
+
+        # Big blind and small blind
+        s = (self.dealer_id + 1) % self.num_players
+        b = (self.dealer_id + 2) % self.num_players
+        self.players[b].bet(chips=self.big_blind)
+        self.players[s].bet(chips=self.small_blind)
+
+        # The player next to the big blind plays the first
+        self.game_pointer = (b + 1) % self.num_players
+
+        # Initialize a bidding round, in the first round, the big blind and the small blind needs to
+        # be passed to the round for processing.
+        self.round = Round(self.num_players, self.big_blind, dealer=self.dealer, np_random=self.np_random)
+
+        self.round.start_new_round(game_pointer=self.game_pointer, raised=[p.in_chips for p in self.players])
+
+        # Count the round. There are 4 rounds in each game.
+        self.round_counter = 0
+
+        # Save the history for stepping back to the last state.
+        self.history = []
+
+        state = self.get_state(self.game_pointer)
+
+        return state, self.game_pointer
+
+    def get_legal_actions(self):
+        """
+        Return the legal actions for current player
+
+        Returns:
+            (list): A list of legal actions
+        """
+        return self.round.get_nolimit_legal_actions(players=self.players)
+
+    def step(self, action):
+        """
+        Get the next state
+
+        Args:
+            action (str): a specific action. (call, raise, fold, or check)
+
+        Returns:
+            (tuple): Tuple containing:
+
+                (dict): next player's state
+                (int): next player id
+        """
+
+        if action not in self.get_legal_actions():
+            print(action, self.get_legal_actions())
+            print(self.get_state(self.game_pointer))
+            raise Exception('Action not allowed')
+
+        if self.allow_step_back:
+            # First snapshot the current state
+            r = deepcopy(self.round)
+            b = self.game_pointer
+            r_c = self.round_counter
+            d = deepcopy(self.dealer)
+            p = deepcopy(self.public_cards)
+            ps = deepcopy(self.players)
+            self.history.append((r, b, r_c, d, p, ps))
+
+        # Then we proceed to the next round
+        self.game_pointer = self.round.proceed_round(self.players, action)
+
+        players_in_bypass = [1 if player.status in (PlayerStatus.FOLDED, PlayerStatus.ALLIN) else 0 for player in self.players]
+        if self.num_players - sum(players_in_bypass) == 1:
+            last_player = players_in_bypass.index(0)
+            if self.round.raised[last_player] >= max(self.round.raised):
+                # If the last player has put enough chips, he is also bypassed
+                players_in_bypass[last_player] = 1
+
+        # If a round is over, we deal more public cards
+        if self.round.is_over():
+            # Game pointer goes to the first player not in bypass after the dealer, if there is one
+            self.game_pointer = (self.dealer_id + 1) % self.num_players
+            if sum(players_in_bypass) < self.num_players:
+                while players_in_bypass[self.game_pointer]:
+                    self.game_pointer = (self.game_pointer + 1) % self.num_players
+
+            # For the first round, we deal 3 cards
+            if self.round_counter == 0:
+                self.stage = Stage.FLOP
+                self.public_cards.append(self.dealer.deal_card())
+                self.public_cards.append(self.dealer.deal_card())
+                self.public_cards.append(self.dealer.deal_card())
+                if len(self.players) == np.sum(players_in_bypass):
+                    self.round_counter += 1
+            # For the following rounds, we deal only 1 card
+            if self.round_counter == 1:
+                self.stage = Stage.TURN
+                self.public_cards.append(self.dealer.deal_card())
+                if len(self.players) == np.sum(players_in_bypass):
+                    self.round_counter += 1
+            if self.round_counter == 2:
+                self.stage = Stage.RIVER
+                self.public_cards.append(self.dealer.deal_card())
+                if len(self.players) == np.sum(players_in_bypass):
+                    self.round_counter += 1
+
+            self.round_counter += 1
+            self.round.start_new_round(self.game_pointer)
+
+        state = self.get_state(self.game_pointer)
+
+        return state, self.game_pointer
+
+    def get_state(self, player_id):
+        """
+        Return player's state
+
+        Args:
+            player_id (int): player id
+
+        Returns:
+            (dict): The state of the player
+        """
+        self.dealer.pot = np.sum([player.in_chips for player in self.players])
+
+        chips = [self.players[i].in_chips for i in range(self.num_players)]
+        legal_actions = self.get_legal_actions()
+        state = self.players[player_id].get_state(self.public_cards, chips, legal_actions)
+        state['stakes'] = [self.players[i].remained_chips for i in range(self.num_players)]
+        state['current_player'] = self.game_pointer
+        state['pot'] = self.dealer.pot
+        state['stage'] = self.stage
+        return state
+
+    def step_back(self):
+        """
+        Return to the previous state of the game
+
+        Returns:
+            (bool): True if the game steps back successfully
+        """
+        if len(self.history) > 0:
+            self.round, self.game_pointer, self.round_counter, self.dealer, self.public_cards, self.players = self.history.pop()
+            self.stage = Stage(self.round_counter)
+            return True
+        return False
+
+    def get_num_players(self):
+        """
+        Return the number of players in no limit texas holdem
+
+        Returns:
+            (int): The number of players in the game
+        """
+        return self.num_players
+
+    def get_payoffs(self):
+        """
+        Return the payoffs of the game
+
+        Returns:
+            (list): Each entry corresponds to the payoff of one player
+        """
+        hands = [p.hand + self.public_cards if p.status in (PlayerStatus.ALIVE, PlayerStatus.ALLIN) else None for p in self.players]
+        chips_payoffs = self.judger.judge_game(self.players, hands)
+        return chips_payoffs
+
+    @staticmethod
+    def get_num_actions():
+        """
+        Return the number of applicable actions
+
+        Returns:
+            (int): The number of actions. There are 6 actions (call, raise_half_pot, raise_pot, all_in, check and fold)
+        """
+        return len(Action)
diff --git a/rlcard/games/fixednolimitholdem/judger.py b/rlcard/games/fixednolimitholdem/judger.py
new file mode 100644
index 000000000..0c9c427e7
--- /dev/null
+++ b/rlcard/games/fixednolimitholdem/judger.py
@@ -0,0 +1,5 @@
+from rlcard.games.limitholdem import Judger
+
+
+class NolimitholdemJudger(Judger):
+    pass
diff --git a/rlcard/games/fixednolimitholdem/player.py b/rlcard/games/fixednolimitholdem/player.py
new file mode 100644
index 000000000..c63cc6e89
--- /dev/null
+++ b/rlcard/games/fixednolimitholdem/player.py
@@ -0,0 +1,19 @@
+from rlcard.games.limitholdem import Player
+
+
+class NolimitholdemPlayer(Player):
+    def __init__(self, player_id, init_chips, np_random):
+        """
+        Initialize a player.
+
+        Args:
+            player_id (int): The id of the player
+            init_chips (int): The number of chips the player has initially
+        """
+        super().__init__(player_id, np_random)
+        self.remained_chips = init_chips
+
+    def bet(self, chips):
+        quantity = chips if chips <= self.remained_chips else self.remained_chips
+        self.in_chips += quantity
+        self.remained_chips -= quantity
diff --git a/rlcard/games/fixednolimitholdem/round.py b/rlcard/games/fixednolimitholdem/round.py
new file mode 100644
index 000000000..45189c090
--- /dev/null
+++ b/rlcard/games/fixednolimitholdem/round.py
@@ -0,0 +1,183 @@
+# -*- coding: utf-8 -*-
+"""Implement no limit texas holdem Round class"""
+from enum import Enum
+
+from rlcard.games.limitholdem import PlayerStatus
+
+
+class Action(Enum):
+    FOLD = 0
+    CHECK = 1
+    CALL = 2
+    # RAISE_3BB = 3
+    RAISE_HALF_POT = 3
+    RAISE_POT = 4
+    # RAISE_2POT = 5
+    ALL_IN = 5
+    # SMALL_BLIND = 7
+    # BIG_BLIND = 8
+
+
+class NolimitholdemRound:
+    """Round can call functions from other classes to keep the game running"""
+
+    def __init__(self, num_players, init_raise_amount, dealer, np_random):
+        """
+        Initialize the round class
+
+        Args:
+            num_players (int): The number of players
+            init_raise_amount (int): The min raise amount when every round starts
+        """
+        self.np_random = np_random
+        self.game_pointer = None
+        self.num_players = num_players
+        self.init_raise_amount = init_raise_amount
+
+        self.dealer = dealer
+
+        # Count the number without raise
+        # If every player agree to not raise, the round is over
+        self.not_raise_num = 0
+
+        # Count players that are not playing anymore (folded or all-in)
+        self.not_playing_num = 0
+
+        # Raised amount for each player
+        self.raised = [0 for _ in range(self.num_players)]
+
+    def start_new_round(self, game_pointer, raised=None):
+        """
+        Start a new bidding round
+
+        Args:
+            game_pointer (int): The game_pointer that indicates the next player
+            raised (list): Initialize the chips for each player
+
+        Note: For the first round of the game, we need to setup the big/small blind
+        """
+        self.game_pointer = game_pointer
+        self.not_raise_num = 0
+        if raised:
+            self.raised = raised
+        else:
+            self.raised = [0 for _ in range(self.num_players)]
+
+    def proceed_round(self, players, action):
+        """
+        Call functions from other classes to keep one round running
+
+        Args:
+            players (list): The list of players that play the game
+            action (Action): A legal action taken by the player
+
+        Returns:
+            (int): The game_pointer that indicates the next player
+        """
+        player = players[self.game_pointer]
+
+        if action == Action.CHECK:
+            self.not_raise_num += 1
+
+        if action == Action.CALL:
+            diff = max(self.raised) - self.raised[self.game_pointer]
+            self.raised[self.game_pointer] = max(self.raised)
+            player.bet(chips=diff)
+            self.not_raise_num += 1
+
+        elif action == Action.ALL_IN:
+            all_in_quantity = player.remained_chips
+            self.raised[self.game_pointer] = all_in_quantity + self.raised[self.game_pointer]
+            player.bet(chips=all_in_quantity)
+
+            self.not_raise_num = 1
+
+        elif action == Action.RAISE_POT:
+            self.raised[self.game_pointer] += self.dealer.pot
+            player.bet(chips=self.dealer.pot)
+            self.not_raise_num = 1
+
+        elif action == Action.RAISE_HALF_POT:
+            quantity = int(self.dealer.pot / 2)
+            self.raised[self.game_pointer] += quantity
+            player.bet(chips=quantity)
+            self.not_raise_num = 1
+
+        elif action == Action.FOLD:
+            player.status = PlayerStatus.FOLDED
+
+        if player.remained_chips < 0:
+            raise Exception("Player in negative stake")
+
+        if player.remained_chips == 0 and player.status != PlayerStatus.FOLDED:
+            player.status = PlayerStatus.ALLIN
+
+        self.game_pointer = (self.game_pointer + 1) % self.num_players
+
+        if player.status == PlayerStatus.ALLIN:
+            self.not_playing_num += 1
+            self.not_raise_num -= 1  # Because already counted in not_playing_num
+        if player.status == PlayerStatus.FOLDED:
+            self.not_playing_num += 1
+
+        # Skip the folded players
+        while players[self.game_pointer].status == PlayerStatus.FOLDED:
+            self.game_pointer = (self.game_pointer + 1) % self.num_players
+
+        return self.game_pointer
+
+    def get_nolimit_legal_actions(self, players):
+        """
+        Obtain the legal actions for the current player
+
+        Args:
+            players (list): The players in the game
+
+        Returns:
+           (list):  A list of legal actions
+        """
+
+        full_actions = list(Action)
+
+        # The player can check when there is nothing to call (diff == 0), or call when facing a bet (diff > 0)
+        player = players[self.game_pointer]
+
+        diff = max(self.raised) - self.raised[self.game_pointer]
+        if diff == 0: 
+            full_actions.remove(Action.CALL)
+            full_actions.remove(Action.FOLD)
+
+        if diff > 0: 
+            full_actions.remove(Action.CHECK)
+
+        # If the current player has no more chips after call, we cannot raise
+        if diff > 0 and diff >= player.remained_chips:
+            full_actions.remove(Action.RAISE_HALF_POT)
+            full_actions.remove(Action.RAISE_POT)
+            full_actions.remove(Action.ALL_IN)
+        # Even if we can raise, we have to check remained chips
+        else:
+            if self.dealer.pot > player.remained_chips:
+                full_actions.remove(Action.RAISE_POT)
+
+            if int(self.dealer.pot / 2) > player.remained_chips:
+                full_actions.remove(Action.RAISE_HALF_POT)
+
+            # Can't raise if the total raise amount is less than or equal to the max raise amount of this round
+            # If raise by pot, there is no such concern
+            if Action.RAISE_HALF_POT in full_actions and \
+                int(self.dealer.pot / 2) + self.raised[self.game_pointer] <= max(self.raised):
+                full_actions.remove(Action.RAISE_HALF_POT)
+
+        return full_actions
+
+    def is_over(self):
+        """
+        Check whether the round is over
+
+        Returns:
+            (boolean): True if the current round is over
+        """
+        if self.not_raise_num + self.not_playing_num >= self.num_players:
+            return True
+        return False

From dccd79a70e489503cd62cffc79fcef56e2aad9aa Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Thu, 17 Apr 2025 12:01:07 +0200
Subject: [PATCH 02/13] fix heads-up blinds posting

---
 rlcard/games/fixednolimitholdem/game.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/rlcard/games/fixednolimitholdem/game.py b/rlcard/games/fixednolimitholdem/game.py
index b4d653ed8..097fe1e0a 100644
--- a/rlcard/games/fixednolimitholdem/game.py
+++ b/rlcard/games/fixednolimitholdem/game.py
@@ -80,10 +80,16 @@ def init_game(self):
         self.stage = Stage.PREFLOP
 
         # Big blind and small blind
-        s = (self.dealer_id + 1) % self.num_players
-        b = (self.dealer_id + 2) % self.num_players
+        if self.num_players == 2:
+            # In heads-up dealer posts small blind
+            s = (self.dealer_id) % self.num_players
+            b = (self.dealer_id + 1) % self.num_players
+        else: 
+            s = (self.dealer_id + 1) % self.num_players
+            b = (self.dealer_id + 2) % self.num_players
+
         self.players[b].bet(chips=self.big_blind)
-        self.players[s].bet(chips=self.small_blind)
+        self.players[s].bet(chips=self.small_blind)            
 
         # The player next to the big blind plays the first
         self.game_pointer = (b + 1) % self.num_players

From d2102510ec81d919f0d1e3fba63970127212b29e Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Thu, 17 Apr 2025 12:04:20 +0200
Subject: [PATCH 03/13] comment fix

---
 rlcard/envs/fixednolimitholdem.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rlcard/envs/fixednolimitholdem.py b/rlcard/envs/fixednolimitholdem.py
index 23cbde9f2..033bf3949 100644
--- a/rlcard/envs/fixednolimitholdem.py
+++ b/rlcard/envs/fixednolimitholdem.py
@@ -15,7 +15,7 @@
         }
 
 class FixedNolimitholdemEnv(Env):
-    ''' NoLimitholdem Environment
+    ''' FixedNolimitholdem Environment
     '''
 
     def __init__(self, config):

From 414b2287977dcde9dcf20d465aba5b79cf2907fa Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Thu, 17 Apr 2025 12:59:07 +0200
Subject: [PATCH 04/13] update imports

---
 rlcard/envs/fixednolimitholdem.py       | 4 ++--
 rlcard/games/fixednolimitholdem/game.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/rlcard/envs/fixednolimitholdem.py b/rlcard/envs/fixednolimitholdem.py
index 033bf3949..43e37fc50 100644
--- a/rlcard/envs/fixednolimitholdem.py
+++ b/rlcard/envs/fixednolimitholdem.py
@@ -5,8 +5,8 @@
 
 import rlcard
 from rlcard.envs import Env
-from rlcard.games.nolimitholdem import Game
-from rlcard.games.nolimitholdem.round import Action
+from rlcard.games.fixednolimitholdem import Game
+from rlcard.games.fixednolimitholdem.round import Action
 
 DEFAULT_GAME_CONFIG = {
         'game_num_players': 2,
diff --git a/rlcard/games/fixednolimitholdem/game.py b/rlcard/games/fixednolimitholdem/game.py
index 097fe1e0a..e6317d076 100644
--- a/rlcard/games/fixednolimitholdem/game.py
+++ b/rlcard/games/fixednolimitholdem/game.py
@@ -5,10 +5,10 @@
 from rlcard.games.limitholdem import Game
 from rlcard.games.limitholdem import PlayerStatus
 
-from rlcard.games.nolimitholdem import Dealer
-from rlcard.games.nolimitholdem import Player
-from rlcard.games.nolimitholdem import Judger
-from rlcard.games.nolimitholdem import Round, Action
+from rlcard.games.fixednolimitholdem import Dealer
+from rlcard.games.fixednolimitholdem import Player
+from rlcard.games.fixednolimitholdem import Judger
+from rlcard.games.fixednolimitholdem import Round, Action
 
 
 class Stage(Enum):

From d6bde9d1787053a7ca333644e07901319a2f255f Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Thu, 17 Apr 2025 13:16:16 +0200
Subject: [PATCH 05/13] rotate dealer

---
 rlcard/games/fixednolimitholdem/game.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/rlcard/games/fixednolimitholdem/game.py b/rlcard/games/fixednolimitholdem/game.py
index e6317d076..0f0cdc7e9 100644
--- a/rlcard/games/fixednolimitholdem/game.py
+++ b/rlcard/games/fixednolimitholdem/game.py
@@ -61,6 +61,9 @@ def init_game(self):
         """
         if self.dealer_id is None:
             self.dealer_id = self.np_random.randint(0, self.num_players)
+        else:
+            self.dealer_id = (self.dealer_id + 1) % self.num_players
+            
 
         # Initialize a dealer that can deal cards
         self.dealer = Dealer(self.np_random)

From bbcafb0b836416ca3f50d7fe0cd7704a0db6b224 Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Sun, 20 Apr 2025 01:26:15 +0200
Subject: [PATCH 06/13] add manual dealer for controlling the cards dealt

---
 rlcard/games/fixednolimitholdem/dealer.py | 121 +++++++++++-
 rlcard/games/fixednolimitholdem/game.py   | 222 +++++++++++++++-------
 2 files changed, 276 insertions(+), 67 deletions(-)

diff --git a/rlcard/games/fixednolimitholdem/dealer.py b/rlcard/games/fixednolimitholdem/dealer.py
index 867d65572..31f7efef3 100644
--- a/rlcard/games/fixednolimitholdem/dealer.py
+++ b/rlcard/games/fixednolimitholdem/dealer.py
@@ -2,4 +2,123 @@
 
 
 class NolimitholdemDealer(Dealer):
-    pass
+    def __init__(self, np_random):
+        super().__init__(np_random)
+        # Initialize new properties with default values
+        self.preset_player0_hand = []
+        self.preset_flop = []
+        self.preset_turn = None
+        self.preset_river = None
+        self.current_stage = None
+        self.manual_mode = False  # Default to automatic dealing
+    
+    def enable_manual_mode(self):
+        """Enable manual card selection mode"""
+        self.manual_mode = True
+    
+    def set_player0_hand(self, cards):
+        """Set specific cards for Player 0's hand
+        
+        Args:
+            cards (list): List of card objects to be used as Player 0's hand
+        """
+        if not self.manual_mode:
+            return  # Do nothing if not in manual mode
+            
+        self.preset_player0_hand = cards
+        # Remove these cards from the deck
+        for card in cards:
+            if card in self.deck:
+                self.deck.remove(card)
+    
+    def set_flop(self, cards):
+        """Set specific flop cards
+        
+        Args:
+            cards (list): List of 3 card objects to be used as flop
+        """
+        if not self.manual_mode:
+            return  # Do nothing if not in manual mode
+            
+        if len(cards) != 3:
+            raise ValueError("Flop must consist of exactly 3 cards")
+        self.preset_flop = cards
+        # Remove these cards from the deck
+        for card in cards:
+            if card in self.deck:
+                self.deck.remove(card)
+    
+    def set_turn(self, card):
+        """Set specific turn card
+        
+        Args:
+            card (object): Card object to be used as turn
+        """
+        if not self.manual_mode:
+            return  # Do nothing if not in manual mode
+            
+        self.preset_turn = card
+        # Remove this card from the deck
+        if card in self.deck:
+            self.deck.remove(card)
+    
+    def set_river(self, card):
+        """Set specific river card
+        
+        Args:
+            card (object): Card object to be used as river
+        """
+        if not self.manual_mode:
+            return  # Do nothing if not in manual mode
+            
+        self.preset_river = card
+        # Remove this card from the deck
+        if card in self.deck:
+            self.deck.remove(card)
+    
+    def has_preset_cards(self, stage):
+        """Check if dealer has preset cards for the given stage
+        
+        Args:
+            stage (str): The stage to check ('flop', 'turn', or 'river')
+            
+        Returns:
+            (bool): True if not in manual mode, or if preset cards are set for the stage
+        """
+        if not self.manual_mode:
+            return True  # In automatic mode, we always have cards
+            
+        if stage == 'flop':
+            return len(self.preset_flop) == 3
+        elif stage == 'turn':
+            return self.preset_turn is not None
+        elif stage == 'river':
+            return self.preset_river is not None
+        return False
+    
+    def deal_card(self):
+        """Deal a card from the deck
+        
+        Returns:
+            (object): A card object
+        """
+        # Only use preset cards if in manual mode
+        if self.manual_mode:
+            # For player 0's hand
+            if len(self.preset_player0_hand) > 0:
+                return self.preset_player0_hand.pop(0)
+            
+            # For community cards based on current stage
+            if self.current_stage == 'flop' and len(self.preset_flop) > 0:
+                return self.preset_flop.pop(0)
+            elif self.current_stage == 'turn' and self.preset_turn is not None:
+                card = self.preset_turn
+                self.preset_turn = None
+                return card
+            elif self.current_stage == 'river' and self.preset_river is not None:
+                card = self.preset_river
+                self.preset_river = None
+                return card
+        
+        # Default behavior - deal from deck
+        return super().deal_card()
diff --git a/rlcard/games/fixednolimitholdem/game.py b/rlcard/games/fixednolimitholdem/game.py
index 0f0cdc7e9..003adcfb3 100644
--- a/rlcard/games/fixednolimitholdem/game.py
+++ b/rlcard/games/fixednolimitholdem/game.py
@@ -1,5 +1,4 @@
 from enum import Enum
-
 import numpy as np
 from copy import deepcopy
 from rlcard.games.limitholdem import Game
@@ -18,6 +17,9 @@ class Stage(Enum):
     RIVER = 3
     END_HIDDEN = 4
     SHOWDOWN = 5
+    WAITING_FOR_FLOP = 6  # Waiting for flop cards
+    WAITING_FOR_TURN = 7  # Waiting for turn card
+    WAITING_FOR_RIVER = 8  # Waiting for river card
 
 
 class NolimitholdemGame(Game):
@@ -36,6 +38,10 @@ def __init__(self, allow_step_back=False, num_players=2):
 
         # If None, the dealer will be randomly chosen
         self.dealer_id = None
+        
+        # For manually setting cards
+        self.player0_hand = []
+        self.manual_dealer = False  # Default to automatic dealing
 
     def configure(self, game_config):
         """
@@ -46,6 +52,13 @@ def configure(self, game_config):
         # must have num_players length
         self.init_chips = [game_config['chips_for_each']] * game_config["game_num_players"]
         self.dealer_id = game_config['dealer_id']
+        
+        # Check for manual dealer flag
+        self.manual_dealer = game_config.get('manual_dealer', False)
+        
+        # Set player0 hand if provided and manual dealer is enabled
+        if self.manual_dealer and 'player0_hand' in game_config:
+            self.player0_hand = game_config['player0_hand']
 
     def init_game(self):
         """
@@ -64,9 +77,17 @@ def init_game(self):
         else:
             self.dealer_id = (self.dealer_id + 1) % self.num_players
             
-
         # Initialize a dealer that can deal cards
         self.dealer = Dealer(self.np_random)
+        
+        # Enable manual mode if configured
+        if self.manual_dealer:
+            if hasattr(self.dealer, 'enable_manual_mode'):
+                self.dealer.enable_manual_mode()
+            
+            # Set preset cards for Player 0's hand if provided
+            if self.player0_hand and hasattr(self.dealer, 'set_player0_hand'):
+                self.dealer.set_player0_hand(self.player0_hand)
 
         # Initialize players to play the game
         self.players = [Player(i, self.init_chips[i], self.np_random) for i in range(self.num_players)]
@@ -74,7 +95,7 @@ def init_game(self):
         # Initialize a judger class which will decide who wins in the end
         self.judger = Judger(self.np_random)
 
-        # Deal cards to each  player to prepare for the first round
+        # Deal cards to each player to prepare for the first round
         for i in range(2 * self.num_players):
             self.players[i % self.num_players].hand.append(self.dealer.deal_card())
 
@@ -112,6 +133,77 @@ def init_game(self):
         state = self.get_state(self.game_pointer)
 
         return state, self.game_pointer
+    
+    def set_flop(self, cards):
+        """Set specific flop cards
+        
+        Args:
+            cards (list): List of 3 card objects to be used as flop
+        """
+        if not self.manual_dealer:
+            return  # Do nothing if not in manual mode
+            
+        if hasattr(self.dealer, 'set_flop'):
+            self.dealer.set_flop(cards)
+            
+            # If we were waiting for flop cards, resume the game
+            if self.stage == Stage.WAITING_FOR_FLOP:
+                self._deal_flop()
+                self.stage = Stage.FLOP
+    
+    def set_turn(self, card):
+        """Set specific turn card
+        
+        Args:
+            card (object): Card object to be used as turn
+        """
+        if not self.manual_dealer:
+            return  # Do nothing if not in manual mode
+            
+        if hasattr(self.dealer, 'set_turn'):
+            self.dealer.set_turn(card)
+            
+            # If we were waiting for turn card, resume the game
+            if self.stage == Stage.WAITING_FOR_TURN:
+                self._deal_turn()
+                self.stage = Stage.TURN
+    
+    def set_river(self, card):
+        """Set specific river card
+        
+        Args:
+            card (object): Card object to be used as river
+        """
+        if not self.manual_dealer:
+            return  # Do nothing if not in manual mode
+            
+        if hasattr(self.dealer, 'set_river'):
+            self.dealer.set_river(card)
+            
+            # If we were waiting for river card, resume the game
+            if self.stage == Stage.WAITING_FOR_RIVER:
+                self._deal_river()
+                self.stage = Stage.RIVER
+    
+    def _deal_flop(self):
+        """Deal the flop cards"""
+        if hasattr(self.dealer, 'current_stage'):
+            self.dealer.current_stage = 'flop'
+        self.public_cards.append(self.dealer.deal_card())
+        self.public_cards.append(self.dealer.deal_card())
+        self.public_cards.append(self.dealer.deal_card())
+    
+    def _deal_turn(self):
+        """Deal the turn card"""
+        if hasattr(self.dealer, 'current_stage'):
+            self.dealer.current_stage = 'turn'
+        self.public_cards.append(self.dealer.deal_card())
+    
+    def _deal_river(self):
+        """Deal the river card"""
+        if hasattr(self.dealer, 'current_stage'):
+            self.dealer.current_stage = 'river'
+        self.public_cards.append(self.dealer.deal_card())
 
     def get_legal_actions(self):
         """
@@ -120,6 +212,10 @@ def get_legal_actions(self):
         Returns:
             (list): A list of legal actions
         """
+        # If we're waiting for manual cards, no betting actions are allowed
+        if self.stage in (Stage.WAITING_FOR_FLOP, Stage.WAITING_FOR_TURN, Stage.WAITING_FOR_RIVER):
+            return []  # No betting actions allowed while waiting for cards
+            
         return self.round.get_nolimit_legal_actions(players=self.players)
 
     def step(self, action):
@@ -135,6 +231,9 @@ def step(self, action):
                 (dict): next player's state
                 (int): next player id
         """
+        # If we're waiting for manual cards, don't allow any actions
+        if self.stage in (Stage.WAITING_FOR_FLOP, Stage.WAITING_FOR_TURN, Stage.WAITING_FOR_RIVER):
+            raise Exception('Cannot take actions while waiting for manual cards')
 
         if action not in self.get_legal_actions():
             print(action, self.get_legal_actions())
@@ -169,27 +268,53 @@ def step(self, action):
                 while players_in_bypass[self.game_pointer]:
                     self.game_pointer = (self.game_pointer + 1) % self.num_players
 
-            # For the first round, we deal 3 cards
-            if self.round_counter == 0:
-                self.stage = Stage.FLOP
-                self.public_cards.append(self.dealer.deal_card())
-                self.public_cards.append(self.dealer.deal_card())
-                self.public_cards.append(self.dealer.deal_card())
-                if len(self.players) == np.sum(players_in_bypass):
-                    self.round_counter += 1
-            # For the following rounds, we deal only 1 card
-            if self.round_counter == 1:
-                self.stage = Stage.TURN
-                self.public_cards.append(self.dealer.deal_card())
-                if len(self.players) == np.sum(players_in_bypass):
-                    self.round_counter += 1
-            if self.round_counter == 2:
-                self.stage = Stage.RIVER
-                self.public_cards.append(self.dealer.deal_card())
-                if len(self.players) == np.sum(players_in_bypass):
-                    self.round_counter += 1
-
+            # Handle the end of each betting round
+            if self.round_counter == 0:  # End of preflop
+                if self.manual_dealer:
+                    # Check if we have preset flop cards
+                    if hasattr(self.dealer, 'has_preset_cards') and self.dealer.has_preset_cards('flop'):
+                        self._deal_flop()
+                        self.stage = Stage.FLOP
+                    else:
+                        # Wait for flop cards to be set
+                        self.stage = Stage.WAITING_FOR_FLOP
+                else:
+                    # Automatic dealer mode
+                    self._deal_flop()
+                    self.stage = Stage.FLOP
+                
+            elif self.round_counter == 1:  # End of flop
+                if self.manual_dealer:
+                    # Check if we have preset turn card
+                    if hasattr(self.dealer, 'has_preset_cards') and self.dealer.has_preset_cards('turn'):
+                        self._deal_turn()
+                        self.stage = Stage.TURN
+                    else:
+                        # Wait for turn card to be set
+                        self.stage = Stage.WAITING_FOR_TURN
+                else:
+                    # Automatic dealer mode
+                    self._deal_turn()
+                    self.stage = Stage.TURN
+                
+            elif self.round_counter == 2:  # End of turn
+                if self.manual_dealer:
+                    # Check if we have preset river card
+                    if hasattr(self.dealer, 'has_preset_cards') and self.dealer.has_preset_cards('river'):
+                        self._deal_river()
+                        self.stage = Stage.RIVER
+                    else:
+                        # Wait for river card to be set
+                        self.stage = Stage.WAITING_FOR_RIVER
+                else:
+                    # Automatic dealer mode
+                    self._deal_river()
+                    self.stage = Stage.RIVER
+            
+            # Increment round counter
             self.round_counter += 1
+            
+            # Start a new bidding round
             self.round.start_new_round(self.game_pointer)
 
         state = self.get_state(self.game_pointer)
@@ -215,47 +340,12 @@ def get_state(self, player_id):
         state['current_player'] = self.game_pointer
         state['pot'] = self.dealer.pot
         state['stage'] = self.stage
+        
+        # Add waiting_for_cards flag for manual dealer mode
+        if self.stage in (Stage.WAITING_FOR_FLOP, Stage.WAITING_FOR_TURN, Stage.WAITING_FOR_RIVER):
+            state['waiting_for_cards'] = True
+            state['waiting_stage'] = self.stage
+        else:
+            state['waiting_for_cards'] = False
+            
         return state
-
-    def step_back(self):
-        """
-        Return to the previous state of the game
-
-        Returns:
-            (bool): True if the game steps back successfully
-        """
-        if len(self.history) > 0:
-            self.round, self.game_pointer, self.round_counter, self.dealer, self.public_cards, self.players = self.history.pop()
-            self.stage = Stage(self.round_counter)
-            return True
-        return False
-
-    def get_num_players(self):
-        """
-        Return the number of players in no limit texas holdem
-
-        Returns:
-            (int): The number of players in the game
-        """
-        return self.num_players
-
-    def get_payoffs(self):
-        """
-        Return the payoffs of the game
-
-        Returns:
-            (list): Each entry corresponds to the payoff of one player
-        """
-        hands = [p.hand + self.public_cards if p.status in (PlayerStatus.ALIVE, PlayerStatus.ALLIN) else None for p in self.players]
-        chips_payoffs = self.judger.judge_game(self.players, hands)
-        return chips_payoffs
-
-    @staticmethod
-    def get_num_actions():
-        """
-        Return the number of applicable actions
-
-        Returns:
-            (int): The number of actions. There are 6 actions (call, raise_half_pot, raise_pot, all_in, check and fold)
-        """
-        return len(Action)

From f581e003b58691f94b9481e9c5c18ce18e6a364e Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Sun, 20 Apr 2025 02:04:17 +0200
Subject: [PATCH 07/13] add utils

---
 rlcard/games/fixednolimitholdem/utils.py | 36 ++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 rlcard/games/fixednolimitholdem/utils.py

diff --git a/rlcard/games/fixednolimitholdem/utils.py b/rlcard/games/fixednolimitholdem/utils.py
new file mode 100644
index 000000000..b7f59ec4c
--- /dev/null
+++ b/rlcard/games/fixednolimitholdem/utils.py
@@ -0,0 +1,36 @@
+def get_card_id(suit, rank):
+    """
+    Get the card ID based on suit and rank
+    
+    Args:
+        suit (str): The suit of the card ('S', 'H', 'D', 'C')
+        rank (str): The rank of the card ('A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K')
+        
+    Returns:
+        (int): The ID of the card
+    """
+    suit_list = ['S', 'H', 'D', 'C']
+    rank_list = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K']
+    
+    suit_index = suit_list.index(suit)
+    rank_index = rank_list.index(rank)
+    
+    return rank_index + 13 * suit_index
+
+def get_card_from_id(card_id):
+    """
+    Get a Card object from a card ID
+    
+    Args:
+        card_id (int): The ID of the card
+        
+    Returns:
+        (Card): A Card object
+    """
+    suit_list = ['S', 'H', 'D', 'C']
+    rank_list = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K']
+    
+    suit_index = card_id // 13
+    rank_index = card_id % 13
+    
+    return Card(suit_list[suit_index], rank_list[rank_index])
\ No newline at end of file

From 24745eef4a7c5c5cb3d5e2f3d691bb9b985ecf8a Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Sun, 20 Apr 2025 02:05:42 +0200
Subject: [PATCH 08/13] add utils

---
 rlcard/games/fixednolimitholdem/utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rlcard/games/fixednolimitholdem/utils.py b/rlcard/games/fixednolimitholdem/utils.py
index b7f59ec4c..b4524c2cf 100644
--- a/rlcard/games/fixednolimitholdem/utils.py
+++ b/rlcard/games/fixednolimitholdem/utils.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+"""Implement utils for no limit texas holdem"""
 def get_card_id(suit, rank):
     """
     Get the card ID based on suit and rank

From 9d2d0481205a69f12e657df1a38ebbe78ced29d8 Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Sun, 20 Apr 2025 02:05:56 +0200
Subject: [PATCH 09/13] add utils

---
 rlcard/games/fixednolimitholdem/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rlcard/games/fixednolimitholdem/utils.py b/rlcard/games/fixednolimitholdem/utils.py
index b4524c2cf..440f7ccc3 100644
--- a/rlcard/games/fixednolimitholdem/utils.py
+++ b/rlcard/games/fixednolimitholdem/utils.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 """Implement utils for no limit texas holdem"""
+
 def get_card_id(suit, rank):
     """
     Get the card ID based on suit and rank

From f64d66ee9426278e2b800d62e7d098667dbd0456 Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Sun, 20 Apr 2025 02:07:16 +0200
Subject: [PATCH 10/13] add utils

---
 rlcard/games/fixednolimitholdem/utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/rlcard/games/fixednolimitholdem/utils.py b/rlcard/games/fixednolimitholdem/utils.py
index 440f7ccc3..90fc38a4b 100644
--- a/rlcard/games/fixednolimitholdem/utils.py
+++ b/rlcard/games/fixednolimitholdem/utils.py
@@ -20,6 +20,7 @@ def get_card_id(suit, rank):
     
     return rank_index + 13 * suit_index
 
+# Inverse of get_card_id. NOTE(review): Card is not imported in this module; import it before use.
 def get_card_from_id(card_id):
     """
     Get a Card object from a card ID
@@ -36,4 +37,5 @@ def get_card_from_id(card_id):
     suit_index = card_id // 13
     rank_index = card_id % 13
     
-    return Card(suit_list[suit_index], rank_list[rank_index])
\ No newline at end of file
+    return Card(suit_list[suit_index], rank_list[rank_index])
+

From 162d092080809af65d7f71376c6ce234d3362194 Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Sun, 20 Apr 2025 03:59:38 +0200
Subject: [PATCH 11/13] fixes

---
 rlcard/games/fixednolimitholdem/dealer.py | 28 ++++++++----
 rlcard/games/fixednolimitholdem/game.py   | 53 ++++++++++++++++++++++-
 2 files changed, 71 insertions(+), 10 deletions(-)

diff --git a/rlcard/games/fixednolimitholdem/dealer.py b/rlcard/games/fixednolimitholdem/dealer.py
index 31f7efef3..16b87682c 100644
--- a/rlcard/games/fixednolimitholdem/dealer.py
+++ b/rlcard/games/fixednolimitholdem/dealer.py
@@ -11,6 +11,7 @@ def __init__(self, np_random):
         self.preset_river = None
         self.current_stage = None
         self.manual_mode = False  # Default to automatic dealing
+        self.player0_cards_dealt = 0  # Track how many cards have been dealt to player 0
     
     def enable_manual_mode(self):
         """Enable manual card selection mode"""
@@ -25,7 +26,7 @@ def set_player0_hand(self, cards):
         if not self.manual_mode:
             return  # Do nothing if not in manual mode
             
-        self.preset_player0_hand = cards
+        self.preset_player0_hand = cards.copy()  # Make a copy to avoid modifying the original
         # Remove these cards from the deck
         for card in cards:
             if card in self.deck:
@@ -42,7 +43,7 @@ def set_flop(self, cards):
             
         if len(cards) != 3:
             raise ValueError("Flop must consist of exactly 3 cards")
-        self.preset_flop = cards
+        self.preset_flop = cards.copy()  # Make a copy to avoid modifying the original
         # Remove these cards from the deck
         for card in cards:
             if card in self.deck:
@@ -96,19 +97,24 @@ def has_preset_cards(self, stage):
             return self.preset_river is not None
         return False
     
-    def deal_card(self):
+    def deal_card(self, player_id=None):
         """Deal a card from the deck
         
+        Args:
+            player_id (int, optional): The ID of the player to deal to
+            
         Returns:
             (object): A card object
         """
         # Only use preset cards if in manual mode
-        if self.manual_mode:
-            # For player 0's hand
-            if len(self.preset_player0_hand) > 0:
-                return self.preset_player0_hand.pop(0)
+        if self.manual_mode and player_id == 0 and self.player0_cards_dealt < 2 and len(self.preset_player0_hand) > 0:
+            # For player 0's hand (first two cards)
+            card = self.preset_player0_hand.pop(0)
+            self.player0_cards_dealt += 1
+            return card
             
-            # For community cards based on current stage
+        # For community cards based on current stage
+        if self.manual_mode:
             if self.current_stage == 'flop' and len(self.preset_flop) > 0:
                 return self.preset_flop.pop(0)
             elif self.current_stage == 'turn' and self.preset_turn is not None:
@@ -122,3 +128,9 @@ def deal_card(self):
         
         # Default behavior - deal from deck
         return super().deal_card()
+    
+    def shuffle(self):
+        """Shuffle the deck"""
+        super().shuffle()
+        # Reset the player0_cards_dealt counter when shuffling
+        self.player0_cards_dealt = 0
diff --git a/rlcard/games/fixednolimitholdem/game.py b/rlcard/games/fixednolimitholdem/game.py
index 003adcfb3..20548a20e 100644
--- a/rlcard/games/fixednolimitholdem/game.py
+++ b/rlcard/games/fixednolimitholdem/game.py
@@ -96,8 +96,14 @@ def init_game(self):
         self.judger = Judger(self.np_random)
 
         # Deal cards to each player to prepare for the first round
-        for i in range(2 * self.num_players):
-            self.players[i % self.num_players].hand.append(self.dealer.deal_card())
+        for i in range(self.num_players):
+            for _ in range(2):  # Each player gets 2 cards
+                # Pass player_id to deal_card if the dealer supports it
+                if hasattr(self.dealer, 'deal_card') and 'player_id' in self.dealer.deal_card.__code__.co_varnames:
+                    card = self.dealer.deal_card(player_id=i)
+                else:
+                    card = self.dealer.deal_card()
+                self.players[i].hand.append(card)
 
         # Initialize public cards
         self.public_cards = []
@@ -349,3 +355,46 @@ def get_state(self, player_id):
             state['waiting_for_cards'] = False
             
         return state
+
+    def get_payoffs(self):
+        """
+        Return the payoffs of the game
+
+        Returns:
+            (list): Each entry corresponds to the payoff of one player
+        """
+        hands = [p.hand + self.public_cards if p.status in (PlayerStatus.ALIVE, PlayerStatus.ALLIN) else None for p in self.players]
+        chips_payoffs = self.judger.judge_game(self.players, hands)
+        return chips_payoffs
+
+    def get_num_players(self):
+        """
+        Return the number of players in no limit texas holdem
+
+        Returns:
+            (int): The number of players in the game
+        """
+        return self.num_players
+
+    def step_back(self):
+        """
+        Return to the previous state of the game
+
+        Returns:
+            (bool): True if the game steps back successfully
+        """
+        if len(self.history) > 0:
+            self.round, self.game_pointer, self.round_counter, self.dealer, self.public_cards, self.players = self.history.pop()
+            self.stage = Stage(self.round_counter)
+            return True
+        return False
+
+    @staticmethod
+    def get_num_actions():
+        """
+        Return the number of applicable actions
+
+        Returns:
+            (int): The number of actions. There are 6 actions (call, raise_half_pot, raise_pot, all_in, check and fold)
+        """
+        return len(Action)

From 111ff4bcb237f6b334fd61ed4e7a8a35f9f1405c Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Mon, 21 Apr 2025 22:36:01 +0200
Subject: [PATCH 12/13] rules update

---
 rlcard/games/fixednolimitholdem/round.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/rlcard/games/fixednolimitholdem/round.py b/rlcard/games/fixednolimitholdem/round.py
index 45189c090..c07fca4de 100644
--- a/rlcard/games/fixednolimitholdem/round.py
+++ b/rlcard/games/fixednolimitholdem/round.py
@@ -178,6 +178,12 @@ def is_over(self):
         Returns:
             (boolean): True if the current round is over
         """
+        # If all players are either all-in or folded, the round is over
+        if self.not_playing_num >= self.num_players:
+            return True
+        
+        # If all players have checked or called, the round is over
         if self.not_raise_num + self.not_playing_num >= self.num_players:
             return True
+        
         return False

From 1910e58d221a54f7fa16c44bea338d616444b91c Mon Sep 17 00:00:00 2001
From: qialex <qialex.i@gmail.com>
Date: Mon, 21 Apr 2025 22:37:09 +0200
Subject: [PATCH 13/13] rules update

---
 rlcard/games/fixednolimitholdem/game.py | 32 +++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/rlcard/games/fixednolimitholdem/game.py b/rlcard/games/fixednolimitholdem/game.py
index 20548a20e..d7d0c056d 100644
--- a/rlcard/games/fixednolimitholdem/game.py
+++ b/rlcard/games/fixednolimitholdem/game.py
@@ -221,6 +221,11 @@ def get_legal_actions(self):
         # If we're waiting for manual cards, no betting actions are allowed
         if self.stage in (Stage.WAITING_FOR_FLOP, Stage.WAITING_FOR_TURN, Stage.WAITING_FOR_RIVER):
             return []  # No betting actions allowed while waiting for cards
+        
+        # Check if all players are either folded or all-in
+        players_in_bypass = [1 if player.status in (PlayerStatus.FOLDED, PlayerStatus.ALLIN) else 0 for player in self.players]
+        if sum(players_in_bypass) == self.num_players:
+            return []  # No actions allowed when all players are all-in or folded
             
         return self.round.get_nolimit_legal_actions(players=self.players)
 
@@ -266,6 +271,33 @@ def step(self, action):
                 # If the last player has put enough chips, he is also bypassed
                 players_in_bypass[last_player] = 1
 
+        # Check if all players are either folded or all-in
+        if sum(players_in_bypass) == self.num_players:
+            # Skip all betting rounds and deal all remaining community cards
+            if self.stage == Stage.PREFLOP:
+                # Deal flop
+                self._deal_flop()
+                # Deal turn
+                self._deal_turn()
+                # Deal river
+                self._deal_river()
+                self.stage = Stage.SHOWDOWN
+                self.round_counter = 3  # Set to end of river
+            elif self.stage == Stage.FLOP:
+                # Deal turn
+                self._deal_turn()
+                # Deal river
+                self._deal_river()
+                self.stage = Stage.SHOWDOWN
+                self.round_counter = 3  # Set to end of river
+            elif self.stage == Stage.TURN:
+                # Deal river
+                self._deal_river()
+                self.stage = Stage.SHOWDOWN
+                self.round_counter = 3  # Set to end of river
+            elif self.stage == Stage.RIVER:
+                self.stage = Stage.SHOWDOWN
+
         # If a round is over, we deal more public cards
         if self.round.is_over():
             # Game pointer goes to the first player not in bypass after the dealer, if there is one