
Commit 9f704a4

fix merge issues
1 parent 6f1c840 commit 9f704a4

2 files changed: +29 −38 lines


genai_bench/sampling/text.py

Lines changed: 9 additions & 5 deletions
@@ -210,7 +210,8 @@ def _get_current_prefix(self, prefix_length: int) -> str:
         # Get the difference in length between the existing
         # prefix and the desired prefix length

-        current_prefix_length = self.get_token_length(current_prefix)
+        current_prefix_tokens = self.tokenizer.encode(current_prefix)
+        current_prefix_length = len(current_prefix_tokens)
         prefix_length_diff: int = prefix_length - current_prefix_length

         # Generate the prefix if it hasn't been created yet, or add
@@ -221,8 +222,9 @@ def _get_current_prefix(self, prefix_length: int) -> str:

         elif prefix_length_diff < 0:
             # If the prefix is longer than needed, truncate it
-            char_to_token_ratio = len(current_prefix) / current_prefix_length
-            current_prefix = self.prefix[: int(prefix_length * char_to_token_ratio)]
+            current_prefix = self.tokenizer.decode(
+                current_prefix_tokens[:prefix_length]
+            )
         return current_prefix

     def _sample_text(self, num_input_tokens: int) -> str:
@@ -259,10 +261,12 @@ def _sample_text(self, num_input_tokens: int) -> str:

         # Prepend the prefix to all prompts with a randomly picked 4 digits
         prompt = f"{current_prefix}{random.randint(1000,9999)}"
-        left_tokens_to_sample = num_input_tokens - self.get_token_length(prompt)
+
+        prompt_tokens = self.tokenizer.encode(prompt)
+        left_tokens_to_sample = num_input_tokens - len(prompt_tokens)

         if left_tokens_to_sample < 0:
-            return prompt[: self.get_token_length(prompt) + left_tokens_to_sample]
+            return self.tokenizer.decode(prompt_tokens[:num_input_tokens])
         while left_tokens_to_sample > 0:
             random.shuffle(data_copy)
             for line in data_copy:
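All three hunks make the same fix: instead of estimating lengths through a characters-per-token ratio (or, in the over-length branch, slicing a string by a token count), the prompt is truncated by slicing the encoded token list and decoding it back, so the result lands on the requested token budget. A minimal sketch of the difference, assuming a Hugging Face tokenizer; gpt2 is only an illustration, since the diff does not show which tokenizer genai-bench loads:

# Sketch: token-exact truncation vs. the old character-ratio estimate.
# Assumption: a Hugging Face tokenizer; genai-bench may wire this differently.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")

text = "The quick brown fox jumps over the lazy dog. " * 20
target = 50  # desired prompt length in tokens

tokens = tokenizer.encode(text)

# Old approach: scale a character index by an average chars-per-token ratio.
ratio = len(text) / len(tokens)
approx = text[: int(target * ratio)]

# New approach: slice the token ids, then decode.
exact = tokenizer.decode(tokens[:target])

print(len(tokenizer.encode(approx)))  # can land a few tokens off target
print(len(tokenizer.encode(exact)))   # 50, modulo decode/re-encode drift

The decode/re-encode round trip can still drift by a token or two for some tokenizers, but it is exact at the id level, unlike the character-ratio estimate, which degrades as token lengths vary across the text.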

tests/sampling/test_text.py

Lines changed: 20 additions & 33 deletions
@@ -235,6 +235,11 @@ def mock_encode(text):
         # Count actual tokens in result
         # Need to handle mixed content (original lines + decoded text)
         total_tokens = 0
+
+        # All prompts start with 4 numbers, which are 1 token
+        total_tokens += 1
+        result = result[4:]
+
         # Split by our test lines to count tokens properly
         remaining = result
         for line in self.test_data:
@@ -270,38 +275,6 @@ def test_sample_text_truncation(self):

         # Verify decode was called with truncated tokens
         self.tokenizer.decode.assert_called_with(line_tokens[:requested_tokens])
-    def test_sample_chat_prefix_request(self):
-        self.tokenizer.encode.side_effect = [
-            [1] * 0,
-            [1] * 11,
-            [1] * 14,
-            [1] * 11,
-            [1] * 11,
-            [1] * 11,
-            [1] * 11,
-        ]
-        scenario = NormalDistribution(
-            mean_input_tokens=20,
-            stddev_input_tokens=0,
-            mean_output_tokens=20,
-            stddev_output_tokens=0,
-        )
-        prefix_sampler = TextSampler(
-            tokenizer=self.tokenizer,
-            model=self.model,
-            output_modality=self.output_modality,
-            data=self.test_data,
-            use_scenario=True,
-            prompt_prefix_ratio=0.5,  # Set a prefix ratio for testing
-        )
-        result = prefix_sampler.sample(scenario)
-        self.assertIsInstance(result, UserChatRequest)
-        self.assertEqual(result.model, self.model)
-        self.assertTrue(isinstance(result.prompt, str))
-        self.assertGreater(len(result.prompt), 0)
-        # The prompt should start with the generated prefix and a 4-digit number
-        self.assertTrue(result.prompt.startswith(prefix_sampler.prefix))
-        self.assertEqual(len(result.prompt), 20)

     def test_sample_chat_prefix_ratio_request(self):
         """Test prefix generation using ratio."""
@@ -343,7 +316,21 @@ def mock_decode(tokens):
         self.assertEqual(len(result.prompt), 20)

     def test_short_prompt_request(self):
-        self.tokenizer.encode.return_value = [1] * 10
+        """Test that short prompts are handled correctly."""
+
+        def mock_encode(text, add_special_tokens=False):
+            return [1] * len(text)
+
+        self.tokenizer.encode = mock_encode
+
+        # Mock decode to return the original text
+        def mock_decode(tokens):
+            if isinstance(tokens, list):
+                return "a" * len(tokens)  # Return 'a' repeated for the token count
+            return "decoded_text"
+
+        self.tokenizer.decode = mock_decode
+
         self.sampler.data = ["2"]

         # Scenario asks for only 1 input token
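The rewritten test_short_prompt_request swaps a fixed encode.return_value for callable stubs, so token counts track input length (one token per character) and decode returns a string whose length mirrors the sliced token list. A standalone sketch of that stub pattern, assuming the suite's tokenizer fixture is a MagicMock (names here are illustrative, not the repo's actual fixtures):

# Sketch of the one-token-per-character tokenizer stub used above.
# Assumption: the test fixture's tokenizer is a MagicMock.
from unittest.mock import MagicMock

tokenizer = MagicMock()
tokenizer.encode = lambda text, add_special_tokens=False: [1] * len(text)
tokenizer.decode = (
    lambda tokens: "a" * len(tokens) if isinstance(tokens, list) else "decoded_text"
)

assert len(tokenizer.encode("hello")) == 5  # length-dependent, unlike return_value
assert tokenizer.decode([1, 1, 1]) == "aaa"  # decode mirrors the token count

Length-dependent stubs matter here because the truncation logic under test slices token lists; a constant return_value would hide off-by-N errors in the budget arithmetic.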
