diff --git a/lm_eval/models/openai_completions.py b/lm_eval/models/openai_completions.py
index d89f63d31e..96aa7ea0f0 100644
--- a/lm_eval/models/openai_completions.py
+++ b/lm_eval/models/openai_completions.py
@@ -95,7 +95,15 @@ def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> List[str]:
         for out in outputs:
             tmp = [None] * len(out["choices"])
             for choices in out["choices"]:
-                tmp[choices["index"]] = choices["text"]
+                x = choices["text"]
+                content = x if x is not None else ""
+                if not content:
+                    eval_logger.warning(
+                        f"Received empty response for choice {choices['index']}. "
+                        "This can happen when using reasoning models if the model spends the entire token budget on reasoning. "
+                        "Consider increasing the number of allowed tokens."
+                    )
+                tmp[choices["index"]] = content
             res = res + tmp
         return res
 
@@ -167,7 +175,15 @@ def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> List[str]:
         for out in outputs:
             tmp = [None] * len(out["choices"])
             for choices in out["choices"]:
-                tmp[choices["index"]] = choices["message"]["content"]
+                x = choices["message"]["content"]
+                content = x if x is not None else ""
+                if not content:
+                    eval_logger.warning(
+                        f"Received empty response for choice {choices['index']}. "
+                        "This can happen when using reasoning models if the model spends the entire token budget on reasoning. "
+                        "Consider increasing the number of allowed tokens."
+                    )
+                tmp[choices["index"]] = content
             res = res + tmp
         return res
 
diff --git a/tests/models/test_api.py b/tests/models/test_api.py
index 2db2237747..4ae368d83c 100644
--- a/tests/models/test_api.py
+++ b/tests/models/test_api.py
@@ -3,6 +3,7 @@
 
 import pytest
 
+from lm_eval.api.instance import Instance
 from lm_eval.models.openai_completions import LocalCompletionsAPI
 
 
@@ -161,6 +162,29 @@ def test_model_tokenized_call_usage(
     assert result == {"result": "success"}
 
 
+def test_generate_until_with_null_message_content(api):
+    with patch("requests.post") as mock_post:
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "choices": [
+                {
+                    "index": 0,
+                    "text": None,
+                }
+            ]
+        }
+        mock_response.ok = True
+        mock_post.return_value = mock_response
+        request = Instance(
+            request_type="generate_until",
+            doc={},
+            arguments=("Test prompt", {"max_gen_toks": 10}),
+            idx=0,
+        )
+
+        _ = api.generate_until([request])
+
+
 class DummyAsyncContextManager:
     def __init__(self, result):
         self.result = result