diff --git a/python/packages/autogen-core/src/autogen_core/models/_types.py b/python/packages/autogen-core/src/autogen_core/models/_types.py
index 6fd2e5c3534d..273ed5be225f 100644
--- a/python/packages/autogen-core/src/autogen_core/models/_types.py
+++ b/python/packages/autogen-core/src/autogen_core/models/_types.py
@@ -86,6 +86,7 @@ class FunctionExecutionResultMessage(BaseModel):
 class RequestUsage:
     prompt_tokens: int
     completion_tokens: int
+    cached_tokens: int = 0
 
 
 FinishReasons = Literal["stop", "length", "function_calls", "content_filter", "unknown"]
diff --git a/python/packages/autogen-ext/src/autogen_ext/models/anthropic/_anthropic_client.py b/python/packages/autogen-ext/src/autogen_ext/models/anthropic/_anthropic_client.py
index 6f68cf7b8cbc..fef98eac8edc 100644
--- a/python/packages/autogen-ext/src/autogen_ext/models/anthropic/_anthropic_client.py
+++ b/python/packages/autogen-ext/src/autogen_ext/models/anthropic/_anthropic_client.py
@@ -685,6 +685,7 @@ async def create(
         usage = RequestUsage(
             prompt_tokens=result.usage.input_tokens,
             completion_tokens=result.usage.output_tokens,
+            cached_tokens=result.usage.cache_read_input_tokens or 0,
         )
 
         serializable_messages: List[Dict[str, Any]] = [self._serialize_message(msg) for msg in anthropic_messages]
diff --git a/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py b/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py
index a80e912534ab..c73b5da36283 100644
--- a/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py
+++ b/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py
@@ -711,6 +711,7 @@ async def create(
             # TODO backup token counting
             prompt_tokens=getattr(result.usage, "prompt_tokens", 0) if result.usage is not None else 0,
             completion_tokens=getattr(result.usage, "completion_tokens", 0) if result.usage is not None else 0,
+            cached_tokens=getattr(getattr(result.usage, "prompt_tokens_details", None), "cached_tokens", 0) or 0 if result.usage is not None else 0,
         )
 
         logger.info(