2 changes: 1 addition & 1 deletion models/gemini/manifest.yaml
@@ -34,4 +34,4 @@ resource:
tool:
enabled: true
type: plugin
version: 0.4.0
version: 0.4.1
96 changes: 74 additions & 22 deletions models/gemini/models/llm/llm.py
@@ -235,6 +235,48 @@ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]
],
}

@staticmethod
def _calculate_tokens_from_usage_metadata(
usage_metadata: types.GenerateContentResponseUsageMetadata | None,
) -> tuple[int, int]:
"""
Calculate prompt and completion tokens from usage metadata.

:param usage_metadata: Usage metadata from Gemini response
:return: Tuple of (prompt_tokens, completion_tokens)
"""
if not usage_metadata:
return 0, 0

# The pricing of tokens varies depending on the input modality.
prompt_tokens_standard = 0

# [ Pricing ]
# https://ai.google.dev/gemini-api/docs/pricing?hl=zh-cn#gemini-2.5-pro
# FIXME: Dify's pricing model currently cannot account for tokens from multimodal resources
# FIXME: Unable to track token usage for caching, Grounding, or the Live API
for _mtc in usage_metadata.prompt_tokens_details:
if _mtc.modality in [
types.MediaModality.TEXT,
types.MediaModality.IMAGE,
types.MediaModality.VIDEO,
types.MediaModality.MODALITY_UNSPECIFIED,
types.MediaModality.AUDIO,
types.MediaModality.DOCUMENT,
]:
prompt_tokens_standard += _mtc.token_count

# Number of tokens present in thoughts output.
thoughts_token_count = usage_metadata.thoughts_token_count or 0
# Number of tokens in the response(s).
candidates_token_count = usage_metadata.candidates_token_count or 0
# Gemini bills reasoning ("thoughts") tokens and final-answer tokens at the same rate.
completion_tokens = thoughts_token_count + candidates_token_count
# `prompt_tokens` covers the prior conversation turns plus the current input.
prompt_tokens = prompt_tokens_standard

return prompt_tokens, completion_tokens
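
# Illustration only (not part of this diff): a minimal, self-contained sketch of the
# arithmetic the helper above performs, assuming google-genai >= 1.29.0. The numbers
# reuse the usage_metadata sample quoted in the streaming handler's docstring below;
# because every MediaModality value is currently accepted, the per-modality filter
# reduces to a plain sum.
from google.genai import types as genai_types

sample_usage = genai_types.GenerateContentResponseUsageMetadata(
    candidates_token_count=58,
    prompt_token_count=24,
    prompt_tokens_details=[
        genai_types.ModalityTokenCount(
            modality=genai_types.MediaModality.TEXT, token_count=24
        ),
    ],
    thoughts_token_count=862,
    total_token_count=944,
)

# Prompt side: sum the per-modality input token counts.
prompt_tokens = sum(
    (mtc.token_count or 0) for mtc in (sample_usage.prompt_tokens_details or [])
)
# Completion side: reasoning ("thoughts") tokens plus answer tokens, billed alike.
completion_tokens = (sample_usage.thoughts_token_count or 0) + (
    sample_usage.candidates_token_count or 0
)
assert (prompt_tokens, completion_tokens) == (24, 920)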

def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
@@ -500,33 +542,32 @@ def _handle_generate_response(
assistant_prompt_message = AssistantPromptMessage(content=response.text)

# calculate num tokens
if response.usage_metadata:
prompt_tokens = response.usage_metadata.prompt_token_count
if prompt_tokens is None:
raise ValueError("prompt_tokens is None")
completion_tokens = response.usage_metadata.candidates_token_count
if completion_tokens is None:
raise ValueError("completion_tokens is None")
else:
prompt_tokens, completion_tokens = self._calculate_tokens_from_usage_metadata(
response.usage_metadata
)

# Fallback to manual calculation if tokens are not available
if prompt_tokens == 0 or completion_tokens == 0:
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])

# transform usage
# copy credentials to avoid modifying the original dict
usage = self._calc_response_usage(
model, dict(credentials), prompt_tokens, completion_tokens
model=model,
credentials=dict(credentials),
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
)

# transform response
result = LLMResult(
return LLMResult(
model=model,
prompt_messages=prompt_messages,
message=assistant_prompt_message,
usage=usage,
)

return result

def _handle_generate_stream_response(
self,
model: str,
@@ -537,6 +578,20 @@ def _handle_generate_stream_response(
"""
Handle llm stream response

# -- Usage Sample -- #
chunk.usage_metadata=GenerateContentResponseUsageMetadata(
candidates_token_count=58,
prompt_token_count=24,
prompt_tokens_details=[
ModalityTokenCount(
modality=<MediaModality.TEXT: 'TEXT'>,
token_count=24
),
],
thoughts_token_count=862,
total_token_count=944
)

:param model: model name
:param credentials: credentials
:param response: response
@@ -547,6 +602,7 @@
prompt_tokens = 0
completion_tokens = 0
self.is_thinking = False

for chunk in response:
if not chunk.candidates:
continue
@@ -557,10 +613,6 @@
message = self._parse_parts(candidate.content.parts)
index += len(candidate.content.parts)

# TODO(QIN2DIM): Fix the issue of inaccurate counting of Gemini Tokens
if chunk.usage_metadata:
completion_tokens += chunk.usage_metadata.candidates_token_count or 0

# if the stream is not finished, yield the chunk
if not candidate.finish_reason:
yield LLMResultChunk(
@@ -573,12 +625,12 @@
# If we're still in thinking mode at the end, close it
if self.is_thinking:
message.content.append(TextPromptMessageContent(data="\n\n</think>"))
if chunk.usage_metadata:
prompt_tokens = chunk.usage_metadata.prompt_token_count or 0
if chunk.usage_metadata.thoughts_token_count:
completion_tokens = (
completion_tokens + chunk.usage_metadata.thoughts_token_count
)

prompt_tokens, completion_tokens = self._calculate_tokens_from_usage_metadata(
chunk.usage_metadata
)

# Fallback to manual calculation if tokens are not available
if prompt_tokens == 0 or completion_tokens == 0:
prompt_tokens = self.get_num_tokens(
model=model, credentials=credentials, prompt_messages=prompt_messages
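For context, a minimal end-to-end sketch of the streaming pattern the handler above relies on: keep the latest chunk's usage_metadata while consuming the stream, then split it into prompt and completion tokens the same way the new helper does. This is illustration only, not plugin code; it assumes google-genai >= 1.29.0, a GEMINI_API_KEY (or GOOGLE_API_KEY) in the environment, and an illustrative model name.

from google import genai

client = genai.Client()  # picks up the API key from the environment
last_usage = None
for chunk in client.models.generate_content_stream(
    model="gemini-2.5-pro",
    contents="Briefly explain what usage_metadata reports.",
):
    if chunk.text:
        print(chunk.text, end="")
    if chunk.usage_metadata:
        last_usage = chunk.usage_metadata  # keep the latest; the final chunk carries the totals

if last_usage is not None:
    prompt_tokens = sum(
        (mtc.token_count or 0) for mtc in (last_usage.prompt_tokens_details or [])
    )
    completion_tokens = (last_usage.thoughts_token_count or 0) + (
        last_usage.candidates_token_count or 0
    )
    print(f"\nprompt_tokens={prompt_tokens} completion_tokens={completion_tokens}")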
2 changes: 1 addition & 1 deletion models/gemini/pyproject.toml
@@ -8,7 +8,7 @@ requires-python = ">=3.12"
# uv pip compile pyproject.toml -o ./requirements.txt
dependencies = [
"dify-plugin>=0.4.3,<0.5.0",
"google-genai>=1.27.0,<2.0.0",
"google-genai>=1.29.0,<2.0.0",
"google-generativeai>=0.8.5",
"numpy>=2.3.2",
]
2 changes: 1 addition & 1 deletion models/gemini/requirements.txt
@@ -52,7 +52,7 @@ google-auth==2.40.3
# google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-genai==1.27.0
google-genai==1.29.0
# via gemini-g9cie8 (pyproject.toml)
google-generativeai==0.8.5
# via gemini-g9cie8 (pyproject.toml)