Commit edf201e

Authored by WzzP, e-zhenwei.wang1, gemini-code-assist[bot], and crazywoola
Fix: Tongyi Qianwen model temporary file leak issue. (#2076)
* Fix: Tongyi Qianwen model temporary file leak
  - Add a temporary-file tracking mechanism: create a _temp_files list when the class is initialized
  - Record newly created temporary files in the _save_base64_to_file method
  - Add a _cleanup_temp_files method for removing temporary files
  - Use try-finally in _handle_generate_response and _handle_generate_stream_response to ensure temporary files are cleaned up
  - Fix the temporary file leak in the _upload_file_to_tongyi method
  This fix stops temporary files from accumulating when handling base64-encoded images/videos and document uploads, so long-running services no longer lose disk space to leftover files.

* Update models/tongyi/models/llm/llm.py

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Improve: add logging and clear the cleanup list
  - Import the logging module and create a logger instance
  - Log files that fail to be deleted in the _cleanup_temp_files method
  - Log failures in the finally block of _upload_file_to_tongyi
  - Clear the _temp_files list at the end of _cleanup_temp_files
  These improvements address the silently swallowed exceptions flagged in code review and make debugging and monitoring easier.

* add: bump the version number

* fix

---------

Co-authored-by: e-zhenwei.wang1 <[email protected]>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <[email protected]>
1 parent 02105b9 commit edf201e
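For reference, a minimal, self-contained sketch of the tracking-and-cleanup pattern the commit message describes. Only the _temp_files list and the _cleanup_temp_files method mirror names from the diff below; the class and the _save_bytes_to_temp_file helper are illustrative stand-ins, not the plugin's actual code.

import logging
import os
import tempfile
import uuid

logger = logging.getLogger(__name__)


class TempFileTracker:
    """Illustrative sketch only; the real change lives in TongyiLargeLanguageModel."""

    def __init__(self):
        # Every temp file created during one invocation is recorded here.
        self._temp_files = []

    def _save_bytes_to_temp_file(self, data: bytes, suffix: str = "bin") -> str:
        # Analogue of _save_base64_to_file: write to a named temp path and track it.
        file_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.{suffix}")
        with open(file_path, "wb") as f:
            f.write(data)
        self._temp_files.append(file_path)
        return f"file://{file_path}"

    def _cleanup_temp_files(self) -> None:
        # Best-effort removal; failures are logged, then the list is reset.
        for file_path in self._temp_files:
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
            except Exception as e:
                logger.warning(f"Failed to remove temporary file {file_path}: {e}")
        self._temp_files.clear()


tracker = TempFileTracker()
try:
    uri = tracker._save_bytes_to_temp_file(b"fake image bytes", suffix="png")
    print(uri)  # file:///tmp/<uuid>.png (path is platform dependent)
finally:
    # The try/finally mirrors _handle_generate_response: cleanup always runs.
    tracker._cleanup_temp_files()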

2 files changed: +144 −109 lines changed
models/tongyi/manifest.yaml
Lines changed: 1 addition & 1 deletion

@@ -25,5 +25,5 @@ resource:
     model:
       enabled: false
 type: plugin
-version: 0.1.1
+version: 0.1.3
 created_at: "2024-09-20T00:13:50.29298939-04:00"

models/tongyi/models/llm/llm.py
Lines changed: 143 additions & 108 deletions

@@ -1,4 +1,5 @@
 import base64
+import logging
 import os
 import tempfile
 import uuid
@@ -60,10 +61,16 @@
 from dify_plugin.interfaces.model.large_language_model import LargeLanguageModel
 from openai import OpenAI

+logger = logging.getLogger(__name__)
+

 class TongyiLargeLanguageModel(LargeLanguageModel):
     tokenizers = {}

+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._temp_files = []
+
     def _invoke(
         self,
         model: str,
@@ -266,26 +273,29 @@ def _handle_generate_response(
         :param prompt_messages: prompt messages
         :return: llm response
         """
-        if response.status_code not in {200, HTTPStatus.OK}:
-            self._handle_error_response(response.status_code, response.message)
-        resp_content = response.output.choices[0].message.content
-        # special for qwen-vl
-        if isinstance(resp_content, list):
-            resp_content = resp_content[0]["text"]
-        assistant_prompt_message = AssistantPromptMessage(content=resp_content)
-        usage = self._calc_response_usage(
-            model,
-            credentials,
-            response.usage.input_tokens,
-            response.usage.output_tokens,
-        )
-        result = LLMResult(
-            model=model,
-            message=assistant_prompt_message,
-            prompt_messages=prompt_messages,
-            usage=usage,
-        )
-        return result
+        try:
+            if response.status_code not in {200, HTTPStatus.OK}:
+                self._handle_error_response(response.status_code, response.message)
+            resp_content = response.output.choices[0].message.content
+            # special for qwen-vl
+            if isinstance(resp_content, list):
+                resp_content = resp_content[0]["text"]
+            assistant_prompt_message = AssistantPromptMessage(content=resp_content)
+            usage = self._calc_response_usage(
+                model,
+                credentials,
+                response.usage.input_tokens,
+                response.usage.output_tokens,
+            )
+            result = LLMResult(
+                model=model,
+                message=assistant_prompt_message,
+                prompt_messages=prompt_messages,
+                usage=usage,
+            )
+            return result
+        finally:
+            self._cleanup_temp_files()

     def _handle_tool_call_stream(self, response, tool_calls, incremental_output):
         tool_calls_stream = response.output.choices[0].message["tool_calls"]
@@ -331,83 +341,86 @@ def _handle_generate_stream_response(
         # This is used to handle unincremental output correctly
         full_text = ""
         tool_calls = []
-        for index, response in enumerate(responses):
-            if response.status_code not in {200, HTTPStatus.OK}:
-                self._handle_error_response(response.status_code, response.message, model)
-            resp_finish_reason = response.output.choices[0].finish_reason
-            if resp_finish_reason is not None and resp_finish_reason != "null":
-                resp_content = response.output.choices[0].message.content
-                assistant_prompt_message = AssistantPromptMessage(content="")
-                if "tool_calls" in response.output.choices[0].message:
-                    self._handle_tool_call_stream(response, tool_calls, incremental_output)
-                elif resp_content:
-                    if isinstance(resp_content, list):
-                        resp_content = resp_content[0]["text"]
-                    if incremental_output:
-                        assistant_prompt_message.content = resp_content
-                        full_text += resp_content
-                    else:
-                        assistant_prompt_message.content = resp_content.replace(
-                            full_text, "", 1
-                        )
-                        full_text = resp_content
-                elif is_reasoning:
-                    assistant_prompt_message.content = "\n</think>"
-                    full_text += "\n</think>"
-                if tool_calls:
-                    message_tool_calls = []
-                    for tool_call_obj in tool_calls:
-                        message_tool_call = AssistantPromptMessage.ToolCall(
-                            id=tool_call_obj["function"]["name"],
-                            type="function",
-                            function=AssistantPromptMessage.ToolCall.ToolCallFunction(
-                                name=tool_call_obj["function"]["name"],
-                                arguments=tool_call_obj["function"]["arguments"],
-                            ),
-                        )
-                        message_tool_calls.append(message_tool_call)
-                    assistant_prompt_message.tool_calls = message_tool_calls
-                usage = response.usage
-                usage = self._calc_response_usage(
-                    model, credentials, usage.input_tokens, usage.output_tokens
-                )
-                yield LLMResultChunk(
-                    model=model,
-                    prompt_messages=prompt_messages,
-                    delta=LLMResultChunkDelta(
-                        index=index,
-                        message=assistant_prompt_message,
-                        finish_reason=resp_finish_reason,
-                        usage=usage,
-                    ),
-                )
-            else:
-                message = response.output.choices[0].message
-
-                resp_content, is_reasoning = self._wrap_thinking_by_reasoning_content(
-                    message, is_reasoning
-                )
-                if not resp_content:
-                    if "tool_calls" in response.output.choices[0].message:
-                        self._handle_tool_call_stream(response, tool_calls, incremental_output)
-                    continue
-                if incremental_output:
-                    delta = resp_content
-                    full_text += delta
-                else:
-                    delta = resp_content.replace(full_text, "", 1)
-                    full_text = resp_content
-
-                assistant_prompt_message = AssistantPromptMessage(
-                    content=delta
-                )
-                yield LLMResultChunk(
-                    model=model,
-                    prompt_messages=prompt_messages,
-                    delta=LLMResultChunkDelta(
-                        index=index, message=assistant_prompt_message
-                    ),
-                )
+        try:
+            for index, response in enumerate(responses):
+                if response.status_code not in {200, HTTPStatus.OK}:
+                    self._handle_error_response(response.status_code, response.message, model)
+                resp_finish_reason = response.output.choices[0].finish_reason
+                if resp_finish_reason is not None and resp_finish_reason != "null":
+                    resp_content = response.output.choices[0].message.content
+                    assistant_prompt_message = AssistantPromptMessage(content="")
+                    if "tool_calls" in response.output.choices[0].message:
+                        self._handle_tool_call_stream(response, tool_calls, incremental_output)
+                    elif resp_content:
+                        if isinstance(resp_content, list):
+                            resp_content = resp_content[0]["text"]
+                        if incremental_output:
+                            assistant_prompt_message.content = resp_content
+                            full_text += resp_content
+                        else:
+                            assistant_prompt_message.content = resp_content.replace(
+                                full_text, "", 1
+                            )
+                            full_text = resp_content
+                    elif is_reasoning:
+                        assistant_prompt_message.content = "\n</think>"
+                        full_text += "\n</think>"
+                    if tool_calls:
+                        message_tool_calls = []
+                        for tool_call_obj in tool_calls:
+                            message_tool_call = AssistantPromptMessage.ToolCall(
+                                id=tool_call_obj["function"]["name"],
+                                type="function",
+                                function=AssistantPromptMessage.ToolCall.ToolCallFunction(
+                                    name=tool_call_obj["function"]["name"],
+                                    arguments=tool_call_obj["function"]["arguments"],
+                                ),
+                            )
+                            message_tool_calls.append(message_tool_call)
+                        assistant_prompt_message.tool_calls = message_tool_calls
+                    usage = response.usage
+                    usage = self._calc_response_usage(
+                        model, credentials, usage.input_tokens, usage.output_tokens
+                    )
+                    yield LLMResultChunk(
+                        model=model,
+                        prompt_messages=prompt_messages,
+                        delta=LLMResultChunkDelta(
+                            index=index,
+                            message=assistant_prompt_message,
+                            finish_reason=resp_finish_reason,
+                            usage=usage,
+                        ),
+                    )
+                else:
+                    message = response.output.choices[0].message
+
+                    resp_content, is_reasoning = self._wrap_thinking_by_reasoning_content(
+                        message, is_reasoning
+                    )
+                    if not resp_content:
+                        if "tool_calls" in response.output.choices[0].message:
+                            self._handle_tool_call_stream(response, tool_calls, incremental_output)
+                        continue
+                    if incremental_output:
+                        delta = resp_content
+                        full_text += delta
+                    else:
+                        delta = resp_content.replace(full_text, "", 1)
+                        full_text = resp_content
+
+                    assistant_prompt_message = AssistantPromptMessage(
+                        content=delta
+                    )
+                    yield LLMResultChunk(
+                        model=model,
+                        prompt_messages=prompt_messages,
+                        delta=LLMResultChunkDelta(
+                            index=index, message=assistant_prompt_message
+                        ),
+                    )
+        finally:
+            self._cleanup_temp_files()

     def _to_credential_kwargs(self, credentials: dict) -> dict:
         """
@@ -582,8 +595,19 @@ def _save_base64_to_file(self, base64_data: str) -> str:
         temp_dir = tempfile.gettempdir()
         file_path = os.path.join(temp_dir, f"{uuid.uuid4()}.{mime_type.split('/')[1]}")
         Path(file_path).write_bytes(base64.b64decode(encoded_string))
+        self._temp_files.append(file_path)
         return f"file://{file_path}"

+    def _cleanup_temp_files(self):
+        """Clean up temporary files"""
+        for file_path in self._temp_files:
+            try:
+                if os.path.exists(file_path):
+                    os.remove(file_path)
+            except Exception as e:
+                logger.warning(f"Failed to remove temporary file {file_path}: {e}")
+        self._temp_files.clear()
+
     def _upload_file_to_tongyi(
         self, credentials: dict, message_content: DocumentPromptMessageContent
     ) -> str:
@@ -603,22 +627,33 @@ def _upload_file_to_tongyi(
             api_key=credentials.dashscope_api_key,
             base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
         )
-        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-            if message_content.base64_data:
-                file_content = base64.b64decode(message_content.base64_data)
-                temp_file.write(file_content)
-            else:
-                try:
-                    response = requests.get(message_content.url, timeout=60)
-                    response.raise_for_status()
-                    temp_file.write(response.content)
-                except Exception as ex:
-                    raise ValueError(
-                        f"Failed to fetch data from url {message_content.url}, {ex}"
-                    ) from ex
-            temp_file.flush()
-            response = client.files.create(file=temp_file, purpose="file-extract")
-            return response.id
+        temp_file_path = None
+        try:
+            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                temp_file_path = temp_file.name
+                if message_content.base64_data:
+                    file_content = base64.b64decode(message_content.base64_data)
+                    temp_file.write(file_content)
+                else:
+                    try:
+                        response = requests.get(message_content.url, timeout=60)
+                        response.raise_for_status()
+                        temp_file.write(response.content)
+                    except Exception as ex:
+                        raise ValueError(
+                            f"Failed to fetch data from url {message_content.url}, {ex}"
+                        ) from ex
+                temp_file.flush()
+                temp_file.seek(0)
+                response = client.files.create(file=temp_file, purpose="file-extract")
+                return response.id
+        finally:
+            # Clean up temporary file after upload
+            if temp_file_path and os.path.exists(temp_file_path):
+                try:
+                    os.remove(temp_file_path)
+                except Exception as e:
+                    logger.warning(f"Failed to remove temporary file {temp_file_path}: {e}")

     def _convert_tools(self, tools: list[PromptMessageTool]) -> list[dict]:
         """
