Commit edf201e

Authored by WzzP, e-zhenwei.wang1, gemini-code-assist[bot], and crazywoola
Fix: Tongyi Qianwen model temporary file leak issue. (#2076)
* Fix: Tongyi Qianwen model temporary file leak
  - Add a temporary-file tracking mechanism: create a _temp_files list when the class is initialized
  - Record newly created temporary files in the _save_base64_to_file method
  - Add a _cleanup_temp_files method for removing temporary files
  - Use try-finally in _handle_generate_response and _handle_generate_stream_response to ensure temporary files are cleaned up
  - Fix the temporary file leak in the _upload_file_to_tongyi method
  This fix stops temporary files from accumulating when handling base64-encoded images/videos and document uploads, so long-running services no longer lose disk space to leftover files.

* Update models/tongyi/models/llm/llm.py

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Improve: add logging and clear the cleanup list
  - Import the logging module and create a logger instance
  - Log files that fail to be deleted in the _cleanup_temp_files method
  - Log failures in the finally block of _upload_file_to_tongyi
  - Clear the _temp_files list at the end of _cleanup_temp_files
  These improvements address the silently swallowed exceptions flagged in code review and make debugging and monitoring easier.

* add: bump the version number

* fix

---------

Co-authored-by: e-zhenwei.wang1 <[email protected]>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <[email protected]>
1 parent 02105b9 commit edf201e
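For reference, a minimal, self-contained sketch of the tracking-and-cleanup pattern the commit message describes. Only the _temp_files list and the _cleanup_temp_files method mirror names from the diff below; the class and the _save_bytes_to_temp_file helper are illustrative stand-ins, not the plugin's actual code.

import logging
import os
import tempfile
import uuid

logger = logging.getLogger(__name__)


class TempFileTracker:
    """Illustrative sketch only; the real change lives in TongyiLargeLanguageModel."""

    def __init__(self):
        # Every temp file created during one invocation is recorded here.
        self._temp_files = []

    def _save_bytes_to_temp_file(self, data: bytes, suffix: str = "bin") -> str:
        # Analogue of _save_base64_to_file: write to a named temp path and track it.
        file_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.{suffix}")
        with open(file_path, "wb") as f:
            f.write(data)
        self._temp_files.append(file_path)
        return f"file://{file_path}"

    def _cleanup_temp_files(self) -> None:
        # Best-effort removal; failures are logged, then the list is reset.
        for file_path in self._temp_files:
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
            except Exception as e:
                logger.warning(f"Failed to remove temporary file {file_path}: {e}")
        self._temp_files.clear()


tracker = TempFileTracker()
try:
    uri = tracker._save_bytes_to_temp_file(b"fake image bytes", suffix="png")
    print(uri)  # file:///tmp/<uuid>.png (path is platform dependent)
finally:
    # The try/finally mirrors _handle_generate_response: cleanup always runs.
    tracker._cleanup_temp_files()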

2 files changed: +144 −109 lines changed
models/tongyi/manifest.yaml
Lines changed: 1 addition & 1 deletion

@@ -25,5 +25,5 @@ resource:
     model:
       enabled: false
 type: plugin
-version: 0.1.1
+version: 0.1.3
 created_at: "2024-09-20T00:13:50.29298939-04:00"

models/tongyi/models/llm/llm.py
Lines changed: 143 additions & 108 deletions

@@ -1,4 +1,5 @@
 import base64
+import logging
 import os
 import tempfile
 import uuid
@@ -60,10 +61,16 @@
 from dify_plugin.interfaces.model.large_language_model import LargeLanguageModel
 from openai import OpenAI

+logger = logging.getLogger(__name__)
+

 class TongyiLargeLanguageModel(LargeLanguageModel):
     tokenizers = {}

+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._temp_files = []
+
     def _invoke(
         self,
         model: str,
@@ -266,26 +273,29 @@ def _handle_generate_response(
         :param prompt_messages: prompt messages
         :return: llm response
         """
-        if response.status_code not in {200, HTTPStatus.OK}:
-            self._handle_error_response(response.status_code, response.message)
-        resp_content = response.output.choices[0].message.content
-        # special for qwen-vl
-        if isinstance(resp_content, list):
-            resp_content = resp_content[0]["text"]
-        assistant_prompt_message = AssistantPromptMessage(content=resp_content)
-        usage = self._calc_response_usage(
-            model,
-            credentials,
-            response.usage.input_tokens,
-            response.usage.output_tokens,
-        )
-        result = LLMResult(
-            model=model,
-            message=assistant_prompt_message,
-            prompt_messages=prompt_messages,
-            usage=usage,
-        )
-        return result
+        try:
+            if response.status_code not in {200, HTTPStatus.OK}:
+                self._handle_error_response(response.status_code, response.message)
+            resp_content = response.output.choices[0].message.content
+            # special for qwen-vl
+            if isinstance(resp_content, list):
+                resp_content = resp_content[0]["text"]
+            assistant_prompt_message = AssistantPromptMessage(content=resp_content)
+            usage = self._calc_response_usage(
+                model,
+                credentials,
+                response.usage.input_tokens,
+                response.usage.output_tokens,
+            )
+            result = LLMResult(
+                model=model,
+                message=assistant_prompt_message,
+                prompt_messages=prompt_messages,
+                usage=usage,
+            )
+            return result
+        finally:
+            self._cleanup_temp_files()

     def _handle_tool_call_stream(self, response, tool_calls, incremental_output):
         tool_calls_stream = response.output.choices[0].message["tool_calls"]
@@ -331,83 +341,86 @@ def _handle_generate_stream_response(
         # This is used to handle unincremental output correctly
         full_text = ""
         tool_calls = []
-        for index, response in enumerate(responses):
-            if response.status_code not in {200, HTTPStatus.OK}:
-                self._handle_error_response(response.status_code, response.message, model)
-            resp_finish_reason = response.output.choices[0].finish_reason
-            if resp_finish_reason is not None and resp_finish_reason != "null":
-                resp_content = response.output.choices[0].message.content
-                assistant_prompt_message = AssistantPromptMessage(content="")
-                if "tool_calls" in response.output.choices[0].message:
-                    self._handle_tool_call_stream(response, tool_calls, incremental_output)
-                elif resp_content:
-                    if isinstance(resp_content, list):
-                        resp_content = resp_content[0]["text"]
-                    if incremental_output:
-                        assistant_prompt_message.content = resp_content
-                        full_text += resp_content
-                    else:
-                        assistant_prompt_message.content = resp_content.replace(
-                            full_text, "", 1
-                        )
-                        full_text = resp_content
-                elif is_reasoning:
-                    assistant_prompt_message.content = "\n</think>"
-                    full_text += "\n</think>"
-                if tool_calls:
-                    message_tool_calls = []
-                    for tool_call_obj in tool_calls:
-                        message_tool_call = AssistantPromptMessage.ToolCall(
-                            id=tool_call_obj["function"]["name"],
-                            type="function",
-                            function=AssistantPromptMessage.ToolCall.ToolCallFunction(
-                                name=tool_call_obj["function"]["name"],
-                                arguments=tool_call_obj["function"]["arguments"],
-                            ),
-                        )
-                        message_tool_calls.append(message_tool_call)
-                    assistant_prompt_message.tool_calls = message_tool_calls
-                usage = response.usage
-                usage = self._calc_response_usage(
-                    model, credentials, usage.input_tokens, usage.output_tokens
-                )
-                yield LLMResultChunk(
-                    model=model,
-                    prompt_messages=prompt_messages,
-                    delta=LLMResultChunkDelta(
-                        index=index,
-                        message=assistant_prompt_message,
-                        finish_reason=resp_finish_reason,
-                        usage=usage,
-                    ),
-                )
-            else:
-                message = response.output.choices[0].message
-
-                resp_content, is_reasoning = self._wrap_thinking_by_reasoning_content(
-                    message, is_reasoning
-                )
-                if not resp_content:
-                    if "tool_calls" in response.output.choices[0].message:
-                        self._handle_tool_call_stream(response, tool_calls, incremental_output)
-                    continue
-                if incremental_output:
-                    delta = resp_content
-                    full_text += delta
-                else:
-                    delta = resp_content.replace(full_text, "", 1)
-                    full_text = resp_content
-
-                assistant_prompt_message = AssistantPromptMessage(
-                    content=delta
-                )
-                yield LLMResultChunk(
-                    model=model,
-                    prompt_messages=prompt_messages,
-                    delta=LLMResultChunkDelta(
-                        index=index, message=assistant_prompt_message
-                    ),
-                )
+        try:
+            for index, response in enumerate(responses):
+                if response.status_code not in {200, HTTPStatus.OK}:
+                    self._handle_error_response(response.status_code, response.message, model)
+                resp_finish_reason = response.output.choices[0].finish_reason
+                if resp_finish_reason is not None and resp_finish_reason != "null":
+                    resp_content = response.output.choices[0].message.content
+                    assistant_prompt_message = AssistantPromptMessage(content="")
+                    if "tool_calls" in response.output.choices[0].message:
+                        self._handle_tool_call_stream(response, tool_calls, incremental_output)
+                    elif resp_content:
+                        if isinstance(resp_content, list):
+                            resp_content = resp_content[0]["text"]
+                        if incremental_output:
+                            assistant_prompt_message.content = resp_content
+                            full_text += resp_content
+                        else:
+                            assistant_prompt_message.content = resp_content.replace(
+                                full_text, "", 1
+                            )
+                            full_text = resp_content
+                    elif is_reasoning:
+                        assistant_prompt_message.content = "\n</think>"
+                        full_text += "\n</think>"
+                    if tool_calls:
+                        message_tool_calls = []
+                        for tool_call_obj in tool_calls:
+                            message_tool_call = AssistantPromptMessage.ToolCall(
+                                id=tool_call_obj["function"]["name"],
+                                type="function",
+                                function=AssistantPromptMessage.ToolCall.ToolCallFunction(
+                                    name=tool_call_obj["function"]["name"],
+                                    arguments=tool_call_obj["function"]["arguments"],
+                                ),
+                            )
+                            message_tool_calls.append(message_tool_call)
+                        assistant_prompt_message.tool_calls = message_tool_calls
+                    usage = response.usage
+                    usage = self._calc_response_usage(
+                        model, credentials, usage.input_tokens, usage.output_tokens
+                    )
+                    yield LLMResultChunk(
+                        model=model,
+                        prompt_messages=prompt_messages,
+                        delta=LLMResultChunkDelta(
+                            index=index,
+                            message=assistant_prompt_message,
+                            finish_reason=resp_finish_reason,
+                            usage=usage,
+                        ),
+                    )
+                else:
+                    message = response.output.choices[0].message
+
+                    resp_content, is_reasoning = self._wrap_thinking_by_reasoning_content(
+                        message, is_reasoning
+                    )
+                    if not resp_content:
+                        if "tool_calls" in response.output.choices[0].message:
+                            self._handle_tool_call_stream(response, tool_calls, incremental_output)
+                        continue
+                    if incremental_output:
+                        delta = resp_content
+                        full_text += delta
+                    else:
+                        delta = resp_content.replace(full_text, "", 1)
+                        full_text = resp_content
+
+                    assistant_prompt_message = AssistantPromptMessage(
+                        content=delta
+                    )
+                    yield LLMResultChunk(
+                        model=model,
+                        prompt_messages=prompt_messages,
+                        delta=LLMResultChunkDelta(
+                            index=index, message=assistant_prompt_message
+                        ),
+                    )
+        finally:
+            self._cleanup_temp_files()

     def _to_credential_kwargs(self, credentials: dict) -> dict:
         """
@@ -582,8 +595,19 @@ def _save_base64_to_file(self, base64_data: str) -> str:
         temp_dir = tempfile.gettempdir()
         file_path = os.path.join(temp_dir, f"{uuid.uuid4()}.{mime_type.split('/')[1]}")
         Path(file_path).write_bytes(base64.b64decode(encoded_string))
+        self._temp_files.append(file_path)
         return f"file://{file_path}"

+    def _cleanup_temp_files(self):
+        """Clean up temporary files"""
+        for file_path in self._temp_files:
+            try:
+                if os.path.exists(file_path):
+                    os.remove(file_path)
+            except Exception as e:
+                logger.warning(f"Failed to remove temporary file {file_path}: {e}")
+        self._temp_files.clear()
+
     def _upload_file_to_tongyi(
         self, credentials: dict, message_content: DocumentPromptMessageContent
     ) -> str:
@@ -603,22 +627,33 @@ def _upload_file_to_tongyi(
             api_key=credentials.dashscope_api_key,
             base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
         )
-        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-            if message_content.base64_data:
-                file_content = base64.b64decode(message_content.base64_data)
-                temp_file.write(file_content)
-            else:
-                try:
-                    response = requests.get(message_content.url, timeout=60)
-                    response.raise_for_status()
-                    temp_file.write(response.content)
-                except Exception as ex:
-                    raise ValueError(
-                        f"Failed to fetch data from url {message_content.url}, {ex}"
-                    ) from ex
-            temp_file.flush()
-            response = client.files.create(file=temp_file, purpose="file-extract")
-            return response.id
+        temp_file_path = None
+        try:
+            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                temp_file_path = temp_file.name
+                if message_content.base64_data:
+                    file_content = base64.b64decode(message_content.base64_data)
+                    temp_file.write(file_content)
+                else:
+                    try:
+                        response = requests.get(message_content.url, timeout=60)
+                        response.raise_for_status()
+                        temp_file.write(response.content)
+                    except Exception as ex:
+                        raise ValueError(
+                            f"Failed to fetch data from url {message_content.url}, {ex}"
+                        ) from ex
+                temp_file.flush()
+                temp_file.seek(0)
+                response = client.files.create(file=temp_file, purpose="file-extract")
+                return response.id
+        finally:
+            # Clean up temporary file after upload
+            if temp_file_path and os.path.exists(temp_file_path):
+                try:
+                    os.remove(temp_file_path)
+                except Exception as e:
+                    logger.warning(f"Failed to remove temporary file {temp_file_path}: {e}")

     def _convert_tools(self, tools: list[PromptMessageTool]) -> list[dict]:
         """
