Skip to content

Commit 771333f

Browse files
committed
新增:查询数据库的重试次数配置
1 parent d3745bd commit 771333f

File tree

4 files changed

+38
-48
lines changed

4 files changed

+38
-48
lines changed

ragflows/configs.demo.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
# 切片进度查询间隔时间(秒)
3232
PROGRESS_CHECK_INTERVAL = 1
3333

34+
# 查数据库重试次数(单次重试间隔为1秒)
35+
SQL_RETRIES = 1
36+
3437

3538
def get_header():
3639
return {'authorization': AUTHORIZATION}

ragflows/ragflowdb.py

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from ragflows import configs
88
from utils.mysqlutils import BaseMySql
99
from utils import timeutils
10+
import time
1011

1112

1213
rag_db = None
@@ -17,8 +18,8 @@ def reset_connection():
1718
if rag_db:
1819
try:
1920
rag_db.close_connect()
20-
except:
21-
pass
21+
except Exception as e:
22+
timeutils.print_log(f'reset_connection error: {e}')
2223
rag_db = None
2324

2425
def get_db():
@@ -47,17 +48,28 @@ def get_doc_item(doc_id):
4748
results = db.query_list(sql)
4849
return results[0] if results else None
4950

50-
# @timeutils.monitor
51-
def get_doc_item_by_name(name):
52-
db = get_db()
53-
kb_id = configs.DIFY_DOC_KB_ID
54-
if kb_id:
55-
# 这里同时查询kb_id和name,如果document表中的数据量很大,需要增加kb_id和name的组合索引:CREATE INDEX document_kb_id_name ON document(kb_id, name);
56-
sql = f"select id,name,progress from document where kb_id = '{kb_id}' and name = '{name}'"
57-
else:
58-
sql = f"select id,name,progress from document where name = '{name}'"
59-
results = db.query_list(sql)
60-
return results[0] if results else None
51+
def get_doc_item_by_name(name, max_retries=configs.SQL_RETRIES, retry_interval=1):
52+
"""
53+
根据文档名称获取文档信息,支持重试机制
54+
:param name: 文档名称
55+
:param max_retries: 最大重试次数
56+
:param retry_interval: 重试间隔(秒)
57+
:return: 文档信息或None
58+
"""
59+
for attempt in range(max_retries):
60+
db = get_db()
61+
kb_id = configs.DIFY_DOC_KB_ID
62+
if kb_id:
63+
sql = f"select id,name,progress from document where kb_id = '{kb_id}' and name = '{name}'"
64+
else:
65+
sql = f"select id,name,progress from document where name = '{name}'"
66+
results = db.query_list(sql)
67+
if results:
68+
return results[0]
69+
if attempt < max_retries - 1: # 如果不是最后一次尝试
70+
timeutils.print_log(f"查询 {name} 无结果,第{attempt + 1}次重试...")
71+
time.sleep(retry_interval)
72+
return None
6173

6274
def exist(doc_id):
6375
return get_doc_item(doc_id) is not None

scripts/launcher.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,10 @@ def __init__(self):
7777
self.log_handlers = [] # 添加日志处理器列表
7878
self.original_print_log = None # 保存原始的日志打印函数
7979
self.title("RagFlow Upload")
80-
self.geometry("800x660")
80+
self.geometry("800x700")
8181

8282
# 版本和仓库信息
83-
self.version = "v1.0.2" # 版本号
83+
self.version = "v1.0.2-alpha" # 版本号
8484
self.github_repo = "https://github.com/Samge0/ragflow-upload" # GitHub仓库地址
8585

8686
# 自定义图标
@@ -99,6 +99,7 @@ def __init__(self):
9999
"DOC_DIR": {"type": str, "label": "文档目录", "default": "your doc dir"},
100100
"DOC_SUFFIX": {"type": str, "label": "文档后缀", "default": "md,txt,pdf,docx"},
101101
"PROGRESS_CHECK_INTERVAL": {"type": int, "label": "切片进度查询间隔", "default": "1"},
102+
"SQL_RETRIES": {"type": int, "label": "SQL查询重试次数", "default": "1"},
102103

103104
"MYSQL_HOST": {"type": str, "label": "MySQL主机", "default": "localhost"},
104105
"MYSQL_PORT": {"type": int, "label": "MySQL端口", "default": "5455"},

utils/mysqlutils.py

Lines changed: 7 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import pymysql
77
import logging
88

9+
from utils import timeutils
10+
911
class BaseMySql(object):
1012

1113
conn = None
@@ -34,7 +36,7 @@ def __init__(self, host=None, user=None, password=None, database=None, port=None
3436
self.conn.commit()
3537

3638
except Exception as e:
37-
self.e(e)
39+
timeutils.print_log(f'连接数据库异常: {e}')
3840
pass
3941

4042
def query_list(self, sql: str) -> list:
@@ -51,7 +53,7 @@ def query_list(self, sql: str) -> list:
5153
columns = [col[0] for col in cur.description]
5254
return [dict(zip(columns, self.parse_encoding(row))) for row in cur.fetchall()]
5355
except Exception as e:
54-
self.e(e)
56+
timeutils.print_log(f'query_list 查询数据异常: {e}')
5557
return []
5658

5759
def execute(self, sql: str) -> bool:
@@ -67,7 +69,7 @@ def execute(self, sql: str) -> bool:
6769
self.conn.commit()
6870
return True
6971
except Exception as e:
70-
self.e(e)
72+
timeutils.print_log(f'execute 执行sql异常,sql = {sql}\n error: {e}')
7173
return False
7274

7375
def parse_encoding(self, row) -> list:
@@ -89,43 +91,15 @@ def close_connect(self) -> None:
8991
self.cursor.close()
9092
self.conn.close()
9193
self.child_close()
92-
self.i('释放数据库连接')
94+
timeutils.print_log(f'close_connect 已关闭数据库连接')
9395
except Exception as e:
94-
self.e(e)
96+
timeutils.print_log(f'close_connect 关闭数据库异常: {e}')
9597

9698
def child_close(self) -> None:
9799
"""
98100
提供给子类处理的关闭操作
99101
"""
100102
pass
101-
102-
def _need_update(self, spider) -> bool:
103-
"""
104-
判断该爬虫是否需要进行更新操作
105-
:param spider:
106-
:return:
107-
"""
108-
try:
109-
if not spider or not hasattr(spider, 'NEED_UPDATE'):
110-
return False
111-
self.i(f"是否需要进行更新 spider.NEED_UPDATE={spider.NEED_UPDATE}")
112-
return spider.NEED_UPDATE
113-
except:
114-
return False
115-
116-
def _get_update_field_list(self, spider) -> list:
117-
"""
118-
获取需要指定更新的字段
119-
:param spider:
120-
:return:
121-
"""
122-
try:
123-
if not spider or not hasattr(spider, 'UPDATE_FIELD_LIST'):
124-
return []
125-
self.i(f"指定更新字段 spider.UPDATE_FIELD_LIST={spider.UPDATE_FIELD_LIST}")
126-
return spider.UPDATE_FIELD_LIST
127-
except:
128-
return []
129103

130104
def i(self, msg):
131105
self.logger.info(msg)

0 commit comments

Comments
 (0)