
Commit e4629a5

fix: queue (#4485)
1 parent 2dc3cb7 commit e4629a5

6 files changed: +104 additions, -80 deletions

docSite/content/zh-cn/docs/development/upgrading/494.md

Lines changed: 2 additions & 1 deletion
@@ -61,4 +61,5 @@ curl --location --request POST 'https://{{host}}/api/admin/initv494' \
 
 ## 🐛 Fixes
 
-1. When searching apps/knowledge bases, clicking a folder did not open the next level.
+1. When searching apps/knowledge bases, clicking a folder did not open the next level.
+2. When retraining, parameters were not initialized correctly.

packages/service/common/vectorStore/controller.ts

Lines changed: 17 additions & 14 deletions
@@ -8,6 +8,7 @@ import { MILVUS_ADDRESS, PG_ADDRESS, OCEANBASE_ADDRESS } from './constants';
 import { MilvusCtrl } from './milvus/class';
 import { setRedisCache, getRedisCache, delRedisCache, CacheKeyEnum } from '../redis/cache';
 import { throttle } from 'lodash';
+import { retryFn } from '@fastgpt/global/common/system/utils';
 
 const getVectorObj = () => {
   if (PG_ADDRESS) return new PgVectorCtrl();
@@ -55,22 +56,24 @@ export const insertDatasetDataVector = async ({
   query: string;
   model: EmbeddingModelItemType;
 }) => {
-  const { vectors, tokens } = await getVectorsByText({
-    model,
-    input: query,
-    type: 'db'
-  });
-  const { insertId } = await Vector.insert({
-    ...props,
-    vector: vectors[0]
-  });
+  return retryFn(async () => {
+    const { vectors, tokens } = await getVectorsByText({
+      model,
+      input: query,
+      type: 'db'
+    });
+    const { insertId } = await Vector.insert({
+      ...props,
+      vector: vectors[0]
+    });
 
-  onDelCache(props.teamId);
+    onDelCache(props.teamId);
 
-  return {
-    tokens,
-    insertId
-  };
+    return {
+      tokens,
+      insertId
+    };
+  });
 };
 
 export const deleteDatasetDataVector = async (props: DelDatasetVectorCtrlProps) => {
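
The wrapper retries the embedding request and the vector insert as one unit, so a transient failure re-runs the whole closure instead of dropping the chunk. The diff does not show retryFn's implementation; below is a minimal sketch of such a helper, assuming a (fn, retries) shape with exponential backoff. The names and defaults are illustrative, not the actual @fastgpt/global code.

// Minimal retry helper sketch (assumed shape; the real retryFn in
// @fastgpt/global/common/system/utils may differ in signature and backoff policy).
export const retryFn = async <T>(fn: () => Promise<T>, retries = 3): Promise<T> => {
  let lastError: unknown;
  for (let attempt = 0; attempt <= retries; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      if (attempt < retries) {
        // Exponential backoff between attempts: 200 ms, 400 ms, 800 ms, ...
        await new Promise((resolve) => setTimeout(resolve, 200 * 2 ** attempt));
      }
    }
  }
  throw lastError;
};

Because the whole closure is retried, a failed Vector.insert also re-requests the embedding; that keeps the helper generic at the cost of a redundant embedding call on retry.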

projects/app/src/pageComponents/dataset/detail/Form/CollectionChunkForm.tsx

Lines changed: 7 additions & 1 deletion
@@ -115,6 +115,7 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
   const chunkSplitMode = watch('chunkSplitMode');
   const autoIndexes = watch('autoIndexes');
   const indexSize = watch('indexSize');
+  const imageIndex = watch('imageIndex');
 
   const trainingModeList = useMemo(() => {
     const list = Object.entries(DatasetCollectionDataProcessModeMap);
@@ -225,7 +226,11 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
           <HStack gap={[3, 7]}>
             <HStack flex={'1'} spacing={1}>
               <MyTooltip label={!feConfigs?.isPlus ? t('common:commercial_function_tip') : ''}>
-                <Checkbox isDisabled={!feConfigs?.isPlus} {...register('autoIndexes')}>
+                <Checkbox
+                  isDisabled={!feConfigs?.isPlus}
+                  isChecked={autoIndexes}
+                  {...register('autoIndexes')}
+                >
                   <FormLabel>{t('dataset:auto_indexes')}</FormLabel>
                 </Checkbox>
               </MyTooltip>
@@ -243,6 +248,7 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
           >
             <Checkbox
               isDisabled={!feConfigs?.isPlus || !datasetDetail?.vlmModel}
+              isChecked={imageIndex}
              {...register('imageIndex')}
            >
              <FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
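
Spreading register(...) alone leaves the Chakra Checkbox uncontrolled, so a programmatic reset or setValue (for example, when re-training re-initializes the form parameters) updates form state without updating the UI. Pairing watch with isChecked keeps the two in sync. A self-contained sketch of the pattern, assuming react-hook-form v7 and Chakra UI; this is a generic example, not the FastGPT component:

import { useForm } from 'react-hook-form';
import { Checkbox, Button } from '@chakra-ui/react';

type FormValues = { autoIndexes: boolean };

const Example = () => {
  const { register, watch, reset } = useForm<FormValues>({
    defaultValues: { autoIndexes: false }
  });
  const autoIndexes = watch('autoIndexes');

  return (
    <>
      {/* isChecked re-renders the checkbox when the value changes programmatically */}
      <Checkbox isChecked={autoIndexes} {...register('autoIndexes')}>
        Auto indexes
      </Checkbox>
      {/* Without isChecked, this reset would update form state but not the checkbox */}
      <Button onClick={() => reset({ autoIndexes: true })}>Re-init params</Button>
    </>
  );
};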

projects/app/src/pageComponents/dataset/detail/Import/commonProgress/DataProcess.tsx

Lines changed: 4 additions & 7 deletions
@@ -20,17 +20,14 @@ import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
 import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
 import { shadowLight } from '@fastgpt/web/styles/theme';
 import CollectionChunkForm from '../../Form/CollectionChunkForm';
-import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
 
 function DataProcess() {
   const { t } = useTranslation();
   const { feConfigs } = useSystemStore();
 
-  const { goToNext, processParamsForm, chunkSize } = useContextSelector(
-    DatasetImportContext,
-    (v) => v
-  );
-  const { register } = processParamsForm;
+  const { goToNext, processParamsForm } = useContextSelector(DatasetImportContext, (v) => v);
+  const { register, watch } = processParamsForm;
+  const customPdfParseValue = watch('customPdfParse');
 
   const Title = useCallback(({ title }: { title: string }) => {
     return (
@@ -66,7 +63,7 @@ function DataProcess() {
         >
           {feConfigs.showCustomPdfParse && (
             <HStack spacing={1}>
-              <Checkbox {...register('customPdfParse')}>
+              <Checkbox isChecked={customPdfParseValue} {...register('customPdfParse')}>
                 <FormLabel>{t('dataset:pdf_enhance_parse')}</FormLabel>
               </Checkbox>
               <QuestionTip label={t('dataset:pdf_enhance_parse_tips')} />

projects/app/src/service/core/dataset/data/controller.ts

Lines changed: 56 additions & 44 deletions
@@ -17,7 +17,7 @@ import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
 import { countPromptTokens } from '@fastgpt/service/common/string/tiktoken';
 
 const formatIndexes = async ({
-  indexes,
+  indexes = [],
   q,
   a = '',
   indexSize,
@@ -66,7 +66,6 @@
   ];
 };
 
-  indexes = indexes || [];
   // If index not type, set it to custom
   indexes = indexes
     .map((item) => ({
@@ -93,27 +92,32 @@
   indexes = indexes.filter((item) => item.type !== DatasetDataIndexTypeEnum.default);
   indexes.push(...concatDefaultIndexes);
 
-  // Filter same text
+  // Remove same text
   indexes = indexes.filter(
     (item, index, self) => index === self.findIndex((t) => t.text === item.text)
   );
 
   const chekcIndexes = (
     await Promise.all(
       indexes.map(async (item) => {
+        if (item.type === DatasetDataIndexTypeEnum.default) {
+          return item;
+        }
+
         // If oversize tokens, split it
         const tokens = await countPromptTokens(item.text);
-        if (tokens > indexSize) {
+        if (tokens > maxIndexSize) {
           const splitText = splitText2Chunks({
             text: item.text,
-            chunkSize: 512,
+            chunkSize: indexSize,
             maxSize: maxIndexSize
           }).chunks;
           return splitText.map((text) => ({
             text,
             type: item.type
           }));
         }
+
         return item;
       })
     )
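
Two things changed in the oversize check: an index is split only when its token count exceeds maxIndexSize (previously the threshold was indexSize), and the split now targets the user-configured indexSize instead of a hard-coded 512. The rule in isolation, reusing the repo's countPromptTokens and splitText2Chunks (resizeIndex is a hypothetical name for illustration; the real logic runs inline above and additionally skips default-type indexes):

import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { countPromptTokens } from '@fastgpt/service/common/string/tiktoken';

// Hypothetical helper; sketches the new resize rule only.
const resizeIndex = async (text: string, indexSize: number, maxIndexSize: number) => {
  const tokens = await countPromptTokens(text);
  // Indexes that fit within the hard ceiling stay untouched
  if (tokens <= maxIndexSize) return [text];
  // Oversized text is split toward the user-configured index size
  return splitText2Chunks({ text, chunkSize: indexSize, maxSize: maxIndexSize }).chunks;
};
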
@@ -164,24 +168,30 @@ export async function insertData2Dataset({
   });
 
   // insert to vector store
-  const result = await Promise.all(
-    newIndexes.map(async (item) => {
-      const result = await insertDatasetDataVector({
-        query: item.text,
-        model: embModel,
-        teamId,
-        datasetId,
-        collectionId
-      });
-      return {
-        tokens: result.tokens,
-        index: {
-          ...item,
-          dataId: result.insertId
-        }
-      };
-    })
-  );
+  const results: {
+    tokens: number;
+    index: {
+      dataId: string;
+      type: `${DatasetDataIndexTypeEnum}`;
+      text: string;
+    };
+  }[] = [];
+  for await (const item of newIndexes) {
+    const result = await insertDatasetDataVector({
+      query: item.text,
+      model: embModel,
+      teamId,
+      datasetId,
+      collectionId
+    });
+    results.push({
+      tokens: result.tokens,
+      index: {
+        ...item,
+        dataId: result.insertId
+      }
+    });
+  }
 
   // 2. Create mongo data
   const [{ _id }] = await MongoDatasetData.create(
@@ -194,7 +204,7 @@
       q,
       a,
       chunkIndex,
-      indexes: result.map((item) => item.index)
+      indexes: results.map((item) => item.index)
     }
   ],
   { session, ordered: true }
@@ -216,7 +226,7 @@
 
   return {
     insertId: _id,
-    tokens: result.reduce((acc, cur) => acc + cur.tokens, 0)
+    tokens: results.reduce((acc, cur) => acc + cur.tokens, 0)
   };
 }
 
@@ -303,25 +313,27 @@ export async function updateData2Dataset({
   await mongoData.save();
 
   // 5. insert vector
-  const insertResult = await Promise.all(
-    patchResult
-      .filter((item) => item.type === 'create' || item.type === 'update')
-      .map(async (item) => {
-        // insert new vector and update dateId
-        const result = await insertDatasetDataVector({
-          query: item.index.text,
-          model: getEmbeddingModel(model),
-          teamId: mongoData.teamId,
-          datasetId: mongoData.datasetId,
-          collectionId: mongoData.collectionId
-        });
-        item.index.dataId = result.insertId;
-        return {
-          tokens: result.tokens
-        };
-      })
-  );
-  const tokens = insertResult.reduce((acc, cur) => acc + cur.tokens, 0);
+  const insertResults: {
+    tokens: number;
+  }[] = [];
+  for await (const item of patchResult) {
+    if (item.type === 'delete' || item.type === 'unChange') continue;
+
+    // insert new vector and update dateId
+    const result = await insertDatasetDataVector({
+      query: item.index.text,
+      model: getEmbeddingModel(model),
+      teamId: mongoData.teamId,
+      datasetId: mongoData.datasetId,
+      collectionId: mongoData.collectionId
+    });
+    item.index.dataId = result.insertId;
+    insertResults.push({
+      tokens: result.tokens
+    });
+  }
+
+  const tokens = insertResults.reduce((acc, cur) => acc + cur.tokens, 0);
 
   const newIndexes = patchResult
     .filter((item) => item.type !== 'delete')
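
The recurring fix in this file is replacing Promise.all with a sequential loop, so vector insertions run one at a time, as a queue, instead of racing in parallel. The pattern in isolation, as a generic sketch (insertSequentially is a hypothetical helper, not part of the codebase):

// Serialize async inserts: each one starts only after the previous resolved.
const insertSequentially = async <T, R>(
  items: T[],
  insert: (item: T) => Promise<R>
): Promise<R[]> => {
  const results: R[] = [];
  for (const item of items) {
    results.push(await insert(item));
  }
  return results;
};

A plain for...of with await is equivalent to the for await...of used above when iterating a plain array. Combined with the retryFn wrapper in vectorStore/controller.ts, one failed insert now retries in place rather than rejecting an entire Promise.all batch.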

projects/app/src/service/events/generateVector.ts

Lines changed: 18 additions & 13 deletions
@@ -200,19 +200,24 @@ const rebuildData = async ({
 
   // update vector, update dataset_data rebuilding status, delete data from training
   // 1. Insert new vector to dataset_data
-  const updateResult = await Promise.all(
-    mongoData.indexes.map(async (index, i) => {
-      const result = await insertDatasetDataVector({
-        query: index.text,
-        model: getEmbeddingModel(trainingData.model),
-        teamId: mongoData.teamId,
-        datasetId: mongoData.datasetId,
-        collectionId: mongoData.collectionId
-      });
-      mongoData.indexes[i].dataId = result.insertId;
-      return result;
-    })
-  );
+  const updateResult: {
+    tokens: number;
+    insertId: string;
+  }[] = [];
+  let i = 0;
+  for await (const index of mongoData.indexes) {
+    const result = await insertDatasetDataVector({
+      query: index.text,
+      model: getEmbeddingModel(trainingData.model),
+      teamId: mongoData.teamId,
+      datasetId: mongoData.datasetId,
+      collectionId: mongoData.collectionId
+    });
+    mongoData.indexes[i].dataId = result.insertId;
+    updateResult.push(result);
+    i++;
+  }
+
   const { tokens } = await mongoSessionRun(async (session) => {
     // 2. Ensure that the training data is deleted after the Mongo update is successful
     await mongoData.save({ session });
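
The rebuilt loop tracks i by hand to write each insertId back into mongoData.indexes. An equivalent formulation with Array.prototype.entries() avoids the mutable counter; a design note only, not what the commit ships:

// Same traversal, with the index supplied by entries()
for (const [i, index] of mongoData.indexes.entries()) {
  const result = await insertDatasetDataVector({
    query: index.text,
    model: getEmbeddingModel(trainingData.model),
    teamId: mongoData.teamId,
    datasetId: mongoData.datasetId,
    collectionId: mongoData.collectionId
  });
  mongoData.indexes[i].dataId = result.insertId;
  updateResult.push(result);
}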
