@@ -9,6 +9,7 @@ import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
99import { ReadFileByBufferParams } from '../read/type' ;
1010import { MongoRwaTextBuffer } from '../../buffer/rawText/schema' ;
1111import { readFileRawContent } from '../read/utils' ;
12+ import { PassThrough } from 'stream' ;
1213
1314export function getGFSCollection ( bucket : `${BucketNameEnum } `) {
1415 MongoFileSchema ;
@@ -113,31 +114,39 @@ export async function getDownloadStream({
113114 fileId : string ;
114115} ) {
115116 const bucket = getGridBucket ( bucketName ) ;
117+ const stream = bucket . openDownloadStream ( new Types . ObjectId ( fileId ) ) ;
118+ const copyStream = stream . pipe ( new PassThrough ( ) ) ;
116119
117- return bucket . openDownloadStream ( new Types . ObjectId ( fileId ) ) ;
118- }
119-
120- export const readFileEncode = async ( {
121- bucketName,
122- fileId
123- } : {
124- bucketName : `${BucketNameEnum } `;
125- fileId : string ;
126- } ) => {
127- const encodeStream = await getDownloadStream ( { bucketName, fileId } ) ;
128- let buffers : Buffer = Buffer . from ( [ ] ) ;
129- for await ( const chunk of encodeStream ) {
130- buffers = Buffer . concat ( [ buffers , chunk ] ) ;
131- if ( buffers . length > 10 ) {
132- encodeStream . abort ( ) ;
133- break ;
134- }
135- }
120+ /* get encoding */
121+ const buffer = await ( ( ) => {
122+ return new Promise < Buffer > ( ( resolve , reject ) => {
123+ let tmpBuffer : Buffer = Buffer . from ( [ ] ) ;
124+
125+ stream . on ( 'data' , ( chunk ) => {
126+ if ( tmpBuffer . length < 20 ) {
127+ tmpBuffer = Buffer . concat ( [ tmpBuffer , chunk ] ) ;
128+ }
129+ if ( tmpBuffer . length >= 20 ) {
130+ resolve ( tmpBuffer ) ;
131+ }
132+ } ) ;
133+ stream . on ( 'end' , ( ) => {
134+ resolve ( tmpBuffer ) ;
135+ } ) ;
136+ stream . on ( 'error' , ( err ) => {
137+ reject ( err ) ;
138+ } ) ;
139+ } ) ;
140+ } ) ( ) ;
136141
137- const encoding = detectFileEncoding ( buffers ) ;
142+ const encoding = detectFileEncoding ( buffer ) ;
138143
139- return encoding as BufferEncoding ;
140- } ;
144+ return {
145+ fileStream : copyStream ,
146+ encoding
147+ // encoding: 'utf-8'
148+ } ;
149+ }
141150
142151export const readFileContentFromMongo = async ( {
143152 teamId,
@@ -162,9 +171,8 @@ export const readFileContentFromMongo = async ({
162171 } ;
163172 }
164173
165- const [ file , encoding , fileStream ] = await Promise . all ( [
174+ const [ file , { encoding, fileStream } ] = await Promise . all ( [
166175 getFileById ( { bucketName, fileId } ) ,
167- readFileEncode ( { bucketName, fileId } ) ,
168176 getDownloadStream ( { bucketName, fileId } )
169177 ] ) ;
170178
@@ -176,12 +184,12 @@ export const readFileContentFromMongo = async ({
176184
177185 const fileBuffers = await ( ( ) => {
178186 return new Promise < Buffer > ( ( resolve , reject ) => {
179- let buffers = Buffer . from ( [ ] ) ;
187+ let buffer = Buffer . from ( [ ] ) ;
180188 fileStream . on ( 'data' , ( chunk ) => {
181- buffers = Buffer . concat ( [ buffers , chunk ] ) ;
189+ buffer = Buffer . concat ( [ buffer , chunk ] ) ;
182190 } ) ;
183191 fileStream . on ( 'end' , ( ) => {
184- resolve ( buffers ) ;
192+ resolve ( buffer ) ;
185193 } ) ;
186194 fileStream . on ( 'error' , ( err ) => {
187195 reject ( err ) ;
0 commit comments