/* Only the gpt-3.5-turbo tokenizer (cl100k_base) is used for all estimates */
22import type { ChatItemType } from '../../../core/chat/type' ;
33import { Tiktoken } from 'js-tiktoken/lite' ;
4- import { adaptChat2GptMessages } from '../../../core/chat/adapt' ;
5- import { ChatCompletionRequestMessageRoleEnum } from '../../../core/ai/constant' ;
4+ import { chats2GPTMessages } from '../../../core/chat/adapt' ;
65import encodingJson from './cl100k_base.json' ;
7- import { ChatMessageItemType } from '../../../core/ai/type' ;
6+ import {
7+ ChatCompletionMessageParam ,
8+ ChatCompletionContentPart ,
9+ ChatCompletionCreateParams ,
10+ ChatCompletionTool
11+ } from '../../../core/ai/type' ;
12+ import { ChatCompletionRequestMessageRoleEnum } from '../../../core/ai/constants' ;
813
914/* init tikToken obj */
1015export function getTikTokenEnc ( ) {
@@ -29,18 +34,25 @@ export function getTikTokenEnc() {
2934
3035/* count one prompt tokens */
3136export function countPromptTokens (
32- prompt = '' ,
33- role : '' | `${ChatCompletionRequestMessageRoleEnum } ` = '' ,
34- tools ?: any
37+ prompt : string | ChatCompletionContentPart [ ] | null | undefined = '' ,
38+ role : '' | `${ChatCompletionRequestMessageRoleEnum } ` = ''
3539) {
3640 const enc = getTikTokenEnc ( ) ;
37- const toolText = tools
38- ? JSON . stringify ( tools )
39- . replace ( '"' , '' )
40- . replace ( '\n' , '' )
41- . replace ( / ( ) { 2 , } / g, ' ' )
42- : '' ;
43- const text = `${ role } \n${ prompt } \n${ toolText } ` . trim ( ) ;
41+ const promptText = ( ( ) => {
42+ if ( ! prompt ) return '' ;
43+ if ( typeof prompt === 'string' ) return prompt ;
44+ let promptText = '' ;
45+ prompt . forEach ( ( item ) => {
46+ if ( item . type === 'text' ) {
47+ promptText += item . text ;
48+ } else if ( item . type === 'image_url' ) {
49+ promptText += item . image_url . url ;
50+ }
51+ } ) ;
52+ return promptText ;
53+ } ) ( ) ;
54+
55+ const text = `${ role } \n${ promptText } ` . trim ( ) ;
4456
4557 try {
4658 const encodeText = enc . encode ( text ) ;
@@ -50,15 +62,66 @@ export function countPromptTokens(
5062 return text . length ;
5163 }
5264}
65+ export const countToolsTokens = (
66+ tools ?: ChatCompletionTool [ ] | ChatCompletionCreateParams . Function [ ]
67+ ) => {
68+ if ( ! tools || tools . length === 0 ) return 0 ;
69+
70+ const enc = getTikTokenEnc ( ) ;
71+
72+ const toolText = tools
73+ ? JSON . stringify ( tools )
74+ . replace ( '"' , '' )
75+ . replace ( '\n' , '' )
76+ . replace ( / ( ) { 2 , } / g, ' ' )
77+ : '' ;
78+
79+ return enc . encode ( toolText ) . length ;
80+ } ;
5381
5482/* count messages tokens */
55- export const countMessagesTokens = ( messages : ChatItemType [ ] , tools ?: any ) => {
56- const adaptMessages = adaptChat2GptMessages ( { messages, reserveId : true } ) ;
83+ export const countMessagesTokens = ( messages : ChatItemType [ ] ) => {
84+ const adaptMessages = chats2GPTMessages ( { messages, reserveId : true } ) ;
5785
58- return countGptMessagesTokens ( adaptMessages , tools ) ;
86+ return countGptMessagesTokens ( adaptMessages ) ;
5987} ;
60- export const countGptMessagesTokens = ( messages : ChatMessageItemType [ ] , tools ?: any ) =>
61- messages . reduce ( ( sum , item ) => sum + countPromptTokens ( item . content , item . role , tools ) , 0 ) ;
88+ export const countGptMessagesTokens = (
89+ messages : ChatCompletionMessageParam [ ] ,
90+ tools ?: ChatCompletionTool [ ] ,
91+ functionCall ?: ChatCompletionCreateParams . Function [ ]
92+ ) =>
93+ messages . reduce ( ( sum , item ) => {
94+ // Evaluates the text of toolcall and functioncall
95+ const functionCallPrompt = ( ( ) => {
96+ let prompt = '' ;
97+ if ( item . role === ChatCompletionRequestMessageRoleEnum . Assistant ) {
98+ const toolCalls = item . tool_calls ;
99+ prompt +=
100+ toolCalls
101+ ?. map ( ( item ) => `${ item ?. function ?. name } ${ item ?. function ?. arguments } ` . trim ( ) )
102+ ?. join ( '' ) || '' ;
103+
104+ const functionCall = item . function_call ;
105+ prompt += `${ functionCall ?. name } ${ functionCall ?. arguments } ` . trim ( ) ;
106+ }
107+ return prompt ;
108+ } ) ( ) ;
109+
110+ const contentPrompt = ( ( ) => {
111+ if ( ! item . content ) return '' ;
112+ if ( typeof item . content === 'string' ) return item . content ;
113+ return item . content
114+ . map ( ( item ) => {
115+ if ( item . type === 'text' ) return item . text ;
116+ return '' ;
117+ } )
118+ . join ( '' ) ;
119+ } ) ( ) ;
120+
121+ return sum + countPromptTokens ( `${ contentPrompt } ${ functionCallPrompt } ` , item . role ) ;
122+ } , 0 ) +
123+ countToolsTokens ( tools ) +
124+ countToolsTokens ( functionCall ) ;
62125
63126/* slice messages from top to bottom by maxTokens */
64127export function sliceMessagesTB ( {
@@ -68,7 +131,7 @@ export function sliceMessagesTB({
68131 messages : ChatItemType [ ] ;
69132 maxTokens : number ;
70133} ) {
71- const adaptMessages = adaptChat2GptMessages ( { messages, reserveId : true } ) ;
134+ const adaptMessages = chats2GPTMessages ( { messages, reserveId : true } ) ;
72135 let reduceTokens = maxTokens ;
73136 let result : ChatItemType [ ] = [ ] ;
74137
0 commit comments