Skip to content

Commit da535b6

Browse files
committed
refactor: add context prefix cache for response api
1 parent 95a0d3d commit da535b6

File tree

4 files changed

+39
-7
lines changed

4 files changed

+39
-7
lines changed

components/model/ark/chatmodel.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,9 @@ func buildResponsesAPIChatModel(config *ChatModelConfig) (*responsesAPIChatModel
246246
}
247247
var opts []arkruntime.ConfigOption
248248

249-
if config.Region != "" {
249+
if config.Region == "" {
250+
opts = append(opts, arkruntime.WithRegion(defaultRegion))
251+
} else {
250252
opts = append(opts, arkruntime.WithRegion(config.Region))
251253
}
252254

@@ -503,12 +505,10 @@ func (cm *ChatModel) IsCallbacksEnabled() bool {
503505
//
504506
// Note:
505507
// - It is unavailable for doubao models of version 1.6 and above.
506-
// - Currently, only supports calling by ContextAPI.
507508
func (cm *ChatModel) CreatePrefixCache(ctx context.Context, prefix []*schema.Message, ttl int) (info *CacheInfo, err error) {
508509
if cm.respChatModel.cache != nil && ptrFromOrZero(cm.respChatModel.cache.APIType) == ResponsesAPI {
509-
return nil, fmt.Errorf("CreatePrefixCache is not supported by ResponsesAPI")
510+
return cm.respChatModel.createContextByResponseAPI(ctx, prefix, ttl)
510511
}
511-
512512
return cm.createContextByContextAPI(ctx, prefix, ttl, model.ContextModeCommonPrefix, nil)
513513
}
514514

components/model/ark/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ require (
1010
github.com/openai/openai-go v1.10.1
1111
github.com/smartystreets/goconvey v1.8.1
1212
github.com/stretchr/testify v1.11.1
13-
github.com/volcengine/volcengine-go-sdk v1.1.47
13+
github.com/volcengine/volcengine-go-sdk v1.1.48
1414

1515
)
1616

components/model/ark/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ github.com/volcengine/volcengine-go-sdk v1.1.44 h1:WLoLlzt67ZlJeow55PPx65/Mh52De
157157
github.com/volcengine/volcengine-go-sdk v1.1.44/go.mod h1:oxoVo+A17kvkwPkIeIHPVLjSw7EQAm+l/Vau1YGHN+A=
158158
github.com/volcengine/volcengine-go-sdk v1.1.47 h1:aSHWIfo0fKhG+V6IO76W9Cf+1fzp3pykNoT5ulBVMsw=
159159
github.com/volcengine/volcengine-go-sdk v1.1.47/go.mod h1:oxoVo+A17kvkwPkIeIHPVLjSw7EQAm+l/Vau1YGHN+A=
160+
github.com/volcengine/volcengine-go-sdk v1.1.48 h1:eHffKYHLUMKdZDmUQgmD1+dEzoVTIsiSlNNdnmEBs8A=
161+
github.com/volcengine/volcengine-go-sdk v1.1.48/go.mod h1:oxoVo+A17kvkwPkIeIHPVLjSw7EQAm+l/Vau1YGHN+A=
160162
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
161163
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
162164
github.com/x-cray/logrus-prefixed-formatter v0.5.2 h1:00txxvfBM9muc0jiLIEAkAcIMJzfthRT6usrui8uGmg=

components/model/ark/responses_api.go

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,15 +98,18 @@ func (cm *responsesAPIChatModel) Generate(ctx context.Context, input []*schema.M
9898
cacheCfg.Enabled = responseReq.Caching.Type.String() == responses.CacheType_enabled.String()
9999
cacheCfg.ExpireAt = responseReq.ExpireAt
100100
}
101-
msg, err := cm.toOutputMessage(responseObject, cacheCfg)
102101

102+
outMsg, err = cm.toOutputMessage(responseObject, cacheCfg)
103+
if err != nil {
104+
return nil, fmt.Errorf("failed to convert output to schema.Message: %w", err)
105+
}
103106
callbacks.OnEnd(ctx, &model.CallbackOutput{
104107
Message: outMsg,
105108
Config: config,
106109
TokenUsage: cm.toModelTokenUsage(responseObject.Usage),
107110
Extra: map[string]any{callbackExtraKeyThinking: specOptions.thinking},
108111
})
109-
return msg, nil
112+
return outMsg, nil
110113

111114
}
112115

@@ -1014,3 +1017,30 @@ func ensureDataURL(dataOfBase64, mimeType string) (string, error) {
10141017
}
10151018
return fmt.Sprintf("data:%s;base64,%s", mimeType, dataOfBase64), nil
10161019
}
1020+
1021+
func (cm *responsesAPIChatModel) createContextByResponseAPI(ctx context.Context, prefix []*schema.Message, ttl int) (info *CacheInfo, err error) {
1022+
responseReq := &responses.ResponsesRequest{
1023+
Model: cm.model,
1024+
ExpireAt: ptrOf(time.Now().Unix() + int64(ttl)),
1025+
Store: ptrOf(true),
1026+
Caching: &responses.ResponsesCaching{
1027+
Type: responses.CacheType_enabled.Enum(),
1028+
Prefix: ptrOf(true),
1029+
},
1030+
}
1031+
err = cm.populateInput(prefix, responseReq)
1032+
if err != nil {
1033+
return nil, err
1034+
}
1035+
responseObject, err := cm.client.CreateResponses(ctx, responseReq)
1036+
if err != nil {
1037+
return nil, err
1038+
}
1039+
1040+
info = &CacheInfo{
1041+
ContextID: responseObject.Id,
1042+
Usage: *cm.toEinoTokenUsage(responseObject.Usage),
1043+
}
1044+
1045+
return info, nil
1046+
}

0 commit comments

Comments
 (0)