@@ -18,15 +18,13 @@ package kvcache
1818
1919import (
2020 "context"
21- "encoding/json"
2221 "fmt"
2322
2423 "k8s.io/apimachinery/pkg/util/sets"
2524 "k8s.io/klog/v2"
2625
2726 "github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock"
2827 "github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
29- chattemplatego "github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization/chat_template_go"
3028 "github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization/prefixstore"
3129 "github.com/llm-d/llm-d-kv-cache-manager/pkg/utils/logging"
3230)
@@ -117,50 +115,14 @@ func (k *Indexer) KVBlockIndex() kvblock.Index {
117115//
118116// The function returns a map of pod identifiers to scores.
119117func (k * Indexer ) GetPodScores (ctx context.Context , prompt , modelName string ,
120- podIdentifiers []string , chatCompletion bool ,
118+ podIdentifiers []string ,
121119) (map [string ]int , error ) {
122120 traceLogger := klog .FromContext (ctx ).V (logging .TRACE ).WithName ("kvcache.GetPodScores" )
123-
124- // Handle chat completion requests
125- if chatCompletion {
126- // Parse the prompt as a ChatTemplateRequest JSON
127- var req chattemplatego.ChatTemplateRequest
128- if err := json .Unmarshal ([]byte (prompt ), & req ); err != nil {
129- return nil , fmt .Errorf ("failed to parse chat template request: %w" , err )
130- }
131-
132- // Create or reuse the CGo wrapper (could be a singleton in production)
133- // TODO: cache, instance management
134- wrapper := chattemplatego .NewChatTemplateCGoWrapper ()
135-
136- // Fetch the chat template for the model (if not already set)
137- if req .ChatTemplate == "" {
138- getReq := chattemplatego.GetChatTemplateRequest {ModelName : modelName }
139- template , template_vars , err := wrapper .GetModelChatTemplate (getReq )
140- if err != nil {
141- return nil , fmt .Errorf ("failed to fetch chat template: %w" , err )
142- }
143- req .ChatTemplate = template
144- req .TemplateVars = template_vars
145- }
146-
147- // Apply the template to the request
148- resp , err := wrapper .RenderChatTemplate (req )
149- if err != nil {
150- return nil , fmt .Errorf ("failed to render chat template: %w" , err )
151- }
152- if len (resp .RenderedChats ) == 0 {
153- return nil , nil
154- }
155- prompt = resp .RenderedChats [0 ]
156- }
157-
158121 // 0. add to tokenizers pool
159122 k .tokenizersPool .AddTask (prompt , modelName )
160123
161124 // 1. get available tokens of longest prefix
162125 tokens := k .tokensIndexer .FindLongestContainedTokens (prompt , modelName )
163-
164126 if len (tokens ) == 0 {
165127 //nolint:nilnil // no need to return an error
166128 return nil , nil
@@ -188,14 +150,6 @@ func (k *Indexer) GetPodScores(ctx context.Context, prompt, modelName string,
188150 return podScores , nil
189151}
190152
191- // GetPodScoresDefault is a convenience function for backward compatibility
192- // that calls GetPodScores with chatCompletion=false
193- func (k * Indexer ) GetPodScoresDefault (ctx context.Context , prompt , modelName string ,
194- podIdentifiers []string ,
195- ) (map [string ]int , error ) {
196- return k .GetPodScores (ctx , prompt , modelName , podIdentifiers , false )
197- }
198-
199153// podsPerKeyPrintHelper formats a map of keys to pod names for printing.
200154func podsPerKeyPrintHelper (ks map [kvblock.Key ][]string ) string {
201155 flattened := ""
0 commit comments