@@ -47,7 +47,7 @@ let s:default_config = {
4747 \ ' n_suffix' : 64 ,
4848 \ ' n_predict' : 128 ,
4949 \ ' t_max_prompt_ms' : 500 ,
50- \ ' t_max_predict_ms' : 1000 ,
50+ \ ' t_max_predict_ms' : 2000 ,
5151 \ ' show_info' : 2 ,
5252 \ ' auto_fim' : v: true ,
5353 \ ' max_line_suffix' : 8 ,
@@ -196,6 +196,15 @@ function! llama#init()
196196 if g: llama_config .ring_n_chunks > 0
197197 call s: ring_update ()
198198 endif
199+
200+ " for debugging
201+ " call timer_start(100, {-> s:update_status()})
202+ endfunction
203+
" Debug helper: shows the current value of s:indent_last in the status line.
" Each call overwrites the 'statusline' option and then schedules another call
" of itself through timer_start(), so once started it keeps re-running; nothing
" in this function stops the timer.
" The only caller visible here is the commented-out timer_start() line at the
" end of llama#init().
204+ function ! s: update_status ()
" overwrite 'statusline' with a label followed by the value of s:indent_last
205+ let &statusline = ' indent = ' . s: indent_last
206+
" re-arm: timer_start()'s first argument is the delay in milliseconds
207+ call timer_start (100 , {- > s: update_status ()})
199208endfunction
200209
201210" compute how similar two chunks of text are
@@ -515,9 +524,9 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
515524 endif
516525
517526 let l: t_max_predict_ms = g: llama_config .t_max_predict_ms
518- if empty (a: prev )
519- " the first request is quick - we will launch a speculative request after this one is displayed
520- let l: t_max_predict_ms = 250
527+ if ! empty (a: prev )
528+ " give more time for the speculative FIM
529+ let l: t_max_predict_ms = min ([ 3 * g: llama_config .t_max_predict_ms, 3000 ])
521530 endif
522531
523532 " compute multiple hashes that can be used to generate a completion for which the
@@ -547,8 +556,10 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
547556 endfor
548557 endif
549558
550- " TODO: this might be incorrect
551- let s: indent_last = l: indent
559+ " update only for non-speculative fims
560+ if empty (a: prev )
561+ let s: indent_last = l: indent
562+ endif
552563
553564 " TODO: refactor in a function
554565 let l: text = getline (max ([1 , line (' .' ) - g: llama_config .ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/ 2 , line (' $' )]))
0 commit comments