@@ -22,16 +22,19 @@ class Annotate(Pipeline):
2222 Automatically annotates papers using LLMs.
2323 """
2424
25- def __init__ (self , llm ):
25+ def __init__ (self , llm , maxlength = 10000 , ** kwargs ):
2626 """
2727 Creates a new Annotation pipeline.
2828
2929 Args:
3030 llm: LLM path
31+ maxlength: maximum prompt length
32+ kwargs: additional LLM args
3133 """
3234
3335 # Create LLM pipeline
34- self .llm = LLM (llm )
36+ self .llm = LLM (llm , ** kwargs )
37+ self .maxlength = maxlength
3538
3639 # Create segmentation pipeline
3740 self .segment = Segmentation (sentences = True , cleantext = False )
@@ -141,7 +144,7 @@ def title(self, text, progress):
141144
142145 result = None
143146 for x in tqdm ([prompt ], desc = "Extracting title" , disable = not progress ):
144- result = self .llm ([{"role" : "user" , "content" : x }], maxlength = 2048 )
147+ result = self .llm ([{"role" : "user" , "content" : x }], maxlength = 2048 , stripthink = True )
145148
146149 return result
147150
@@ -165,7 +168,7 @@ def keywords(self, text, progress):
165168
166169 result = None
167170 for x in tqdm ([prompt ], desc = "Generating keywords" , disable = not progress ):
168- result = self .llm ([{"role" : "user" , "content" : x }], maxlength = 2048 )
171+ result = self .llm ([{"role" : "user" , "content" : x }], maxlength = 2048 , stripthink = True )
169172
170173 return result
171174
@@ -234,7 +237,7 @@ def topics(self, annotations, progress):
234237 topics = []
235238 for prompt in tqdm (prompts , desc = "Generating topics" , disable = not progress ):
236239 # Generate topic
237- topic = self .llm ([{"role" : "user" , "content" : prompt }], maxlength = 10000 )
240+ topic = self .llm ([{"role" : "user" , "content" : prompt }], maxlength = self . maxlength , stripthink = True )
238241
239242 # Clean topic and append
240243 topics .append (re .sub (r"[^\x00-\x7f]" , r"" , topic ))
0 commit comments