
Commit 296e635

0.1.1-alpha: better arg parsing, make guided generation optional (disabled by default), verbose mode
1 parent a5490a3 commit 296e635

12 files changed: +110 -70 lines changed


README.md

Lines changed: 1 addition & 4 deletions
@@ -62,13 +62,10 @@ CoreAgent makes it easy to integrate your own custom functionalities as tools. T
 
 Refer to the example above for a practical demonstration of tool registration.
 
-## Limitations
-
-Currently, it relies on the `guided_grammar` feature of `vLLM` to function correctly, so you have to use `vLLM` as the inference server.
 
 ## Roadmap
 - [x] Basic universal agent framework.
-- [ ] Remove `guided_grammar` restriction, allow general LLM usage (DeepSeek API, GPT3.5/4/4o API, Qwen API, etc.)
+- [x] Remove `guided_grammar` restriction, allow general LLM usage (DeepSeek API, GPT3.5/4/4o API, Qwen API, etc.)
 - [ ] More intuitive simplified examples, allowing hands-on try-outs.
 - [ ] RAG-based memory module.

README.zh.md

Lines changed: 5 additions & 3 deletions
@@ -61,9 +61,11 @@ CoreAgent makes it easy to integrate your own custom functions as tools. To reg
 
 Refer to the example above for a practical demonstration of tool registration.
 
-## Limitations
-
-Currently, this project relies on `vLLM`'s `guided_grammar` feature to run, so you must use `vLLM` as the inference server.
+## Roadmap
+- [x] Basic framework.
+- [x] Remove the hard requirement on `guided_grammar`, support common LLM APIs (DeepSeek API, GPT3.5/4/4o API, Qwen API, etc.)
+- [ ] Simpler example code.
+- [ ] RAG-based memory system.
 
 ## Contributing

coreagent/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -1,2 +1,3 @@
 from .config import Config, get_default_config, set_default_config
 from .agent import Agent, Identity
+from .arg_parsers import set_default_config_from_args

coreagent/agent.py

Lines changed: 22 additions & 18 deletions
@@ -157,14 +157,18 @@ def _call_llm(self, history) -> str:
     # Executes a single turn of LLM call.
     history: "Chat history [{\"role\": ..., \"content\": ...}, ...]"
     """
-    grammar_text = generate_aiml_syntax(self.identity.respond_gbnf, dict(
-      [(x, self.tool_desc[x].param_names) for x in self.tool_desc]
-    ))
-    extra_body = dict(
-      guided_grammar=grammar_text,
-      guided_decoding_backend=self.config.guided_decoding_backend,
-    )
-    if self.config.chat_template_type in chat_templates:
+    extra_body: dict = {}
+
+    if self.config.use_guided_generation:
+      grammar_text = generate_aiml_syntax(self.identity.respond_gbnf, dict(
+        [(x, self.tool_desc[x].param_names) for x in self.tool_desc]
+      ))
+      extra_body = dict(
+        guided_grammar=grammar_text,
+        guided_decoding_backend=self.config.guided_decoding_backend,
+      )
+
+    if self.config.chat_template_type is not None and self.config.chat_template_type in chat_templates:
       extra_body['chat_template'] = chat_templates[self.config.chat_template_type],
     if not self.config.show_generation:
       r = self.config.llm.chat.completions.create(
@@ -180,29 +184,29 @@ def _call_llm(self, history) -> str:
         print(r.choices[0].message)
         print(f'WARNING: finish_reason={r.choices[0].finish_reason}')
         raise Exception("too long")
-      if 'content' not in r.choices[0].message or len(r.choices[0].message.content) <= 0:
+      if r.choices[0].message is None or len(r.choices[0].message.content) <= 0:
        print(r.choices[0])
        raise Exception("empty LLM response")
       return r.choices[0].message.content
     ##########
     r = self.config.llm.chat.completions.create(
       model=self.config.model,
       messages=history,
-      stream=True,
-      temperature=0.0,
+      temperature=self.config.temperature,
       extra_body=extra_body,
-      frequency_penalty=self.identity.frequency_penalty,
-      max_completion_tokens=self.identity.generation_limit,
+      frequency_penalty=self.config.frequency_penalty,
+      max_completion_tokens=self.config.generation_limit,
       stop="\n$$EOF$$" if self.config.use_stop_token else None,
+      stream=True
     )
     total = ''
     reasoning = ''
     resp = ''
-    # prog = tqdm(r, unit='')
+    prog = tqdm(r, unit='')
     finish_reason = None
 
     entered_content = False
-    for chunk in r:
+    for chunk in prog:
      # print(chunk.choices[0], flush=True)
      if hasattr(chunk.choices[0].delta, "reasoning_content"):
        total += chunk.choices[0].delta.reasoning_content
@@ -211,13 +215,13 @@ def _call_llm(self, history) -> str:
      elif hasattr(chunk.choices[0].delta, "content") and len(chunk.choices[0].delta.content) > 0:
        if not entered_content:
          entered_content=True
-         print("\n========\nOUTPUT: \n")
+         # print("\n========\nOUTPUT: \n")
        total += chunk.choices[0].delta.content
        resp += chunk.choices[0].delta.content
-       print(chunk.choices[0].delta.content, end='', flush=True)
+       # print(chunk.choices[0].delta.content, end='', flush=True)
      if len(total) > self.config.progressbar_length:
        total = total[-self.config.progressbar_length:]
-     # prog.set_postfix_str(total.replace("\n", ""), refresh=False)
+     prog.set_postfix_str(total.replace("\n", "").replace("\r", ""), refresh=False)
      finish_reason = chunk.choices[0].finish_reason
      if finish_reason == 'length':
        raise Exception('generation too long')
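
The behavioral core of this change: the vLLM-specific guided-decoding parameters are now attached to the request only when `use_guided_generation` is set, so any OpenAI-compatible server can be used. A minimal standalone sketch of the same pattern (the endpoint, model name, `use_guided` flag, and toy grammar below are illustrative placeholders, not values from the repo):

# Minimal sketch: attach vLLM's guided-decoding extras only when enabled,
# so plain OpenAI-compatible servers keep working.
# `client`, the URL, `use_guided`, and the toy grammar are placeholders.
import openai

client = openai.Client(base_url="http://localhost:8000/v1/", api_key="x")
use_guided = False  # mirrors the new Config.use_guided_generation default

extra_body: dict = {}
if use_guided:
    # vLLM-only request parameters; other servers may reject or ignore them.
    extra_body = {
        "guided_grammar": 'root ::= "yes" | "no"',  # toy GBNF grammar
        "guided_decoding_backend": "xgrammar:no-fallback",
    }

r = client.chat.completions.create(
    model="llm",
    messages=[{"role": "user", "content": "Is water wet?"}],
    extra_body=extra_body,
)
print(r.choices[0].message.content)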

coreagent/arg_parsers.py

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+from argparse import ArgumentParser
+from typing import Sequence, Optional
+from collections.abc import Callable
+
+import openai
+
+from coreagent import set_default_config, Config
+
+def set_default_config_from_args(args: Sequence[str] | None = None, argument_parser_handler: Optional[Callable[[ArgumentParser], None]] = None):
+  """
+  Set default configuration from command-line arguments.
+  :param args: Where to parse from? Set to None to use command-line arguments.
+  :param argument_parser_handler: In case you want to get extra params.
+  :return: Parsed parameters.
+  """
+  arg_parser = ArgumentParser()
+  arg_parser.add_argument("--guided", "-g", action="store_true", default=False, help="Use xgrammar guided generation. ")
+  arg_parser.add_argument("--api-base-url", "-u", default='http://192.168.1.5:9900/v1/', help="OpenAI-Compatible API base url. ")
+  arg_parser.add_argument("--api-key", "-k", default="1", help="API key ")
+  arg_parser.add_argument("--model", "-m", default="llm", help="Model to use. ")
+  arg_parser.add_argument("--verbose", "-v", action="store_true", default=False, help="Show generation process via a progress bar. ")
+
+  if argument_parser_handler is not None:
+    argument_parser_handler(arg_parser)
+
+  args = arg_parser.parse_args(args)
+
+  if args.api_base_url is None:
+    args.api_base_url = None
+
+  if args.verbose:
+    print("[Verbose] Showing generation process via a progress bar. ")
+
+  if args.guided:
+    print("[Guided] Using guided generation (xgrammar). ")
+
+  cli = openai.Client(
+    base_url=args.api_base_url,
+    api_key=args.api_key,
+  )
+  set_default_config(Config(cli, args.model, use_guided_generation=args.guided, show_generation=args.verbose))
+
+  return args
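
Because `set_default_config_from_args` takes an optional argument list plus an `argument_parser_handler` callback, it can be driven programmatically as well as from `sys.argv`. A hedged sketch of both hooks (the flag values and the `--name` option are made up for illustration):

from argparse import ArgumentParser
from coreagent import set_default_config_from_args

# Hypothetical extra flag, registered via the handler hook.
def extra_args(ap: ArgumentParser):
    ap.add_argument("--name", default="demo")

# Passing a list instead of None bypasses sys.argv (handy in tests);
# the flag values here are made up.
args = set_default_config_from_args(
    ["-g", "-v", "-m", "my-model", "--name", "test-run"],
    argument_parser_handler=extra_args,
)
print(args.guided, args.verbose, args.model, args.name)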

coreagent/config.py

Lines changed: 4 additions & 2 deletions
@@ -15,12 +15,14 @@ class Config:
   temperature: Optional[float] = None
   frequency_penalty: float = None  # generally don't set this, may cause problems.
   generation_limit: int = 5000
-  show_generation: bool = False  # Don't use it for now, a bug in vLLM (tested as of <= v0.8.0) caused random junks to be streamed, check out vLLM Issue #15188.
   # ---- optional settings ----
-  progressbar_length: int = 50  # Not used for now
+  use_guided_generation: bool = False  # Disable if you're using non-vLLM deployments
   guided_decoding_backend: str = 'xgrammar:no-fallback'  # Tested with vLLM with Engine v0.
   use_stop_token: bool = False  # Tested not working with vLLM <= 0.8.0, since stop tokens are also considered during reasoning, see vLLM Issue #14170.
   chat_template_type: Optional[str] = None  # modified chat templates, only for vLLM, one of ["qwq" or None]
+  # ---- display only ----
+  show_generation: bool = False  # Don't use it for now, a bug in vLLM (tested as of <= v0.8.0) caused random junks to be streamed, check out vLLM Issue #15188.
+  progressbar_length: int = 50  # Not used for now
 
 # Default configuration (used internally, do NOT modify directly! )
 default_config: Optional[Config] = None
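
Callers that skip argument parsing can still opt in to the new field when constructing a `Config` directly, mirroring what `arg_parsers.py` does internally. A small sketch, assuming a placeholder endpoint and API key:

import openai
from coreagent import Config, set_default_config

# Placeholder endpoint and key; substitute your own deployment.
cli = openai.Client(base_url="http://localhost:8000/v1/", api_key="x")

# use_guided_generation now defaults to False, so non-vLLM servers work
# out of the box; enable it explicitly only on vLLM deployments.
set_default_config(Config(cli, "llm", use_guided_generation=True, show_generation=False))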

examples/bomber.py

Lines changed: 6 additions & 7 deletions
@@ -1,5 +1,7 @@
-import openai
-from coreagent import Agent, Config, set_default_config
+from coreagent import Agent, set_default_config_from_args, get_default_config
+
+# load default configuration from command-line arguments
+set_default_config_from_args()
 
 class Bomber:
   def __init__(self):
@@ -14,11 +16,8 @@ class Killer:
   def kill(self, name: str):
     return f"Update: {name} is now killed! "
 
-cli = openai.Client(
-  base_url='http://192.168.1.5:9900/v1/',
-  api_key='1',
-)
-set_default_config(Config(cli, "llm"))
+if get_default_config().use_guided_generation:
+  print("Using xgrammar guided generation. ")
 
 s = Agent()
 s.register_tool(Bomber())

examples/filetool.py

Lines changed: 14 additions & 12 deletions
@@ -1,20 +1,22 @@
+from argparse import ArgumentParser
+
 import openai
-from coreagent import Agent, Config, set_default_config, Identity
+from coreagent import Agent, Config, set_default_config_from_args, get_default_config
 from coreagent.builtin import FileTool
 
-cli = openai.Client(
-  base_url='http://192.168.1.5:9900/v1/',
-  api_key='1',
-)
-set_default_config(Config(cli, "llm"))
+# We have extra arguments.
+def register_extra_args(ap: ArgumentParser):
+  ap.add_argument('--root-dir', '-d', default='.', type=str)
+  ap.add_argument('--allow-write', '-w', default=False, type=bool)
+
+# load default configuration from command-line arguments
+args = set_default_config_from_args(argument_parser_handler=register_extra_args)
 
-import argparse
-ap = argparse.ArgumentParser()
-ap.add_argument('--root-dir', '-d', default='.', type=str)
-ap.add_argument('--allow-write', '-w', default=False, type=bool)
-args = ap.parse_args()
+# update some required params since we might generate a lot!
+default_config: Config = get_default_config()
+default_config.generation_limit = 5000
 
-s = Agent(Identity(show_generation=False, generation_limit=5000, temperature=0.0))
+s = Agent()
 s.register_tool(FileTool(args.root_dir), exclude=['write_file', 'mkdir'] if not args.allow_write else None)
 
 while True:
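
One caveat with `--allow-write` above: argparse's `type=bool` simply calls `bool()` on the raw string, so any non-empty value, including the literal `False`, parses as `True`; `action='store_true'` is the usual idiom. A quick illustration:

from argparse import ArgumentParser

ap = ArgumentParser()
ap.add_argument("--allow-write-bool", type=bool, default=False)  # bool("False") is True!
ap.add_argument("--allow-write", action="store_true")            # absent -> False, present -> True

args = ap.parse_args(["--allow-write-bool", "False", "--allow-write"])
print(args.allow_write_bool)  # True, despite the literal "False"
print(args.allow_write)       # True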

examples/guard_and_thief.py

Lines changed: 3 additions & 9 deletions
@@ -1,13 +1,7 @@
-import os
+from coreagent import Agent, Identity, set_default_config_from_args
 
-import openai
-from coreagent import Agent, Identity, Config, set_default_config
-
-cli = openai.Client(
-  base_url='http://192.168.1.5:9900/v1/',
-  api_key='1',
-)
-set_default_config(Config(cli, "llm"))
+# load default configuration from command-line arguments
+set_default_config_from_args()
 
 class TortureState:
   def __init__(self):

examples/toolgen.py

Lines changed: 6 additions & 7 deletions
@@ -1,14 +1,13 @@
 import openai
-from coreagent import Agent, Config, set_default_config, Identity
+from coreagent import Agent, Identity, set_default_config_from_args
 from coreagent.builtin import ToolGen
 
-cli = openai.Client(
-  base_url='http://192.168.1.5:9900/v1/',
-  api_key='1',
-)
-set_default_config(Config(cli, "llm"))
+# load default configuration from command-line arguments
+set_default_config_from_args()
 
-s = Agent(Identity(show_generation=False, generation_limit=5000, temperature=0.0))
+s = Agent()
 s.register_tool(ToolGen(s))
 
 while True:
