From a7bd59d3c8b77d66400870d51126ad1073fdce23 Mon Sep 17 00:00:00 2001
From: Brian
Date: Thu, 18 Sep 2025 15:08:57 -0400
Subject: [PATCH] feat: added new flag default-template for models to use tools

Signed-off-by: Brian
---
 docs/ramalama-run.1.md                     |  3 ++
 docs/ramalama-serve.1.md                   |  3 ++
 ramalama/cli.py                            |  7 ++++
 ramalama/daemon/service/command_factory.py |  9 +++--
 ramalama/model.py                          | 40 +++++++++++++++-------
 5 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/docs/ramalama-run.1.md b/docs/ramalama-run.1.md
index 8ed75f898..f3263e93b 100644
--- a/docs/ramalama-run.1.md
+++ b/docs/ramalama-run.1.md
@@ -43,6 +43,9 @@ Possible values are "never", "always" and "auto". (default: auto)
 #### **--ctx-size**, **-c**
 size of the prompt context. This option is also available as **--max-model-len**. Applies to llama.cpp and vllm regardless of alias (default: 4096, 0 = loaded from model)
 
+#### **--default-template**
+Use the default chat template instead of model-specific chat template files. When specified, RamaLama will not use any extracted chat template files from the model and will rely on the runtime's built-in default template handling.
+
 #### **--device**
 Add a host device to the container. Optional permissions parameter can
 be used to specify device permissions by combining r for read, w for
diff --git a/docs/ramalama-serve.1.md b/docs/ramalama-serve.1.md
index 74478a195..45344a377 100644
--- a/docs/ramalama-serve.1.md
+++ b/docs/ramalama-serve.1.md
@@ -63,6 +63,9 @@ Min chunk size to attempt reusing from the cache via KV shifting
 #### **--ctx-size**, **-c**
 size of the prompt context. This option is also available as **--max-model-len**. Applies to llama.cpp and vllm regardless of alias (default: 4096, 0 = loaded from model)
 
+#### **--default-template**
+Use the default chat template instead of model-specific chat template files. When specified, RamaLama will not use any extracted chat template files from the model and will rely on the runtime's built-in default template handling.
+
 #### **--detach**, **-d**
 Run the container in the background and print the new container ID. The default is TRUE.
 The --nocontainer option forces this option to False.
diff --git a/ramalama/cli.py b/ramalama/cli.py
index 8967c57fd..a42ce7b40 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -892,6 +892,13 @@ def runtime_options(parser, command):
         help="enable/disable thinking mode in reasoning models",
         action=CoerceToBool,
     )
+    if command in ["run", "serve"]:
+        parser.add_argument(
+            "--default-template",
+            dest="default_template",
+            action="store_true",
+            help="use the default chat template instead of model-specific chat template files",
+        )
     parser.add_argument(
         "--oci-runtime",
         help="override the default OCI runtime used to launch the container",
diff --git a/ramalama/daemon/service/command_factory.py b/ramalama/daemon/service/command_factory.py
index cade75fc0..0cde3de92 100644
--- a/ramalama/daemon/service/command_factory.py
+++ b/ramalama/daemon/service/command_factory.py
@@ -82,9 +82,12 @@ def _build_llama_serve_command(self) -> list[str]:
         else:
             cmd += ["--jinja"]
 
-        chat_template_path = self.model._get_chat_template_path(False, False, False)
-        if chat_template_path:
-            cmd += ["--chat-template-file", chat_template_path]
+        # Add chat template unless using default template
+        use_default_template = self.request_args.get("default_template", False)
+        if not use_default_template:
+            chat_template_path = self.model._get_chat_template_path(False, False, False)
+            if chat_template_path:
+                cmd += ["--chat-template-file", chat_template_path]
 
         cmd += [
             "--alias",
diff --git a/ramalama/model.py b/ramalama/model.py
index ba38dbcac..6cf99c992 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -647,9 +647,12 @@ def llama_serve(self, args):
         else:
             exec_args += ["--jinja"]
 
-        chat_template_path = self._get_chat_template_path(args.container, args.generate, args.dryrun)
-        if chat_template_path is not None:
-            exec_args += ["--chat-template-file", chat_template_path]
+        # Add chat template unless using default template
+        use_default_template = getattr(args, 'default_template', False)
+        if not use_default_template:
+            chat_template_path = self._get_chat_template_path(args.container, args.generate, args.dryrun)
+            if chat_template_path is not None:
+                exec_args += ["--chat-template-file", chat_template_path]
 
         if should_colorize():
             exec_args += ["--log-colors", "on"]
@@ -739,17 +742,28 @@ def handle_runtime(self, args, exec_args):
     def generate_container_config(self, args, exec_args):
         # Get the blob paths (src) and mounted paths (dest)
         model_src_path = self._get_entry_model_path(False, False, args.dryrun)
-        chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
         mmproj_src_path = self._get_mmproj_path(False, False, args.dryrun)
         model_dest_path = self._get_entry_model_path(True, True, args.dryrun)
-        chat_template_dest_path = self._get_chat_template_path(True, True, args.dryrun)
         mmproj_dest_path = self._get_mmproj_path(True, True, args.dryrun)
+
+        # Get chat template paths unless using default template
+        use_default_template = getattr(args, 'default_template', False)
+        if use_default_template:
+            chat_template_src_path = None
+            chat_template_dest_path = None
+        else:
+            chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
+            chat_template_dest_path = self._get_chat_template_path(True, True, args.dryrun)
+
+        # Prepare chat template paths tuple or None
+        chat_template_paths = None if chat_template_src_path is None else (chat_template_src_path, chat_template_dest_path)
+        mmproj_paths = None if mmproj_src_path is None else (mmproj_src_path, mmproj_dest_path)
 
         if args.generate.gen_type == "quadlet":
             self.quadlet(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,
@@ -757,8 +771,8 @@
         elif args.generate.gen_type == "kube":
             self.kube(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,
@@ -766,8 +780,8 @@
         elif args.generate.gen_type == "quadlet/kube":
             self.quadlet_kube(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,
@@ -775,8 +789,8 @@
         elif args.generate.gen_type == "compose":
             self.compose(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,
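
A minimal usage sketch of the flag documented in the man-page hunks above; the
model name and port below are illustrative placeholders, not part of this patch.

    # Hypothetical invocations once this patch is applied: skip any chat
    # template file extracted from the model and let the runtime fall back
    # to its built-in default template handling.
    ramalama run --default-template granite
    ramalama serve --default-template --port 8080 granite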