3 changes: 3 additions & 0 deletions docs/ramalama-run.1.md
@@ -43,6 +43,9 @@ Possible values are "never", "always" and "auto". (default: auto)
 #### **--ctx-size**, **-c**
 size of the prompt context. This option is also available as **--max-model-len**. Applies to llama.cpp and vllm regardless of alias (default: 4096, 0 = loaded from model)
 
+#### **--default-template**
+Use the default chat template instead of model-specific chat template files. When specified, RamaLama will not use any extracted chat template files from the model and will rely on the runtime's built-in default template handling.
+
 #### **--device**
 Add a host device to the container. Optional permissions parameter can
 be used to specify device permissions by combining r for read, w for
3 changes: 3 additions & 0 deletions docs/ramalama-serve.1.md
@@ -63,6 +63,9 @@ Min chunk size to attempt reusing from the cache via KV shifting
 #### **--ctx-size**, **-c**
 size of the prompt context. This option is also available as **--max-model-len**. Applies to llama.cpp and vllm regardless of alias (default: 4096, 0 = loaded from model)
 
+#### **--default-template**
+Use the default chat template instead of model-specific chat template files. When specified, RamaLama will not use any extracted chat template files from the model and will rely on the runtime's built-in default template handling.
+
 #### **--detach**, **-d**
 Run the container in the background and print the new container ID.
 The default is TRUE. The --nocontainer option forces this option to False.
7 changes: 7 additions & 0 deletions ramalama/cli.py
@@ -892,6 +892,13 @@ def runtime_options(parser, command):
         help="enable/disable thinking mode in reasoning models",
         action=CoerceToBool,
     )
+    if command in ["run", "serve"]:
+        parser.add_argument(
+            "--default-template",
+            dest="default_template",
+            action="store_true",
+            help="use the default chat template instead of model-specific chat template files",
+        )
     parser.add_argument(
         "--oci-runtime",
         help="override the default OCI runtime used to launch the container",
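For orientation, here is a minimal sketch of the argparse mechanics this hunk relies on. The parser below is a stand-in for illustration only, not RamaLama's real runtime_options() wiring: with action="store_true", args.default_template is False when the flag is omitted and True when it is passed.

import argparse

# Stand-in parser showing only the new option; the real runtime_options()
# registers many more arguments and adds this one only for "run" and "serve".
parser = argparse.ArgumentParser(prog="ramalama run")
parser.add_argument(
    "--default-template",
    dest="default_template",
    action="store_true",
    help="use the default chat template instead of model-specific chat template files",
)

print(parser.parse_args([]).default_template)                      # False
print(parser.parse_args(["--default-template"]).default_template)  # True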
9 changes: 6 additions & 3 deletions ramalama/daemon/service/command_factory.py
@@ -82,9 +82,12 @@ def _build_llama_serve_command(self) -> list[str]:
         else:
             cmd += ["--jinja"]
 
-        chat_template_path = self.model._get_chat_template_path(False, False, False)
-        if chat_template_path:
-            cmd += ["--chat-template-file", chat_template_path]
+        # Add chat template unless using default template
+        use_default_template = self.request_args.get("default_template", False)
+        if not use_default_template:
+            chat_template_path = self.model._get_chat_template_path(False, False, False)
+            if chat_template_path:
+                cmd += ["--chat-template-file", chat_template_path]
Contributor comment on lines +86 to +90:
suggestion (bug_risk): Type coercion for 'default_template' from request_args may be needed.

Explicitly convert 'default_template' to a boolean to prevent logic errors from string values like 'false' or '0'.

Suggested change
-        use_default_template = self.request_args.get("default_template", False)
-        if not use_default_template:
-            chat_template_path = self.model._get_chat_template_path(False, False, False)
-            if chat_template_path:
-                cmd += ["--chat-template-file", chat_template_path]
+        raw_default_template = self.request_args.get("default_template", False)
+        # Coerce to boolean: treat 'false', '0', '', None as False, everything else as True
+        if isinstance(raw_default_template, str):
+            use_default_template = raw_default_template.lower() in ("true", "1", "yes")
+        else:
+            use_default_template = bool(raw_default_template)
+        if not use_default_template:
+            chat_template_path = self.model._get_chat_template_path(False, False, False)
+            if chat_template_path:
+                cmd += ["--chat-template-file", chat_template_path]
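The concern is easy to reproduce in isolation: in Python, any non-empty string is truthy, so a request argument carrying the string "false" would still disable the chat-template file here. A small sketch of the coercion idea, where str_to_bool is an illustrative helper rather than an existing RamaLama function:

def str_to_bool(value) -> bool:
    # Treat common "falsey" strings as False; fall back to bool() otherwise.
    if isinstance(value, str):
        return value.strip().lower() in ("true", "1", "yes")
    return bool(value)

print(bool("false"))         # True  -- the bug risk: a non-empty string is truthy
print(str_to_bool("false"))  # False -- explicit coercion handles it
print(str_to_bool("1"))      # True
print(str_to_bool(None))     # False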


         cmd += [
             "--alias",
40 changes: 27 additions & 13 deletions ramalama/model.py
@@ -647,9 +647,12 @@ def llama_serve(self, args):
         else:
             exec_args += ["--jinja"]
 
-        chat_template_path = self._get_chat_template_path(args.container, args.generate, args.dryrun)
-        if chat_template_path is not None:
-            exec_args += ["--chat-template-file", chat_template_path]
+        # Add chat template unless using default template
+        use_default_template = getattr(args, 'default_template', False)
+        if not use_default_template:
+            chat_template_path = self._get_chat_template_path(args.container, args.generate, args.dryrun)
+            if chat_template_path is not None:
+                exec_args += ["--chat-template-file", chat_template_path]

         if should_colorize():
             exec_args += ["--log-colors", "on"]
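Note the use of getattr(args, 'default_template', False) rather than args.default_template: cli.py only registers the flag for the run and serve subcommands, so other argparse namespaces may not carry the attribute at all. A minimal sketch of why the fallback matters, with namespaces constructed by hand purely for illustration:

from argparse import Namespace

serve_args = Namespace(default_template=True)  # flag registered and passed
other_args = Namespace()                       # command without the flag

print(getattr(serve_args, "default_template", False))  # True
print(getattr(other_args, "default_template", False))  # False (no AttributeError)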
@@ -739,44 +742,55 @@ def handle_runtime(self, args, exec_args):
     def generate_container_config(self, args, exec_args):
         # Get the blob paths (src) and mounted paths (dest)
         model_src_path = self._get_entry_model_path(False, False, args.dryrun)
-        chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
         mmproj_src_path = self._get_mmproj_path(False, False, args.dryrun)
         model_dest_path = self._get_entry_model_path(True, True, args.dryrun)
-        chat_template_dest_path = self._get_chat_template_path(True, True, args.dryrun)
         mmproj_dest_path = self._get_mmproj_path(True, True, args.dryrun)

+        # Get chat template paths unless using default template
+        use_default_template = getattr(args, 'default_template', False)
+        if use_default_template:
+            chat_template_src_path = None
+            chat_template_dest_path = None
+        else:
+            chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
+            chat_template_dest_path = self._get_chat_template_path(True, True, args.dryrun)
+
+        # Prepare chat template paths tuple or None
+        chat_template_paths = None if chat_template_src_path is None else (chat_template_src_path, chat_template_dest_path)
Contributor comment on lines +749 to +759 (medium):

This logic for determining chat_template_paths can be simplified for better readability and conciseness. You can initialize chat_template_paths to None and then update it inside a single if block. This avoids defining chat_template_src_path and chat_template_dest_path in a wider scope than necessary.

Suggested change
-        # Get chat template paths unless using default template
-        use_default_template = getattr(args, 'default_template', False)
-        if use_default_template:
-            chat_template_src_path = None
-            chat_template_dest_path = None
-        else:
-            chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
-            chat_template_dest_path = self._get_chat_template_path(True, True, args.dryrun)
-        # Prepare chat template paths tuple or None
-        chat_template_paths = None if chat_template_src_path is None else (chat_template_src_path, chat_template_dest_path)
+        # Get chat template paths unless using default template
+        use_default_template = getattr(args, 'default_template', False)
+        chat_template_paths = None
+        if not use_default_template:
+            chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
+            if chat_template_src_path is not None:
+                chat_template_dest_path = self._get_chat_template_path(True, True, args.dryrun)
+                chat_template_paths = (chat_template_src_path, chat_template_dest_path)
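Either version keeps the same contract for the generators below: chat_template_paths is either None or a (src, dest) tuple. As a hedged sketch of how such a tuple-or-None argument is typically consumed, here is a hypothetical mount-building helper; it is illustrative only and not the actual quadlet/kube/compose code:

from typing import Optional, Tuple

def build_mounts(
    model_paths: Tuple[str, str],
    chat_template_paths: Optional[Tuple[str, str]],
) -> list[str]:
    # Always mount the model; only mount a chat template when a pair was provided.
    mounts = [f"{model_paths[0]}:{model_paths[1]}"]
    if chat_template_paths is not None:
        src, dest = chat_template_paths
        mounts.append(f"{src}:{dest}")
    return mounts

print(build_mounts(("/store/model.gguf", "/mnt/model.gguf"), None))
print(build_mounts(("/store/model.gguf", "/mnt/model.gguf"),
                   ("/store/chat.jinja", "/mnt/chat.jinja")))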

+        mmproj_paths = None if mmproj_src_path is None else (mmproj_src_path, mmproj_dest_path)

         if args.generate.gen_type == "quadlet":
             self.quadlet(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,
             )
         elif args.generate.gen_type == "kube":
             self.kube(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,
             )
         elif args.generate.gen_type == "quadlet/kube":
             self.quadlet_kube(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,
             )
         elif args.generate.gen_type == "compose":
             self.compose(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,