3 changes: 3 additions & 0 deletions docs/ramalama-run.1.md
@@ -43,6 +43,9 @@ Possible values are "never", "always" and "auto". (default: auto)
 #### **--ctx-size**, **-c**
 size of the prompt context. This option is also available as **--max-model-len**. Applies to llama.cpp and vllm regardless of alias (default: 4096, 0 = loaded from model)
 
+#### **--default-template**
+Use the default chat template instead of model-specific chat template files. When specified, RamaLama will not use any extracted chat template files from the model and will rely on the runtime's built-in default template handling.
+
 #### **--device**
 Add a host device to the container. Optional permissions parameter can
 be used to specify device permissions by combining r for read, w for
3 changes: 3 additions & 0 deletions docs/ramalama-serve.1.md
@@ -63,6 +63,9 @@ Min chunk size to attempt reusing from the cache via KV shifting
 #### **--ctx-size**, **-c**
 size of the prompt context. This option is also available as **--max-model-len**. Applies to llama.cpp and vllm regardless of alias (default: 4096, 0 = loaded from model)
 
+#### **--default-template**
+Use the default chat template instead of model-specific chat template files. When specified, RamaLama will not use any extracted chat template files from the model and will rely on the runtime's built-in default template handling.
+
 #### **--detach**, **-d**
 Run the container in the background and print the new container ID.
 The default is TRUE. The --nocontainer option forces this option to False.
7 changes: 7 additions & 0 deletions ramalama/cli.py
@@ -892,6 +892,13 @@ def runtime_options(parser, command):
         help="enable/disable thinking mode in reasoning models",
         action=CoerceToBool,
     )
+    if command in ["run", "serve"]:
+        parser.add_argument(
+            "--default-template",
+            dest="default_template",
+            action="store_true",
+            help="use the default chat template instead of model-specific chat template files",
+        )
     parser.add_argument(
         "--oci-runtime",
         help="override the default OCI runtime used to launch the container",
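For orientation, here is a minimal sketch of the argparse mechanics this hunk relies on. The parser below is a stand-in for illustration only, not RamaLama's real runtime_options() wiring: with action="store_true", args.default_template is False when the flag is omitted and True when it is passed.

import argparse

# Stand-in parser showing only the new option; the real runtime_options()
# registers many more arguments and adds this one only for "run" and "serve".
parser = argparse.ArgumentParser(prog="ramalama run")
parser.add_argument(
    "--default-template",
    dest="default_template",
    action="store_true",
    help="use the default chat template instead of model-specific chat template files",
)

print(parser.parse_args([]).default_template)                      # False
print(parser.parse_args(["--default-template"]).default_template)  # True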
9 changes: 6 additions & 3 deletions ramalama/daemon/service/command_factory.py
@@ -82,9 +82,12 @@ def _build_llama_serve_command(self) -> list[str]:
         else:
             cmd += ["--jinja"]
 
-        chat_template_path = self.model._get_chat_template_path(False, False, False)
-        if chat_template_path:
-            cmd += ["--chat-template-file", chat_template_path]
+        # Add chat template unless using default template
+        use_default_template = self.request_args.get("default_template", False)
+        if not use_default_template:
+            chat_template_path = self.model._get_chat_template_path(False, False, False)
+            if chat_template_path:
+                cmd += ["--chat-template-file", chat_template_path]
Contributor comment on lines +86 to +90:
suggestion (bug_risk): Type coercion for 'default_template' from request_args may be needed.

Explicitly convert 'default_template' to a boolean to prevent logic errors from string values like 'false' or '0'.

Suggested change
-        use_default_template = self.request_args.get("default_template", False)
-        if not use_default_template:
-            chat_template_path = self.model._get_chat_template_path(False, False, False)
-            if chat_template_path:
-                cmd += ["--chat-template-file", chat_template_path]
+        raw_default_template = self.request_args.get("default_template", False)
+        # Coerce to boolean: treat 'false', '0', '', None as False, everything else as True
+        if isinstance(raw_default_template, str):
+            use_default_template = raw_default_template.lower() in ("true", "1", "yes")
+        else:
+            use_default_template = bool(raw_default_template)
+        if not use_default_template:
+            chat_template_path = self.model._get_chat_template_path(False, False, False)
+            if chat_template_path:
+                cmd += ["--chat-template-file", chat_template_path]
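The concern is easy to reproduce in isolation: in Python, any non-empty string is truthy, so a request argument carrying the string "false" would still disable the chat-template file here. A small sketch of the coercion idea, where str_to_bool is an illustrative helper rather than an existing RamaLama function:

def str_to_bool(value) -> bool:
    # Treat common "falsey" strings as False; fall back to bool() otherwise.
    if isinstance(value, str):
        return value.strip().lower() in ("true", "1", "yes")
    return bool(value)

print(bool("false"))         # True  -- the bug risk: a non-empty string is truthy
print(str_to_bool("false"))  # False -- explicit coercion handles it
print(str_to_bool("1"))      # True
print(str_to_bool(None))     # False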


         cmd += [
             "--alias",
40 changes: 27 additions & 13 deletions ramalama/model.py
@@ -647,9 +647,12 @@ def llama_serve(self, args):
         else:
             exec_args += ["--jinja"]
 
-        chat_template_path = self._get_chat_template_path(args.container, args.generate, args.dryrun)
-        if chat_template_path is not None:
-            exec_args += ["--chat-template-file", chat_template_path]
+        # Add chat template unless using default template
+        use_default_template = getattr(args, 'default_template', False)
+        if not use_default_template:
+            chat_template_path = self._get_chat_template_path(args.container, args.generate, args.dryrun)
+            if chat_template_path is not None:
+                exec_args += ["--chat-template-file", chat_template_path]

         if should_colorize():
             exec_args += ["--log-colors", "on"]
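Note the use of getattr(args, 'default_template', False) rather than args.default_template: cli.py only registers the flag for the run and serve subcommands, so other argparse namespaces may not carry the attribute at all. A minimal sketch of why the fallback matters, with namespaces constructed by hand purely for illustration:

from argparse import Namespace

serve_args = Namespace(default_template=True)  # flag registered and passed
other_args = Namespace()                       # command without the flag

print(getattr(serve_args, "default_template", False))  # True
print(getattr(other_args, "default_template", False))  # False (no AttributeError)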
@@ -739,44 +742,55 @@ def handle_runtime(self, args, exec_args):
     def generate_container_config(self, args, exec_args):
         # Get the blob paths (src) and mounted paths (dest)
         model_src_path = self._get_entry_model_path(False, False, args.dryrun)
-        chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
         mmproj_src_path = self._get_mmproj_path(False, False, args.dryrun)
         model_dest_path = self._get_entry_model_path(True, True, args.dryrun)
-        chat_template_dest_path = self._get_chat_template_path(True, True, args.dryrun)
         mmproj_dest_path = self._get_mmproj_path(True, True, args.dryrun)

+        # Get chat template paths unless using default template
+        use_default_template = getattr(args, 'default_template', False)
+        if use_default_template:
+            chat_template_src_path = None
+            chat_template_dest_path = None
+        else:
+            chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
+            chat_template_dest_path = self._get_chat_template_path(True, True, args.dryrun)
+
+        # Prepare chat template paths tuple or None
+        chat_template_paths = None if chat_template_src_path is None else (chat_template_src_path, chat_template_dest_path)
Contributor comment on lines +749 to +759 (medium):

This logic for determining chat_template_paths can be simplified for better readability and conciseness. You can initialize chat_template_paths to None and then update it inside a single if block. This avoids defining chat_template_src_path and chat_template_dest_path in a wider scope than necessary.

Suggested change
-        # Get chat template paths unless using default template
-        use_default_template = getattr(args, 'default_template', False)
-        if use_default_template:
-            chat_template_src_path = None
-            chat_template_dest_path = None
-        else:
-            chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
-            chat_template_dest_path = self._get_chat_template_path(True, True, args.dryrun)
-        # Prepare chat template paths tuple or None
-        chat_template_paths = None if chat_template_src_path is None else (chat_template_src_path, chat_template_dest_path)
+        # Get chat template paths unless using default template
+        use_default_template = getattr(args, 'default_template', False)
+        chat_template_paths = None
+        if not use_default_template:
+            chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
+            if chat_template_src_path is not None:
+                chat_template_dest_path = self._get_chat_template_path(True, True, args.dryrun)
+                chat_template_paths = (chat_template_src_path, chat_template_dest_path)
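Either version keeps the same contract for the generators below: chat_template_paths is either None or a (src, dest) tuple. As a hedged sketch of how such a tuple-or-None argument is typically consumed, here is a hypothetical mount-building helper; it is illustrative only and not the actual quadlet/kube/compose code:

from typing import Optional, Tuple

def build_mounts(
    model_paths: Tuple[str, str],
    chat_template_paths: Optional[Tuple[str, str]],
) -> list[str]:
    # Always mount the model; only mount a chat template when a pair was provided.
    mounts = [f"{model_paths[0]}:{model_paths[1]}"]
    if chat_template_paths is not None:
        src, dest = chat_template_paths
        mounts.append(f"{src}:{dest}")
    return mounts

print(build_mounts(("/store/model.gguf", "/mnt/model.gguf"), None))
print(build_mounts(("/store/model.gguf", "/mnt/model.gguf"),
                   ("/store/chat.jinja", "/mnt/chat.jinja")))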

+        mmproj_paths = None if mmproj_src_path is None else (mmproj_src_path, mmproj_dest_path)

         if args.generate.gen_type == "quadlet":
             self.quadlet(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,
             )
         elif args.generate.gen_type == "kube":
             self.kube(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,
             )
         elif args.generate.gen_type == "quadlet/kube":
             self.quadlet_kube(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,
             )
         elif args.generate.gen_type == "compose":
             self.compose(
                 (model_src_path, model_dest_path),
-                (chat_template_src_path, chat_template_dest_path),
-                (mmproj_src_path, mmproj_dest_path),
+                chat_template_paths,
+                mmproj_paths,
                 args,
                 exec_args,
                 args.generate.output_dir,