Skip to content

Commit db8f30e

Browse files
authored
Merge pull request #1292 from containers/pass-args-to-ramalama-run-core
Pass args to ramalama run core
2 parents a13764c + bb259ad commit db8f30e

File tree

2 files changed

+4
-8
lines changed

2 files changed

+4
-8
lines changed

libexec/ramalama/ramalama-run-core

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,9 @@ def main(args):
7070

7171
args = Namespace(
7272
container=False, dryrun=False, engine=None, podman_keep_groups=False,
73-
image='quay.io/ramalama/ramalama', runtime='llama.cpp',
73+
image=None, runtime='llama.cpp',
7474
store=os.path.expanduser("~/.local/share/ramalama"), use_model_store=False,
75-
quiet=False, debug=False, subcommand='serve', ngl=-1, threads=6,
75+
quiet=False, debug=False, subcommand='serve', ngl=parsed_args.ngl, threads=parsed_args.threads,
7676
temp=parsed_args.temp, authfile=None, env=[], device=None, name=None,
7777
oci_runtime=None, privileged=False, pull='newer', seed=None,
7878
tlsverify=True, context=parsed_args.context, runtime_args=[], network=None,

ramalama/model.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -272,12 +272,8 @@ def gpu_args(self, args, runner=False):
272272
if self.draft_model:
273273
# Use the same arg as ngl to reduce configuration space
274274
gpu_args += ["-ngld", f'{args.ngl}']
275-
# for some reason the --threads option is blowing up on Docker,
276-
# with option not being supported by llama-run.
277-
# This could be something being masked in a Docker container but not
278-
# in a Podman container.
279-
if args.threads != -1 and args.engine and os.path.basename(args.engine) != "docker":
280-
gpu_args += ["--threads", f"{args.threads}"]
275+
276+
gpu_args += ["--threads", f"{args.threads}"]
281277

282278
return gpu_args
283279

0 commit comments

Comments (0)