Skip to content

Commit db8f30e

Browse files
authored
Merge pull request #1292 from containers/pass-args-to-ramalama-run-core
Pass args to ramalama run core
2 parents a13764c + bb259ad commit db8f30e

File tree

2 files changed

+4
-8
lines changed

2 files changed

+4
-8
lines changed

libexec/ramalama/ramalama-run-core

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,9 @@ def main(args):
7070

7171
args = Namespace(
7272
container=False, dryrun=False, engine=None, podman_keep_groups=False,
73-
image='quay.io/ramalama/ramalama', runtime='llama.cpp',
73+
image=None, runtime='llama.cpp',
7474
store=os.path.expanduser("~/.local/share/ramalama"), use_model_store=False,
75-
quiet=False, debug=False, subcommand='serve', ngl=-1, threads=6,
75+
quiet=False, debug=False, subcommand='serve', ngl=parsed_args.ngl, threads=parsed_args.threads,
7676
temp=parsed_args.temp, authfile=None, env=[], device=None, name=None,
7777
oci_runtime=None, privileged=False, pull='newer', seed=None,
7878
tlsverify=True, context=parsed_args.context, runtime_args=[], network=None,

ramalama/model.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -272,12 +272,8 @@ def gpu_args(self, args, runner=False):
272272
if self.draft_model:
273273
# Use the same arg as ngl to reduce configuration space
274274
gpu_args += ["-ngld", f'{args.ngl}']
275-
# for some reason the --threads option is blowing up on Docker,
276-
# with option not being supported by llama-run.
277-
# This could be something being masked in a Docker container but not
278-
# in a Podman container.
279-
if args.threads != -1 and args.engine and os.path.basename(args.engine) != "docker":
280-
gpu_args += ["--threads", f"{args.threads}"]
275+
276+
gpu_args += ["--threads", f"{args.threads}"]
281277

282278
return gpu_args
283279

0 commit comments

Comments (0)