Skip to content

Commit c6ea837

Browse files
committed
Integrated daemon into RamaLama CLI
Integrated the RamaLama daemon into the CLI. The --use-daemon option is used as a feature flag to enable its usage by the serve and run commands. Signed-off-by: Michael Engel <[email protected]>
1 parent 97fcbe5 commit c6ea837

File tree

18 files changed

+438
-133
lines changed

18 files changed

+438
-133
lines changed

docs/ramalama.1.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ The default can be overridden in the ramalama.conf file.
132132
store AI Models in the specified directory (default rootless: `$HOME/.local/share/ramalama`, default rootful: `/var/lib/ramalama`)
133133
The default can be overridden in the ramalama.conf file.
134134

135+
#### **--use-daemon**
136+
Feature flag to enable using the RamaLama daemon as backend for [ramalama-serve(1)](ramalama-serve.1.md) and [ramalama-run(1)](ramalama-run.1.md).
137+
138+
135139
## COMMANDS
136140

137141
| Command | Description |

docsite/docs/commands/ramalama/ramalama.mdx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ The default can be overridden in the ramalama.conf file.
142142
store AI Models in the specified directory (default rootless: `$HOME/.local/share/ramalama`, default rootful: `/var/lib/ramalama`)
143143
The default can be overridden in the ramalama.conf file.
144144

145+
#### **--use-daemon**
146+
Feature flag to enable using the RamaLama daemon as backend for [ramalama-serve(1)](/docs/commands/ramalama/serve) and [ramalama-run(1)](/docs/commands/ramalama/run).
147+
145148
## COMMANDS
146149

147150
| Command | Description |

ramalama/cli.py

Lines changed: 44 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
get_inference_spec_files,
4040
load_file_config,
4141
)
42+
from ramalama.daemon_stub import DaemonStub, run_daemon
4243
from ramalama.endian import EndianMismatchError
4344
from ramalama.logger import configure_logger, logger
4445
from ramalama.model_inspect.error import ParseError
@@ -218,6 +219,7 @@ def create_argument_parser(description: str):
218219
prog="ramalama",
219220
description=description,
220221
formatter_class=argparse.RawTextHelpFormatter,
222+
exit_on_error=False,
221223
)
222224
configure_arguments(parser)
223225
return parser
@@ -279,6 +281,21 @@ def configure_arguments(parser):
279281
help=argparse.SUPPRESS,
280282
)
281283

284+
# Feature flags
285+
parser.add_argument(
286+
"--use-daemon",
287+
dest="use_daemon",
288+
default=False,
289+
action="store_true",
290+
help="Feature Flag: Enable using the daemon as backend by default",
291+
)
292+
parser.add_argument(
293+
"--daemon-name",
294+
dest="daemon_name",
295+
help="Requires --use-daemon. Specifies the name of the daemon either for creation or further use.",
296+
completer=suppressCompleter,
297+
)
298+
282299

283300
def configure_subcommands(parser):
284301
"""Add subcommand parsers to the main argument parser."""
@@ -499,7 +516,14 @@ def containers_parser(subparsers):
499516

500517

501518
def list_containers(args):
502-
containers = engine.containers(args)
519+
label = engine.LABEL_CONTAINER_RAMALAMA
520+
521+
# Feature Flag:
522+
# Use daemon backend when feature flag is given
523+
if args.use_daemon:
524+
label = engine.LABEL_CONTAINER_RAMALAMA_DAEMON
525+
526+
containers = engine.containers(args, label)
503527
if len(containers) == 0:
504528
return
505529
print("\n".join(containers))
@@ -1137,6 +1161,14 @@ def run_cli(args):
11371161
except Exception as exc:
11381162
raise e from exc
11391163

1164+
# Feature Flag:
1165+
# Use daemon backend when feature flag is given
1166+
if args.use_daemon:
1167+
stub = DaemonStub(shortnames, args, sys.argv)
1168+
serve_path = stub.serve_model(getattr(args, "daemon_name"))
1169+
stub.chat(serve_path)
1170+
return
1171+
11401172
if args.rag:
11411173
if not args.container:
11421174
raise ValueError("ramalama run --rag cannot be run with the --nocontainer option.")
@@ -1155,6 +1187,15 @@ def serve_parser(subparsers):
11551187

11561188

11571189
def serve_cli(args):
1190+
1191+
# Feature Flag:
1192+
# Use daemon backend when feature flag is given
1193+
if args.use_daemon:
1194+
stub = DaemonStub(shortnames, args, sys.argv)
1195+
serve_path = stub.serve_model(getattr(args, "daemon_name"))
1196+
stub.wait_for_model(serve_path)
1197+
return
1198+
11581199
if not args.container:
11591200
args.detach = False
11601201

@@ -1269,47 +1310,11 @@ def daemon_parser(subparsers) -> None:
12691310

12701311

12711312
def daemon_start_cli(args):
1272-
from ramalama.common import exec_cmd
1273-
1274-
daemon_cmd = []
1275-
daemon_model_store_dir = args.store
1276-
is_daemon_in_container = args.container and args.engine in get_args(SUPPORTED_ENGINES)
1277-
1278-
if is_daemon_in_container:
1279-
# If run inside a container, map the model store to the container internal directory
1280-
daemon_model_store_dir = "/ramalama/models"
1281-
1282-
daemon_cmd += [
1283-
args.engine,
1284-
"run",
1285-
"--pull",
1286-
args.pull,
1287-
"-d",
1288-
"-p",
1289-
f"{args.port}:8080",
1290-
"-v",
1291-
f"{args.store}:{daemon_model_store_dir}",
1292-
args.image,
1293-
]
1294-
1295-
daemon_cmd += [
1296-
"ramalama",
1297-
"--store",
1298-
daemon_model_store_dir,
1299-
"daemon",
1300-
"run",
1301-
"--port",
1302-
"8080" if is_daemon_in_container else args.port,
1303-
"--host",
1304-
CONFIG.host if is_daemon_in_container else args.host,
1305-
]
1306-
exec_cmd(daemon_cmd)
1313+
DaemonStub(shortnames, args, sys.argv).start_daemon()
13071314

13081315

13091316
def daemon_run_cli(args):
1310-
from ramalama.daemon.daemon import run
1311-
1312-
run(host=args.host, port=int(args.port), model_store_path=args.store)
1317+
run_daemon(args)
13131318

13141319

13151320
def version_parser(subparsers):

ramalama/common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,8 @@ def verify_checksum(filename: str) -> bool:
286286
return sha256_hash.hexdigest() == expected_checksum
287287

288288

289-
def genname():
290-
return "ramalama_" + "".join(random.choices(string.ascii_letters + string.digits, k=10))
289+
def genname(prefix: str = "ramalama"):
    """Return a unique name of the form '<prefix>_' plus 10 random alphanumeric characters."""
    alphabet = string.ascii_letters + string.digits
    suffix = "".join(random.choice(alphabet) for _ in range(10))
    return f"{prefix}_{suffix}"
291291

292292

293293
def engine_version(engine: SUPPORTED_ENGINES) -> str:

ramalama/daemon/client.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import http
2+
import json
3+
import urllib.error
4+
import urllib.parse
5+
import urllib.request
6+
from typing import Any, Optional, Tuple
7+
8+
from ramalama.daemon.dto.model import ModelResponse, RunningModelResponse
9+
from ramalama.daemon.dto.serve import ServeRequest, ServeResponse, StopServeRequest
10+
from ramalama.logger import logger
11+
12+
13+
class DaemonAPIError(Exception):
    """Raised when a call to the RamaLama daemon REST API fails.

    Carries the failure reason and, when the failure came from an HTTP
    response, the status code.
    """

    def __init__(self, reason: str, code: Optional[http.HTTPStatus] = None, *args):
        super().__init__(*args)
        self.reason = reason
        self.code = code

    def __str__(self):
        # Include the status code only when one was supplied.
        status = f" ({self.code})" if self.code else ""
        return f"Call to daemon API failed{status}: {self.reason}"
25+
26+
27+
class DaemonClient:
28+
29+
def __init__(self, host: str, port: int):
30+
self.host = host
31+
self.port = port
32+
33+
@property
34+
def base_url(self) -> str:
35+
return f"{self.host}:{self.port}"
36+
37+
def list_available_models(self) -> list[ModelResponse]:
38+
url = f"http://{self.base_url}/api/tags"
39+
resp, _ = DaemonClient.call_api(url)
40+
if resp:
41+
return [ModelResponse(**model) for model in resp["models"]]
42+
43+
def list_running_models(self) -> list[RunningModelResponse]:
44+
url = f"http://{self.base_url}/api/ps"
45+
resp, _ = DaemonClient.call_api(url)
46+
if resp:
47+
return [RunningModelResponse(**model) for model in resp["models"]]
48+
49+
def start_model(self, model_name: str, runtime: str, exec_args: list[str]) -> Optional[str]:
50+
url = f"http://{self.base_url}/api/serve"
51+
request = ServeRequest(model_name, runtime, exec_args).to_dict()
52+
resp, _ = DaemonClient.call_api(url, method=http.HTTPMethod.POST, json_data=request)
53+
if resp:
54+
return f"http://{self.base_url}{ServeResponse(**resp).serve_path}"
55+
return None
56+
57+
def stop_model(self, model_name: str) -> Optional[str]:
58+
url = f"http://{self.base_url}/api/stop"
59+
request = StopServeRequest(model_name).to_dict()
60+
DaemonClient.call_api(url, method=http.HTTPMethod.POST, json_data=request)
61+
62+
def is_healthy(self) -> bool:
63+
url = f"http://{self.base_url}/api/health"
64+
try:
65+
_, code = DaemonClient.call_api(url)
66+
logger.debug(f"Health check success, code: {code}")
67+
return code == http.HTTPStatus.NO_CONTENT
68+
except DaemonAPIError as e:
69+
logger.debug(f"Health check failed: {e}")
70+
return False
71+
72+
@staticmethod
73+
def call_api(
74+
url: str, method: http.HTTPMethod = http.HTTPMethod.GET, headers=None, params=None, json_data=None, timeout=10
75+
) -> Tuple[Any | None, http.HTTPStatus]:
76+
headers = headers or {}
77+
78+
if params:
79+
query_string = urllib.parse.urlencode(params)
80+
separator = '&' if '?' in url else '?'
81+
url = f"{url}{separator}{query_string}"
82+
83+
body = None
84+
if json_data is not None:
85+
body = json.dumps(json_data).encode('utf-8')
86+
headers['Content-Type'] = 'application/json'
87+
88+
req = urllib.request.Request(url, data=body, headers=headers, method=method.value)
89+
try:
90+
with urllib.request.urlopen(req, timeout=timeout) as response:
91+
response_code = response.getcode()
92+
response_data = response.read().decode('utf-8')
93+
try:
94+
return json.loads(response_data), response_code
95+
except json.JSONDecodeError:
96+
return response_data, response_code
97+
except urllib.error.HTTPError as e:
98+
raise DaemonAPIError(e.reason, e.code)
99+
except urllib.error.URLError as e:
100+
raise DaemonAPIError(e.reason)
101+
except ConnectionResetError:
102+
raise DaemonAPIError("Connection reset")
103+
except Exception as e:
104+
raise DaemonAPIError(f"Unexpected error occurred: {e}")

ramalama/daemon/daemon.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from datetime import datetime, timedelta
88

99
from ramalama.daemon.handler.ramalama import RamalamaHandler
10-
from ramalama.daemon.logging import LogLevel, configure_logger, logger
10+
from ramalama.daemon.logger import LogLevel, configure_logger, logger
1111
from ramalama.daemon.service.model_runner import ModelRunner
1212

1313

@@ -61,26 +61,22 @@ def finish_request(self, request, client_address):
6161

6262
def check_model_expiration(self):
6363
curr_time = datetime.now()
64-
for name, m in self.model_runner.managed_models.items():
64+
65+
for id in list(self.model_runner.managed_models.keys()):
66+
m = self.model_runner.managed_models[id]
6567
if m.expiration_date > curr_time:
6668
continue
6769

6870
try:
69-
logger.info(f"Stopping expired model '{name}'...")
71+
logger.info(f"Stopping expired model '{m.model.model_organization}/{m.model.model_name}'...")
7072
self.model_runner.stop_model(m.id)
7173
except Exception as e:
72-
logger.error(f"Failed to stop expired model '{name}': {e}")
74+
logger.error(f"Failed to stop expired model '{m.model.model_organization}/{m.model.model_name}': {e}")
7375

7476
def shutdown(self):
7577
logger.info("Shutting down ramalama daemon...")
7678

77-
for name, managed_model in self.model_runner.managed_models.items():
78-
try:
79-
logger.info(f"Stopping model runner {name}...")
80-
self.model_runner.stop_model(managed_model.id)
81-
except Exception as e:
82-
logger.error(f"Error stopping model runner {name}: {e}")
83-
79+
self.model_runner.stop()
8480
super().shutdown()
8581

8682

ramalama/daemon/dto/model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ class RunningModelResponse:
7878
size_vram: int
7979
digest: str
8080
cmd: str
81+
serve_path: str
8182

8283
def to_dict(self) -> dict:
8384
return {
@@ -91,6 +92,7 @@ def to_dict(self) -> dict:
9192
"size_vram": self.size_vram,
9293
"digest": self.digest,
9394
"cmd": self.cmd,
95+
"serve_path": self.serve_path,
9496
}
9597

9698
def serialize(self) -> str:

ramalama/daemon/dto/serve.py

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import json
22
from dataclasses import dataclass
33

4-
from ramalama.config import BaseConfig
54
from ramalama.daemon.dto.errors import MissingArgumentError
65

76

@@ -10,17 +9,13 @@ class ServeRequest:
109

1110
model_name: str
1211
runtime: str
13-
exec_args: dict[str, str]
12+
exec_args: list[str]
1413

1514
def to_dict(self) -> dict:
1615
return {
1716
"model_name": self.model_name,
1817
"runtime": self.runtime,
19-
"exec_args": dict(
20-
[
21-
(key, value) for key, value in self.exec_args.items() if type(value) is str
22-
] # Filter out non-string values
23-
),
18+
"exec_args": [entry for entry in self.exec_args],
2419
}
2520

2621
def serialize(self) -> str:
@@ -38,27 +33,7 @@ def from_string(data: str) -> "ServeRequest":
3833
if not runtime:
3934
raise MissingArgumentError("runtime")
4035

41-
base_exec_args = BaseConfig().__dict__
42-
exec_args_input = data_dict.get("exec_args", {})
43-
# merge missing args to args from base config
44-
exec_args = {
45-
**base_exec_args,
46-
**{
47-
"runtime_args": [],
48-
"debug": False,
49-
},
50-
}
51-
# overwrite with input args
52-
exec_args = {**exec_args, **exec_args_input}
53-
# overwrite certain values which do not make sense in this context
54-
exec_args = {
55-
**exec_args,
56-
**{
57-
"container": False,
58-
"generate": False,
59-
"dryrun": False,
60-
},
61-
}
36+
exec_args = data_dict.get("exec_args", [])
6237

6338
return ServeRequest(
6439
model_name=model_name,

0 commit comments

Comments
 (0)