Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions docs/ramalama-convert.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,15 @@ Image to use when converting to GGUF format (when then `--gguf` option has been
executable and available in the `PATH`. The script is available from the `llama.cpp` GitHub repo. Defaults to the current
`quay.io/ramalama/ramalama-rag` image.

#### **--type**=*raw* | *car*
#### **--type**=*artifact* | *raw* | *car*

type of OCI Model Image to convert.
Convert the MODEL to the specified OCI Object

| Type | Description |
| ---- | ------------------------------------------------------------- |
| car | Includes base image with the model stored in a /models subdir |
| raw | Only the model and a link file model.file to it stored at / |
| Type | Description |
| -------- | ------------------------------------------------------------- |
| artifact | Store AI Models as artifacts |
| car | Traditional OCI image including base image with the model stored in a /models subdir |
| raw      | Traditional OCI image including only the model and a link file `model.file` pointing at it, stored at / |

## EXAMPLE

Expand Down
8 changes: 8 additions & 0 deletions docs/ramalama.conf
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@
#
#carimage = "registry.access.redhat.com/ubi10-micro:latest"

# Convert the MODEL to the specified OCI Object
# Options: artifact, car, raw
#
# artifact: Store AI Models as artifacts
# car: Traditional OCI image including base image with the model stored in a /models subdir
# raw: Traditional OCI image including only the model and a link file `model.file` pointing at it, stored at /
#convert_type = "raw"

# Run RamaLama in the default container.
#
#container = true
Expand Down
12 changes: 12 additions & 0 deletions docs/ramalama.conf.5.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ Min chunk size to attempt reusing from the cache via KV shifting
Run RamaLama in the default container.
RAMALAMA_IN_CONTAINER environment variable overrides this field.

#convert_type = "raw"

Convert the MODEL to the specified OCI Object
Options: artifact, car, raw

| Type | Description |
| -------- | ------------------------------------------------------------- |
| artifact | Store AI Models as artifacts |
| car | Traditional OCI image including base image with the model stored in a /models subdir |
| raw      | Traditional OCI image including only the model and a link file `model.file` pointing at it, stored at / |


**ctx_size**=0

Size of the prompt context (0 = loaded from model)
Expand Down
31 changes: 21 additions & 10 deletions ramalama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,11 +719,12 @@ def convert_parser(subparsers):
)
parser.add_argument(
"--type",
default="raw",
choices=["car", "raw"],
default=CONFIG.convert_type,
choices=["artifact", "car", "raw"],
help="""\
type of OCI Model Image to push.

Model "artifact" stores the AI Model as an OCI Artifact.
Model "car" includes base image with the model stored in a /models subdir.
Model "raw" contains the model and a link file model.file to it stored at /.""",
)
Expand Down Expand Up @@ -762,11 +763,12 @@ def push_parser(subparsers):
add_network_argument(parser)
parser.add_argument(
"--type",
default="raw",
choices=["car", "raw"],
default=CONFIG.convert_type,
choices=["artifact", "car", "raw"],
help="""\
type of OCI Model Image to push.

Model "artifact" stores the AI Model as an OCI Artifact.
Model "car" includes base image with the model stored in a /models subdir.
Model "raw" contains the model and a link file model.file to it stored at /.""",
)
Expand All @@ -781,22 +783,26 @@ def push_parser(subparsers):
parser.set_defaults(func=push_cli)


def _get_source_model(args):
def _get_source_model(args, transport=None):
src = shortnames.resolve(args.SOURCE)
if not src:
src = args.SOURCE
smodel = New(src, args)
if smodel.type == "OCI":
raise ValueError(f"converting from an OCI based image {src} is not supported")
smodel = New(src, args, transport=transport)
if not smodel.exists() and not args.dryrun:
smodel.pull(args)
return smodel


def push_cli(args):
source_model = _get_source_model(args)
target = args.SOURCE
transport = None
if not args.TARGET:
transport = "oci"
source_model = _get_source_model(args, transport=transport)

if args.TARGET:
if source_model.type == "OCI":
raise ValueError(f"converting from an OCI based image {args.SOURCE} is not supported")
target = shortnames.resolve(args.TARGET)
if not target:
target = args.TARGET
Expand Down Expand Up @@ -1173,9 +1179,14 @@ def serve_cli(args):
model.ensure_model_exists(args)
except KeyError as e:
try:
if "://" in args.MODEL:
raise e
args.quiet = True
model = TransportFactory(args.MODEL, args, ignore_stderr=True).create_oci()
model.ensure_model_exists(args)
# Since this is an OCI model, prepend oci://
args.MODEL = f"oci://{args.MODEL}"

except Exception:
raise e

Expand Down Expand Up @@ -1425,7 +1436,7 @@ def _rm_model(models, args):
try:
m = New(model, args)
m.remove(args)
except KeyError as e:
except (KeyError, subprocess.CalledProcessError) as e:
for prefix in MODEL_TYPES:
if model.startswith(prefix + "://"):
if not args.ignore:
Expand Down
2 changes: 1 addition & 1 deletion ramalama/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def verify_checksum(filename: str) -> bool:


def genname():
return "ramalama_" + "".join(random.choices(string.ascii_letters + string.digits, k=10))
return "ramalama-" + "".join(random.choices(string.ascii_letters + string.digits, k=10))


def engine_version(engine: SUPPORTED_ENGINES) -> str:
Expand Down
1 change: 1 addition & 0 deletions ramalama/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ class BaseConfig:
carimage: str = "registry.access.redhat.com/ubi10-micro:latest"
container: bool = None # type: ignore
ctx_size: int = 0
convert_type: Literal["artifact", "car", "raw"] = "raw"
default_image: str = DEFAULT_IMAGE
default_rag_image: str = DEFAULT_RAG_IMAGE
dryrun: bool = False
Expand Down
20 changes: 12 additions & 8 deletions ramalama/kube.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from typing import Optional, Tuple

from ramalama.common import MNT_DIR, RAG_DIR, genname, get_accel_env_vars
from ramalama.common import MNT_DIR, RAG_DIR, get_accel_env_vars
from ramalama.file import PlainFile
from ramalama.version import version

Expand All @@ -15,6 +15,7 @@ def __init__(
mmproj_paths: Optional[Tuple[str, str]],
args,
exec_args,
artifact,
):
self.src_model_path, self.dest_model_path = model_paths
self.src_chat_template_path, self.dest_chat_template_path = (
Expand All @@ -27,27 +28,30 @@ def __init__(
if getattr(args, "name", None):
self.name = args.name
else:
self.name = genname()
self.name = "ramalama"

self.args = args
self.exec_args = exec_args
self.image = args.image
self.artifact = artifact

def _gen_volumes(self):
mounts = """\
volumeMounts:"""

volumes = """
volumes:"""

if os.path.exists(self.src_model_path):
m, v = self._gen_path_volume()
mounts += m
volumes += v
else:
subPath = ""
if not self.artifact:
subPath = """
subPath: /models"""
mounts += f"""
- mountPath: {MNT_DIR}
subPath: /models
- mountPath: {MNT_DIR}{subPath}
name: model"""
volumes += self._gen_oci_volume()

Expand Down Expand Up @@ -98,7 +102,7 @@ def _gen_path_volume(self):
def _gen_oci_volume(self):
return f"""
- image:
reference: {self.ai_image}
reference: {self.src_model_path}
pullPolicy: IfNotPresent
name: model"""

Expand Down Expand Up @@ -162,7 +166,7 @@ def __gen_env_vars():
for k, v in env_vars.items():
env_spec += f"""
- name: {k}
value: {v}"""
value: \"{v}\""""

return env_spec

Expand All @@ -177,7 +181,7 @@ def generate(self) -> PlainFile:
# it into Kubernetes.
#
# Created with ramalama-{_version}
apiVersion: v1
apiVersion: apps/v1
kind: Deployment
metadata:
name: {self.name}
Expand Down
95 changes: 77 additions & 18 deletions ramalama/oci_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,66 @@
ocilabeltype = "org.containers.type"


def engine_supports_manifest_attributes(engine):
def convert_from_human_readable_size(input) -> float:
    """Convert a human-readable size string to a number of bytes.

    Accepts values such as "100B", "1.5KB" or "2mb"; the unit suffix is
    matched case-insensitively and surrounding whitespace is ignored.
    A bare number ("512") is returned unchanged as a float.

    Arguments:
    input -- size string as printed by the container engine (e.g. podman)

    Raises:
    ValueError -- if the numeric portion cannot be parsed as a float.
    """
    # Normalize once so mixed-case suffixes ("Kb", "kB") and stray
    # whitespace are handled; the original code only matched suffixes
    # that were entirely upper- or entirely lower-case.
    normalized = input.strip().upper()
    # "B" must stay last: every other unit suffix also ends in "B".
    sizes = [("KB", 1024), ("MB", 1024**2), ("GB", 1024**3), ("TB", 1024**4), ("B", 1)]
    for unit, size in sizes:
        if normalized.endswith(unit):
            return float(normalized[: -len(unit)]) * size

    # No recognized unit suffix: treat the whole input as a plain number.
    return float(input)


def list_artifacts(args: EngineArgType):
    """List AI-model OCI artifacts known to the container engine.

    Runs ``<engine> artifact ls`` and then ``<engine> artifact inspect`` on
    each result, keeping only artifacts whose manifest artifactType matches
    annotations.ArtifactTypeModelManifest. Returns a list of dicts with
    "name" (oci:// reference), "modified" (creation timestamp string) and
    "size" (bytes, as float).
    """
    # Docker has no "artifact" subcommand, so there is nothing to list.
    if args.engine == "docker":
        return []

    conman_args = [
        args.engine,
        "artifact",
        "ls",
        "--format",
        # Go-template format emitting one JSON object per artifact, each
        # followed by a trailing comma (stripped below before parsing).
        (
            '{"name":"oci://{{ .Repository }}:{{ .Tag }}",\
"created":"{{ .CreatedAt }}", \
"size":"{{ .Size }}", \
"ID":"{{ .Digest }}"},'
        ),
    ]
    output = run_cmd(conman_args).stdout.decode("utf-8").strip()
    if output == "":
        return []

    # Drop the trailing comma and wrap in brackets to form a JSON array.
    artifacts = json.loads(f"[{output[:-1]}]")
    models = []
    for artifact in artifacts:
        conman_args = [
            args.engine,
            "artifact",
            "inspect",
            artifact["ID"],
        ]
        output = run_cmd(conman_args).stdout.decode("utf-8").strip()

        if output == "":
            continue
        inspect = json.loads(output)
        # Skip anything that is not a RamaLama model artifact.
        if "Manifest" not in inspect:
            continue
        if "artifactType" not in inspect["Manifest"]:
            continue
        if inspect["Manifest"]['artifactType'] != annotations.ArtifactTypeModelManifest:
            continue
        models += [
            {
                "name": artifact["name"],
                "modified": artifact["created"],
                # "artifact ls" prints a human-readable size (e.g. "1.5GB");
                # convert it to a byte count to match other listings.
                "size": convert_from_human_readable_size(artifact["size"]),
            }
        ]
    return models


def engine_supports_manifest_attributes(engine) -> bool:
if not engine or engine == "" or engine == "docker":
return False
if engine == "podman" and engine_version(engine) < "5":
Expand Down Expand Up @@ -91,26 +150,26 @@ def list_models(args: EngineArgType):
"--format",
formatLine,
]
models = []
output = run_cmd(conman_args, env={"TZ": "UTC"}).stdout.decode("utf-8").strip()
if output == "":
return []

models = json.loads(f"[{output[:-1]}]")
# exclude dangling images having no tag (i.e. <none>:<none>)
models = [model for model in models if model["name"] != "oci://<none>:<none>"]

# Grab the size from the inspect command
if conman == "docker":
# grab the size from the inspect command
for model in models:
conman_args = [conman, "image", "inspect", model["id"], "--format", "{{.Size}}"]
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
# convert the number value from the string output
model["size"] = int(output)
# drop the id from the model
del model["id"]
if output != "":
models += json.loads(f"[{output[:-1]}]")
# exclude dangling images having no tag (i.e. <none>:<none>)
models = [model for model in models if model["name"] != "oci://<none>:<none>"]

# Grab the size from the inspect command
if conman == "docker":
# grab the size from the inspect command
for model in models:
conman_args = [conman, "image", "inspect", model["id"], "--format", "{{.Size}}"]
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
# convert the number value from the string output
model["size"] = int(output)
# drop the id from the model
del model["id"]

models += list_manifests(args)
models += list_artifacts(args)
Comment on lines 113 to +172
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: The models list is extended with both manifests and artifacts, which may result in duplicate entries if the same model exists in both forms.

Deduplicate models by name or ID to prevent listing the same model multiple times.

Suggested implementation:

    # Combine manifests and artifacts, then deduplicate by 'name'
    manifest_models = list_manifests(args)
    artifact_models = list_artifacts(args)
    combined_models = manifest_models + artifact_models

    # Deduplicate by 'name'
    seen_names = set()
    deduped_models = []
    for model in combined_models:
        model_name = model.get("name")
        if model_name and model_name not in seen_names:
            deduped_models.append(model)
            seen_names.add(model_name)

    models += deduped_models

    for model in models:
        # Convert to ISO 8601 format
  • If your models use a different unique key (e.g., "id" instead of "name"), replace "name" with the appropriate key in the deduplication logic.
  • If models is not empty before this block, you may want to deduplicate the entire list (including existing entries).

for model in models:
# Convert to ISO 8601 format
parsed_date = datetime.fromisoformat(
Expand Down
19 changes: 14 additions & 5 deletions ramalama/quadlet.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def __init__(
mmproj_path: Optional[Tuple[str, str]],
args,
exec_args,
artifact: bool
):
self.src_model_path, self.dest_model_path = model_paths
self.src_chat_template_path, self.dest_chat_template_path = (
Expand All @@ -33,6 +34,7 @@ def __init__(
self.name = model_name

self.args = args
self.artifact = artifact
self.exec_args = exec_args
self.image = args.image
self.rag = ""
Expand Down Expand Up @@ -147,11 +149,18 @@ def _gen_model_volume(self, quadlet_file: UnitFile):

files.append(self._gen_image(self.name, self.ai_image))

quadlet_file.add(
"Container",
"Mount",
f"type=image,source={self.ai_image},destination={MNT_DIR},subpath=/models,readwrite=false",
)
if self.artifact:
quadlet_file.add(
"Container",
"Mount",
f"type=artifact,source={self.src_model_path},destination={MNT_DIR}",
)
else:
quadlet_file.add(
"Container",
"Mount",
f"type=image,source={self.src_model_path},destination={MNT_DIR},subpath=/models,readwrite=false",
)
return files

def _gen_port(self, quadlet_file: UnitFile):
Expand Down
Loading
Loading