Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions docs/ramalama-convert.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,15 @@ Image to use when converting to GGUF format (when then `--gguf` option has been
executable and available in the `PATH`. The script is available from the `llama.cpp` GitHub repo. Defaults to the current
`quay.io/ramalama/ramalama-rag` image.

#### **--type**=*raw* | *car*
#### **--type**=*artifact* | *raw* | *car*

type of OCI Model Image to convert.
Convert the MODEL to the specified OCI Object

| Type | Description |
| ---- | ------------------------------------------------------------- |
| car | Includes base image with the model stored in a /models subdir |
| raw | Only the model and a link file model.file to it stored at / |
| Type | Description |
| -------- | ------------------------------------------------------------- |
| artifact | Store AI Models as artifacts |
| car | Traditional OCI image including base image with the model stored in a /models subdir |
| raw      | Traditional OCI image including only the model and a link file `model.file` pointing at it, stored at / |

## EXAMPLE

Expand Down
8 changes: 8 additions & 0 deletions docs/ramalama.conf
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@
#
#carimage = "registry.access.redhat.com/ubi10-micro:latest"

# Convert the MODEL to the specified OCI Object
# Options: artifact, car, raw
#
# artifact: Store AI Models as artifacts
# car: Traditional OCI image including base image with the model stored in a /models subdir
# raw: Traditional OCI image including only the model and a link file `model.file` pointing at it, stored at /
#convert_type = "raw"

# Run RamaLama in the default container.
#
#container = true
Expand Down
12 changes: 12 additions & 0 deletions docs/ramalama.conf.5.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ Min chunk size to attempt reusing from the cache via KV shifting
Run RamaLama in the default container.
RAMALAMA_IN_CONTAINER environment variable overrides this field.

#convert_type = "raw"

Convert the MODEL to the specified OCI Object
Options: artifact, car, raw

| Type | Description |
| -------- | ------------------------------------------------------------- |
| artifact | Store AI Models as artifacts |
| car | Traditional OCI image including base image with the model stored in a /models subdir |
| raw      | Traditional OCI image including only the model and a link file `model.file` pointing at it, stored at / |


**ctx_size**=0

Size of the prompt context (0 = loaded from model)
Expand Down
31 changes: 21 additions & 10 deletions ramalama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,11 +719,12 @@ def convert_parser(subparsers):
)
parser.add_argument(
"--type",
default="raw",
choices=["car", "raw"],
default=CONFIG.convert_type,
choices=["artifact", "car", "raw"],
help="""\
type of OCI Model Image to push.

Model "artifact" stores the AI Model as an OCI Artifact.
Model "car" includes base image with the model stored in a /models subdir.
Model "raw" contains the model and a link file model.file to it stored at /.""",
)
Expand Down Expand Up @@ -762,11 +763,12 @@ def push_parser(subparsers):
add_network_argument(parser)
parser.add_argument(
"--type",
default="raw",
choices=["car", "raw"],
default=CONFIG.convert_type,
choices=["artifact", "car", "raw"],
help="""\
type of OCI Model Image to push.

Model "artifact" stores the AI Model as an OCI Artifact.
Model "car" includes base image with the model stored in a /models subdir.
Model "raw" contains the model and a link file model.file to it stored at /.""",
)
Expand All @@ -781,22 +783,26 @@ def push_parser(subparsers):
parser.set_defaults(func=push_cli)


def _get_source_model(args):
def _get_source_model(args, transport=None):
src = shortnames.resolve(args.SOURCE)
if not src:
src = args.SOURCE
smodel = New(src, args)
if smodel.type == "OCI":
raise ValueError(f"converting from an OCI based image {src} is not supported")
smodel = New(src, args, transport=transport)
if not smodel.exists() and not args.dryrun:
smodel.pull(args)
return smodel


def push_cli(args):
source_model = _get_source_model(args)
target = args.SOURCE
transport = None
if not args.TARGET:
transport = "oci"
source_model = _get_source_model(args, transport=transport)

if args.TARGET:
if source_model.type == "OCI":
raise ValueError(f"converting from an OCI based image {args.SOURCE} is not supported")
target = shortnames.resolve(args.TARGET)
if not target:
target = args.TARGET
Expand Down Expand Up @@ -1173,9 +1179,14 @@ def serve_cli(args):
model.ensure_model_exists(args)
except KeyError as e:
try:
if "://" in args.MODEL:
raise e
args.quiet = True
model = TransportFactory(args.MODEL, args, ignore_stderr=True).create_oci()
model.ensure_model_exists(args)
# Since this is an OCI model, prepend oci://
args.MODEL = f"oci://{args.MODEL}"

except Exception:
raise e

Expand Down Expand Up @@ -1425,7 +1436,7 @@ def _rm_model(models, args):
try:
m = New(model, args)
m.remove(args)
except KeyError as e:
except (KeyError, subprocess.CalledProcessError) as e:
for prefix in MODEL_TYPES:
if model.startswith(prefix + "://"):
if not args.ignore:
Expand Down
2 changes: 1 addition & 1 deletion ramalama/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def verify_checksum(filename: str) -> bool:


def genname():
return "ramalama_" + "".join(random.choices(string.ascii_letters + string.digits, k=10))
return "ramalama-" + "".join(random.choices(string.ascii_letters + string.digits, k=10))


def engine_version(engine: SUPPORTED_ENGINES) -> str:
Expand Down
1 change: 1 addition & 0 deletions ramalama/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ class BaseConfig:
carimage: str = "registry.access.redhat.com/ubi10-micro:latest"
container: bool = None # type: ignore
ctx_size: int = 0
convert_type: Literal["artifact", "car", "raw"] = "raw"
default_image: str = DEFAULT_IMAGE
default_rag_image: str = DEFAULT_RAG_IMAGE
dryrun: bool = False
Expand Down
20 changes: 12 additions & 8 deletions ramalama/kube.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from typing import Optional, Tuple

from ramalama.common import MNT_DIR, RAG_DIR, genname, get_accel_env_vars
from ramalama.common import MNT_DIR, RAG_DIR, get_accel_env_vars
from ramalama.file import PlainFile
from ramalama.version import version

Expand All @@ -15,6 +15,7 @@ def __init__(
mmproj_paths: Optional[Tuple[str, str]],
args,
exec_args,
artifact,
):
self.src_model_path, self.dest_model_path = model_paths
self.src_chat_template_path, self.dest_chat_template_path = (
Expand All @@ -27,27 +28,30 @@ def __init__(
if getattr(args, "name", None):
self.name = args.name
else:
self.name = genname()
self.name = "ramalama"

self.args = args
self.exec_args = exec_args
self.image = args.image
self.artifact = artifact

def _gen_volumes(self):
mounts = """\
volumeMounts:"""

volumes = """
volumes:"""

if os.path.exists(self.src_model_path):
m, v = self._gen_path_volume()
mounts += m
volumes += v
else:
subPath = ""
if not self.artifact:
subPath = """
subPath: /models"""
mounts += f"""
- mountPath: {MNT_DIR}
subPath: /models
- mountPath: {MNT_DIR}{subPath}
name: model"""
volumes += self._gen_oci_volume()

Expand Down Expand Up @@ -98,7 +102,7 @@ def _gen_path_volume(self):
def _gen_oci_volume(self):
return f"""
- image:
reference: {self.ai_image}
reference: {self.src_model_path}
pullPolicy: IfNotPresent
name: model"""

Expand Down Expand Up @@ -162,7 +166,7 @@ def __gen_env_vars():
for k, v in env_vars.items():
env_spec += f"""
- name: {k}
value: {v}"""
value: \"{v}\""""

return env_spec

Expand All @@ -177,7 +181,7 @@ def generate(self) -> PlainFile:
# it into Kubernetes.
#
# Created with ramalama-{_version}
apiVersion: v1
apiVersion: apps/v1
kind: Deployment
metadata:
name: {self.name}
Expand Down
95 changes: 77 additions & 18 deletions ramalama/oci_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,66 @@
ocilabeltype = "org.containers.type"


def engine_supports_manifest_attributes(engine):
def convert_from_human_readable_size(input) -> float:
    """Convert a human-readable size string to a number of bytes.

    Accepts values such as "100B", "1.5KB" or "2mb"; the unit suffix is
    matched case-insensitively and surrounding whitespace is ignored.
    A bare number ("512") is returned unchanged as a float.

    Arguments:
    input -- size string as printed by the container engine (e.g. podman)

    Raises:
    ValueError -- if the numeric portion cannot be parsed as a float.
    """
    # Normalize once so mixed-case suffixes ("Kb", "kB") and stray
    # whitespace are handled; the original code only matched suffixes
    # that were entirely upper- or entirely lower-case.
    normalized = input.strip().upper()
    # "B" must stay last: every other unit suffix also ends in "B".
    sizes = [("KB", 1024), ("MB", 1024**2), ("GB", 1024**3), ("TB", 1024**4), ("B", 1)]
    for unit, size in sizes:
        if normalized.endswith(unit):
            return float(normalized[: -len(unit)]) * size

    # No recognized unit suffix: treat the whole input as a plain number.
    return float(input)


def list_artifacts(args: EngineArgType):
    """List AI-model OCI artifacts known to the container engine.

    Runs ``<engine> artifact ls`` and then ``<engine> artifact inspect`` on
    each result, keeping only artifacts whose manifest artifactType matches
    annotations.ArtifactTypeModelManifest. Returns a list of dicts with
    "name" (oci:// reference), "modified" (creation timestamp string) and
    "size" (bytes, as float).
    """
    # Docker has no "artifact" subcommand, so there is nothing to list.
    if args.engine == "docker":
        return []

    conman_args = [
        args.engine,
        "artifact",
        "ls",
        "--format",
        # Go-template format emitting one JSON object per artifact, each
        # followed by a trailing comma (stripped below before parsing).
        (
            '{"name":"oci://{{ .Repository }}:{{ .Tag }}",\
"created":"{{ .CreatedAt }}", \
"size":"{{ .Size }}", \
"ID":"{{ .Digest }}"},'
        ),
    ]
    output = run_cmd(conman_args).stdout.decode("utf-8").strip()
    if output == "":
        return []

    # Drop the trailing comma and wrap in brackets to form a JSON array.
    artifacts = json.loads(f"[{output[:-1]}]")
    models = []
    for artifact in artifacts:
        conman_args = [
            args.engine,
            "artifact",
            "inspect",
            artifact["ID"],
        ]
        output = run_cmd(conman_args).stdout.decode("utf-8").strip()

        if output == "":
            continue
        inspect = json.loads(output)
        # Skip anything that is not a RamaLama model artifact.
        if "Manifest" not in inspect:
            continue
        if "artifactType" not in inspect["Manifest"]:
            continue
        if inspect["Manifest"]['artifactType'] != annotations.ArtifactTypeModelManifest:
            continue
        models += [
            {
                "name": artifact["name"],
                "modified": artifact["created"],
                # "artifact ls" prints a human-readable size (e.g. "1.5GB");
                # convert it to a byte count to match other listings.
                "size": convert_from_human_readable_size(artifact["size"]),
            }
        ]
    return models


def engine_supports_manifest_attributes(engine) -> bool:
if not engine or engine == "" or engine == "docker":
return False
if engine == "podman" and engine_version(engine) < "5":
Expand Down Expand Up @@ -91,26 +150,26 @@ def list_models(args: EngineArgType):
"--format",
formatLine,
]
models = []
output = run_cmd(conman_args, env={"TZ": "UTC"}).stdout.decode("utf-8").strip()
if output == "":
return []

models = json.loads(f"[{output[:-1]}]")
# exclude dangling images having no tag (i.e. <none>:<none>)
models = [model for model in models if model["name"] != "oci://<none>:<none>"]

# Grab the size from the inspect command
if conman == "docker":
# grab the size from the inspect command
for model in models:
conman_args = [conman, "image", "inspect", model["id"], "--format", "{{.Size}}"]
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
# convert the number value from the string output
model["size"] = int(output)
# drop the id from the model
del model["id"]
if output != "":
models += json.loads(f"[{output[:-1]}]")
# exclude dangling images having no tag (i.e. <none>:<none>)
models = [model for model in models if model["name"] != "oci://<none>:<none>"]

# Grab the size from the inspect command
if conman == "docker":
# grab the size from the inspect command
for model in models:
conman_args = [conman, "image", "inspect", model["id"], "--format", "{{.Size}}"]
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
# convert the number value from the string output
model["size"] = int(output)
# drop the id from the model
del model["id"]

models += list_manifests(args)
models += list_artifacts(args)
Comment on lines 113 to +172
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: The models list is extended with both manifests and artifacts, which may result in duplicate entries if the same model exists in both forms.

Deduplicate models by name or ID to prevent listing the same model multiple times.

Suggested implementation:

    # Combine manifests and artifacts, then deduplicate by 'name'
    manifest_models = list_manifests(args)
    artifact_models = list_artifacts(args)
    combined_models = manifest_models + artifact_models

    # Deduplicate by 'name'
    seen_names = set()
    deduped_models = []
    for model in combined_models:
        model_name = model.get("name")
        if model_name and model_name not in seen_names:
            deduped_models.append(model)
            seen_names.add(model_name)

    models += deduped_models

    for model in models:
        # Convert to ISO 8601 format
  • If your models use a different unique key (e.g., "id" instead of "name"), replace "name" with the appropriate key in the deduplication logic.
  • If models is not empty before this block, you may want to deduplicate the entire list (including existing entries).

for model in models:
# Convert to ISO 8601 format
parsed_date = datetime.fromisoformat(
Expand Down
19 changes: 14 additions & 5 deletions ramalama/quadlet.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def __init__(
mmproj_path: Optional[Tuple[str, str]],
args,
exec_args,
artifact: bool
):
self.src_model_path, self.dest_model_path = model_paths
self.src_chat_template_path, self.dest_chat_template_path = (
Expand All @@ -33,6 +34,7 @@ def __init__(
self.name = model_name

self.args = args
self.artifact = artifact
self.exec_args = exec_args
self.image = args.image
self.rag = ""
Expand Down Expand Up @@ -147,11 +149,18 @@ def _gen_model_volume(self, quadlet_file: UnitFile):

files.append(self._gen_image(self.name, self.ai_image))

quadlet_file.add(
"Container",
"Mount",
f"type=image,source={self.ai_image},destination={MNT_DIR},subpath=/models,readwrite=false",
)
if self.artifact:
quadlet_file.add(
"Container",
"Mount",
f"type=artifact,source={self.src_model_path},destination={MNT_DIR}",
)
else:
quadlet_file.add(
"Container",
"Mount",
f"type=image,source={self.src_model_path},destination={MNT_DIR},subpath=/models,readwrite=false",
)
return files

def _gen_port(self, quadlet_file: UnitFile):
Expand Down
Loading
Loading