Skip to content

Commit fbec474

Browse files
authored
Merge pull request #1706 from rhatdan/chat
Consolidate run and chat commands together; also allow specification of prefix in ramalama.conf
2 parents 3af4c75 + 2abb464 commit fbec474

File tree

5 files changed

+49
-27
lines changed

5 files changed

+49
-27
lines changed

docs/ramalama.conf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,16 @@
7777
#
7878
#port = "8080"
7979

80+
# Specify default prefix for the chat and run commands. By default the prefix
81+
# is based on the container engine used.
82+
# Podman: "🦭 > "
83+
# Docker: "🐋 > "
84+
# No Engine: "🦙 > "
85+
# No EMOJI support: "> "
86+
#
87+
#
88+
#prefix = ""
89+
8090
# Specify default pull policy for OCI Images
8191
#
8292
# **always**: Always pull the image and throw an error if the pull fails.

docs/ramalama.conf.5.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,17 @@ In some cases this is needed to access the gpu from a rootless container
119119
number of gpu layers, 0 means CPU inferencing, 999 means use max layers (default: -1)
120120
The default -1, means use whatever is automatically deemed appropriate (0 or 999)
121121

122+
**prefix**=""
123+
Specify default prefix for the chat and run commands. By default the prefix
124+
is based on the container engine used.
125+
126+
| Container Engine| Prefix |
127+
| --------------- | ------- |
128+
| Podman | "🦭 > " |
129+
| Docker | "🐋 > " |
130+
| No Engine | "🦙 > " |
131+
| No EMOJI support| "> " |
132+
122133
**port**="8080"
123134

124135
Specify default port for services to listen on

ramalama/chat.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ def default_prefix():
5252
if not EMOJI:
5353
return "> "
5454

55+
if CONFIG.prefix:
56+
return CONFIG.prefix
57+
5558
engine = CONFIG.engine
5659

5760
if engine:

ramalama/cli.py

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -852,7 +852,7 @@ def runtime_options(parser, command):
852852
choices=["always", "missing", "never", "newer"],
853853
help='pull image policy',
854854
)
855-
if command in ["run", "serve"]:
855+
if command in ["serve"]:
856856
parser.add_argument(
857857
"--rag", help="RAG vector database or OCI Image to be served with the model", completer=local_models
858858
)
@@ -921,6 +921,18 @@ def default_threads():
921921
return CONFIG.threads
922922

923923

924+
def chat_run_options(parser):
925+
parser.add_argument(
926+
'--color',
927+
'--colour',
928+
default="auto",
929+
choices=get_args(COLOR_OPTIONS),
930+
help='possible values are "never", "always" and "auto".',
931+
)
932+
parser.add_argument("--prefix", type=str, help="prefix for the user prompt", default=default_prefix())
933+
parser.add_argument("--rag", type=str, help="a file or directory to use as context for the chat")
934+
935+
924936
def chat_parser(subparsers):
925937
parser = subparsers.add_parser("chat", help="OpenAI chat with the specified RESTAPI URL")
926938
parser.add_argument(
@@ -929,21 +941,13 @@ def chat_parser(subparsers):
929941
default=os.getenv("API_KEY"),
930942
help="OpenAI-compatible API key. Can also be set via the API_KEY environment variable.",
931943
)
932-
parser.add_argument(
933-
'--color',
934-
'--colour',
935-
default="auto",
936-
choices=get_args(COLOR_OPTIONS),
937-
help='possible values are "never", "always" and "auto".',
938-
)
944+
chat_run_options(parser)
939945
parser.add_argument(
940946
"--list",
941947
"--ls",
942948
action="store_true",
943949
help="list the available models at an endpoint",
944950
)
945-
parser.add_argument("--prefix", type=str, help="prefix for the user prompt", default=default_prefix())
946-
parser.add_argument("--rag", type=str, help="a file or directory to use as context for the chat")
947951
parser.add_argument("--url", type=str, default="http://127.0.0.1:8080/v1", help="the url to send requests to")
948952
parser.add_argument("--model", "-m", type=str, completer=local_models, help="model for inferencing")
949953
parser.add_argument(
@@ -955,14 +959,7 @@ def chat_parser(subparsers):
955959
def run_parser(subparsers):
956960
parser = subparsers.add_parser("run", help="run specified AI Model as a chatbot")
957961
runtime_options(parser, "run")
958-
parser.add_argument(
959-
'--color',
960-
'--colour',
961-
default="auto",
962-
choices=get_args(COLOR_OPTIONS),
963-
help='possible values are "never", "always" and "auto".',
964-
)
965-
parser.add_argument("--prefix", type=str, help="prefix for the user prompt", default=default_prefix())
962+
chat_run_options(parser)
966963
parser.add_argument("MODEL", completer=local_models) # positional argument
967964

968965
parser.add_argument(

ramalama/config.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,16 @@ class RamalamaSettings:
6565

6666
@dataclass
6767
class BaseConfig:
68-
container: bool = None # type: ignore
69-
image: str = None # type: ignore
68+
api: str = "none"
7069
carimage: str = "registry.access.redhat.com/ubi10-micro:latest"
70+
container: bool = None # type: ignore
7171
ctx_size: int = 2048
72+
default_image: str = DEFAULT_IMAGE
73+
dryrun: bool = False
7274
engine: SUPPORTED_ENGINES | None = field(default_factory=get_default_engine)
7375
env: list[str] = field(default_factory=list)
7476
host: str = "0.0.0.0"
77+
image: str = None # type: ignore
7578
images: dict[str, str] = field(
7679
default_factory=lambda: {
7780
"ASAHI_VISIBLE_DEVICES": "quay.io/ramalama/asahi",
@@ -83,23 +86,21 @@ class BaseConfig:
8386
"MUSA_VISIBLE_DEVICES": "quay.io/ramalama/musa",
8487
}
8588
)
86-
api: str = "none"
8789
keep_groups: bool = False
8890
ngl: int = -1
89-
threads: int = -1
91+
ocr: bool = False
9092
port: str = str(DEFAULT_PORT)
93+
prefix: str = None # type: ignore
9194
pull: str = "newer"
9295
rag_format: Literal["qdrant", "json", "markdown", "milvus"] = "qdrant"
9396
runtime: SUPPORTED_RUNTIMES = "llama.cpp"
97+
selinux: bool = False
98+
settings: RamalamaSettings = field(default_factory=RamalamaSettings)
9499
store: str = field(default_factory=get_default_store)
95100
temp: str = "0.8"
101+
threads: int = -1
96102
transport: str = "ollama"
97-
ocr: bool = False
98-
default_image: str = DEFAULT_IMAGE
99103
user: UserConfig = field(default_factory=UserConfig)
100-
selinux: bool = False
101-
dryrun: bool = False
102-
settings: RamalamaSettings = field(default_factory=RamalamaSettings)
103104

104105
def __post_init__(self):
105106
self.container = coerce_to_bool(self.container) if self.container is not None else self.engine is not None

0 commit comments

Comments
 (0)