From fb366ea1142d6c251114131262c8a355ba080b93 Mon Sep 17 00:00:00 2001
From: Seungjae Yoo <seungjaeyoo@google.com>
Date: Mon, 18 May 2026 10:15:56 +0900
Subject: [PATCH] Update podcvd skill, and include more evaluations

---
 container/src/podcvd/skill/EVAL.yaml | 87 ++++++++++++++++++++++++++--
 container/src/podcvd/skill/SKILL.md  | 29 ++++++++++
 2 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/container/src/podcvd/skill/EVAL.yaml b/container/src/podcvd/skill/EVAL.yaml
index 4f09c445d02..613ec330775 100644
--- a/container/src/podcvd/skill/EVAL.yaml
+++ b/container/src/podcvd/skill/EVAL.yaml
@@ -4,13 +4,17 @@ max_tool_calls: 80
 
 cases:
   - name: podcvd_lifecycle_single_create_and_reset
-    prompt: >
+    prompt: |
       Create a single Cuttlefish instance group. Sequentially perform these multi-layered verifications:
       1. Extract the assigned group name, ADB connection address, and truncated base Web UI URL.
       2. Execute fleet to ensure the group is actively registered.
       3. Execute `adb devices -l` to confirm the ADB device is attached.
       4. Execute a `curl` command targeting the base Web UI URL to verify dashboard accessibility.
       Finally, perform a global clear to reset all states globally.
+
+      WARNING:
+      - Reply with all outputs directly in the chat instead of writing them to files for review; those files cannot be read.
+      - List every command you executed in the output.
     skills: [podcvd]
     rubric: |
       The LLM evaluator must verify that the agent strictly follows this complete end-to-end agentic sequence:
@@ -23,8 +27,9 @@ cases:
       4. ADB Bridge Verification: Executes `adb devices -l` to explicitly confirm the Cuttlefish instance is attached as a valid device.
       5. Application Verification: Executes a `curl` command against the extracted Web UI endpoint to verify HTTP reachability.
       6. Reset: Executes `scripts/podcvd_executor.sh clear` to purge all resources globally.
-    expect_keywords_any:
+    expect_keywords:
       - "scripts/podcvd_executor.sh"
+      - "create"
       - "--vhost_user_vsock=true"
       - "--report_anonymous_usage_stats=n"
       - "fleet"
@@ -33,13 +38,17 @@ cases:
       - "clear"
 
   - name: podcvd_lifecycle_bulk_create_and_dynamic_teardown
-    prompt: >
+    prompt: |
       Create two Cuttlefish instance groups in parallel. Sequentially perform these verifications for both distinct groups:
       1. Report their identifiers, ADB addresses, and truncated Web UI URLs.
       2. Execute fleet to monitor their active registration.
       3. Execute `adb devices -l` to confirm both ADB connection states are present.
       4. Execute `curl` requests to verify that both Web UI endpoints are network-accessible.
       Finally, remove both groups concurrently by dynamically injecting their extracted identifiers.
+
+      WARNING:
+      - Reply with all outputs directly in the chat instead of writing them to files for review; those files cannot be read.
+      - List every command you executed in the output.
     skills: [podcvd]
     rubric: |
       The LLM evaluator must verify native task concurrency, multi-layered checks, and dynamic state injection:
@@ -52,11 +61,81 @@ cases:
       4. ADB Bridge Monitoring: Executes `adb devices -l` to confirm multiple distinct IP:port attachments.
       5. Network Monitoring: Executes `curl` commands to proactively verify that both independent Web UI dashboards respond.
       6. Dynamic Teardown: Concurrently launches independent `remove` commands. The agent MUST dynamically inject the actual group name strings extracted from the creation steps into the `--group_name` arguments (Format: `--group_name=<dynamically_parsed_string>`, Example Value: `--group_name=cvd_1`). Hardcoding target strings is strictly forbidden.
-    expect_keywords_any:
+    expect_keywords:
       - "scripts/podcvd_executor.sh"
+      - "create"
       - "--vhost_user_vsock=true"
       - "--report_anonymous_usage_stats=n"
       - "fleet"
       - "adb"
       - "curl"
       - "remove"
+
+  - name: podcvd_lifecycle_stop_and_start
+    prompt: |
+      Create a single Cuttlefish instance group. Sequentially perform these state transition verifications:
+      1. Extract the assigned group name, ADB connection address, and truncated base Web UI URL.
+      2. Stop the running group.
+      3. Execute fleet or `adb devices -l` to ensure the group is no longer active or attached.
+      4. Start the stopped group again.
+      5. Execute fleet and `adb devices -l` to confirm the group is active and the ADB device is re-attached.
+      Finally, remove the group to clean up resources.
+
+      WARNING:
+      - Reply with all outputs directly in the chat instead of writing them to files for review; those files cannot be read.
+      - List every command you executed in the output.
+    skills: [podcvd]
+    rubric: |
+      The LLM evaluator must verify that the agent strictly follows this state transition sequence:
+      1. Creation: Executes `scripts/podcvd_executor.sh create` with mandatory flags `--vhost_user_vsock=true` and `--report_anonymous_usage_stats=n`. Direct `podcvd` usage is prohibited.
+      2. Extraction: Parses outputs to explicitly display connection details. The evaluator must enforce the exact structural Formats below, while treating the Example Values strictly as illustrative references:
+         - Group Name ➔ Format: simple identifier string (Example Value: `cvd_1`)
+         - ADB Connection ➔ Format: `IP:PORT` (Example Value: `192.168.80.1:6520`)
+         - Web UI Endpoint ➔ Format: `https://IP:PORT` strictly stripping all specific internal paths (Example Value: `https://192.168.80.1:11443`)
+      3. Stop: Executes `scripts/podcvd_executor.sh --group_name=<group_name> stop` to stop the running group. The agent MUST dynamically inject the actual group name string extracted from the creation step (Format: `--group_name=<dynamically_parsed_string>`).
+      4. Disconnection Verification: Executes `scripts/podcvd_executor.sh fleet` or `adb devices -l` to confirm the device is stopped/disconnected.
+      5. Start: Executes `scripts/podcvd_executor.sh --group_name=<group_name> start` to restart the stopped group. The agent MUST dynamically inject the actual group name string extracted from the creation step (Format: `--group_name=<dynamically_parsed_string>`).
+      6. Re-connection Verification: Executes `scripts/podcvd_executor.sh fleet` and `adb devices -l` to confirm the Cuttlefish instance is active and attached again.
+      7. Teardown: Executes `scripts/podcvd_executor.sh --group_name=<group_name> remove` to clean up resources. The agent MUST dynamically inject the actual group name string (Format: `--group_name=<dynamically_parsed_string>`).
+    expect_keywords:
+      - "scripts/podcvd_executor.sh"
+      - "create"
+      - "--vhost_user_vsock=true"
+      - "--report_anonymous_usage_stats=n"
+      - "stop"
+      - "start"
+      - "fleet"
+      - "adb"
+      - "remove"
+
+  - name: podcvd_lifecycle_restart
+    prompt: |
+      Create a single Cuttlefish instance group. Sequentially perform these restart verifications:
+      1. Extract the assigned group name, ADB connection address, and truncated base Web UI URL.
+      2. Restart the running group.
+      3. Execute fleet and `adb devices -l` to confirm the group is active and the ADB device is re-attached.
+      Finally, remove the group to clean up resources.
+
+      WARNING:
+      - Reply with all outputs directly in the chat instead of writing them to files for review; those files cannot be read.
+      - List every command you executed in the output.
+    skills: [podcvd]
+    rubric: |
+      The LLM evaluator must verify that the agent strictly follows this restart sequence:
+      1. Creation: Executes `scripts/podcvd_executor.sh create` with mandatory flags `--vhost_user_vsock=true` and `--report_anonymous_usage_stats=n`. Direct `podcvd` usage is prohibited.
+      2. Extraction: Parses outputs to explicitly display connection details. The evaluator must enforce the exact structural Formats below, while treating the Example Values strictly as illustrative references:
+         - Group Name ➔ Format: simple identifier string (Example Value: `cvd_1`)
+         - ADB Connection ➔ Format: `IP:PORT` (Example Value: `192.168.80.1:6520`)
+         - Web UI Endpoint ➔ Format: `https://IP:PORT` strictly stripping all specific internal paths (Example Value: `https://192.168.80.1:11443`)
+      3. Restart: Executes `scripts/podcvd_executor.sh --group_name=<group_name> restart` to restart the running group. The agent MUST dynamically inject the actual group name string extracted from the creation step (Format: `--group_name=<dynamically_parsed_string>`).
+      4. Re-connection Verification: Executes `scripts/podcvd_executor.sh fleet` and `adb devices -l` to confirm the Cuttlefish instance is active and attached again after restart.
+      5. Teardown: Executes `scripts/podcvd_executor.sh --group_name=<group_name> remove` to clean up resources. The agent MUST dynamically inject the actual group name string (Format: `--group_name=<dynamically_parsed_string>`).
+    expect_keywords:
+      - "scripts/podcvd_executor.sh"
+      - "create"
+      - "--vhost_user_vsock=true"
+      - "--report_anonymous_usage_stats=n"
+      - "restart"
+      - "fleet"
+      - "adb"
+      - "remove"
diff --git a/container/src/podcvd/skill/SKILL.md b/container/src/podcvd/skill/SKILL.md
index c0e5dbb7cde..cf8927a7c52 100644
--- a/container/src/podcvd/skill/SKILL.md
+++ b/container/src/podcvd/skill/SKILL.md
@@ -10,6 +10,20 @@ This skill fully orchestrates the lifecycle of Cuttlefish instances and their gr
 > [!WARNING]
 > **Exclusive ADB Connection Control**: This skill internally manages all required ADB connections when creating, removing, or clearing instances. You **MUST NOT** invoke external skills, MCP servers, or secondary tools to establish ADB connections to these Cuttlefish instances, as doing so will cause system conflicts.
 
+> [!WARNING]
+> **Strict Isolation from `cvd`**: `podcvd` and standard `cvd` do not share state or information. Mixing their usage will cause inconsistent states and system errors.
+> * **MUST NOT** use standard `cvd` commands if this `podcvd` skill has been used in the environment.
+> * **MUST NOT** use this `podcvd` skill if standard `cvd` was previously used to manage instances.
+> * **Why Instances May Seem Missing**: Because they do not share state, instances created via standard `cvd` will **not** be visible to `podcvd` (e.g., `./scripts/podcvd_executor.sh fleet` will show no instances), and vice versa. If you cannot find an expected Cuttlefish instance, verify which tool was used to create it.
+
+## Prerequisites
+
+* **Package Requirement**: `podcvd` is delivered via the `cuttlefish-podcvd` Debian package.
+* **Initial Setup**: To use `podcvd` successfully, the `podcvd-setup` binary (installed with the package) **MUST** be executed at least once after installation to perform the necessary initialization.
+* **Required Build Artifacts**: Launching Cuttlefish instances requires appropriate Cuttlefish host tools (host binaries and libraries) and Android device images (such as system.img, boot.img, and vendor.img):
+  * **Inside an Android Workspace**: You must initialize the environment (e.g., `lunch <target>`) and build the required targets (e.g., `m`) to ensure that valid Cuttlefish host tools (inside `ANDROID_HOST_OUT`) and Android device images (inside `ANDROID_PRODUCT_OUT`) are generated.
+  * **Outside an Android Workspace**: If you are not in an active build environment, you must ensure that compatible, pre-built Cuttlefish host tools and Android device images are already present in the working directory or are otherwise accessible via the relevant environment paths (like `ANDROID_HOST_OUT` and `ANDROID_PRODUCT_OUT`).
+
 ## Mandatory Execution Rule (Critical)
 
 You **MUST NOT** call the `podcvd` binary directly.
@@ -73,6 +87,13 @@ Only assemble custom command lines if the user explicitly requests non-standard
 ./scripts/podcvd_executor.sh [podcvd_flags...] <subcommand> [subcommand_flags...]
 ```
 
+To understand advanced setup options and detailed command usage, query the built-in help through the executor script:
+```bash
+./scripts/podcvd_executor.sh help
+# or for specific subcommands:
+./scripts/podcvd_executor.sh <subcommand> --help
+```
+
 ## Integrated Web UI Monitoring Tip (Highly Recommended)
 
 Executing the `create` or `fleet` subcommands returns a JSON output containing instance configuration details. Within this JSON, you will find a **`web_access`** field.
@@ -86,3 +107,11 @@ If the JSON output returns:
 ```
 You must truncate the path and provide the user with this exact link:
 **`https://192.168.80.1:1443`**
+
+## Current Limitations & Temporarily Unsupported Features (Critical)
+
+Please be aware of the following **current** technical limitations when operating Cuttlefish instances via `podcvd`. Note that these features are actively being worked on and may be supported in future updates:
+
+* **Temporarily Unimplemented Subcommands**: Some `cvd` subcommands listed in the help output are not yet fully implemented and will explicitly return errors if executed in the current container environment.
+* **GPU Acceleration Not Yet Supported**: GPU hardware acceleration is **not yet supported**. Instances currently run with software rendering.
+* **Temporarily Restricted Filesystem Flag Support**: Flag support for custom filesystem paths is currently highly restricted. While paths residing within standard `ANDROID_HOST_OUT` and `ANDROID_PRODUCT_OUT` are handled as a special case and fully supported, specifying advanced flags to point to any other custom directory paths (for example, setting custom kernel image paths outside these pre-configured directories) will likely fail. Avoid using advanced filesystem/path-redirection flags for non-standard paths until support is fully introduced.