From 8c154bb700319e185a472968265912bb236a9ed7 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 19 Mar 2026 16:33:32 -0400 Subject: [PATCH 01/23] Making changes --- pufferlib/ocean/drive/binding.c | 1 + pufferlib/ocean/drive/drive.h | 113 ++++++++++++++++++++++++++++---- pufferlib/ocean/drive/drive.py | 20 +++++- pufferlib/ocean/env_binding.h | 62 +++++++++++++++--- pufferlib/ocean/env_config.h | 1 + 5 files changed, 172 insertions(+), 25 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 75189f8dd..8de367de4 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -369,6 +369,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { env->min_goal_distance = (float)unpack(kwargs, "min_goal_distance"); env->max_goal_distance = (float)unpack(kwargs, "max_goal_distance"); env->goal_radius = (float)unpack(kwargs, "goal_radius"); + env->render_mode = (int)unpack(kwargs, "render_mode"); env->min_goal_speed = (float)unpack(kwargs, "min_goal_speed"); env->max_goal_speed = (float)unpack(kwargs, "max_goal_speed"); env->min_avg_speed_to_consider_goal_attempt = (float)unpack(kwargs, "min_avg_speed_to_consider_goal_attempt"); diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 7953617eb..ccd78653d 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -1,3 +1,4 @@ +#include #include #include #include @@ -13,6 +14,20 @@ #include "error.h" #include "datatypes.h" +#define RENDER_WINDOW 0 +#define RENDER_HEADLESS 1 + +// View modes +#define VIEW_MODE_SIM_STATE 0 +#define VIEW_MODE_BEV_AGENT_OBS 1 +#define VIEW_MODE_AGENT_PERSP 2 + +// // Order of entities in rendering (lower is rendered first) +// #define Z_ROAD_SURFACE 0.0f +// #define Z_ROAD_MARKINGS 0.05f // Lane lines, road lines, traces +// #define Z_AGENT_DETAILS 0.4f // Arrow, goal markers, obs overlays +// #define Z_AGENTS 0.6f // Vehicles, cyclists, pedestrians + // constants 
for strings, data etc. #define SCENARIO_ID_STR_LENGTH 16 @@ -118,8 +133,8 @@ #define MAX_CHECKED_LANES 32 // Ego features depend on dynamics model -#define EGO_FEATURES_CLASSIC 13 -#define EGO_FEATURES_JERK 16 +#define EGO_FEATURES_CLASSIC 14 +#define EGO_FEATURES_JERK 17 // Observation normalization constants #define MAX_SPEED 100.0f @@ -201,6 +216,13 @@ const Color LIGHTGREEN = (Color){152, 255, 152, 255}; const Color LIGHTYELLOW = (Color){255, 255, 152, 255}; const Color SOFT_YELLOW = (Color){245, 245, 220, 255}; +const Color LIGHTBLUE = (Color){167, 204, 255, 255}; +const Color DEEPBLUE = (Color){45, 112, 226, 255}; +const Color EXPERT_REPLAY = (Color){162, 220, 183, 255}; +const Color EXPERT_REPLAY_SMALL = (Color){95, 112, 93, 255}; +const Color LIGHT_ORANGE = (Color){255, 160, 80, 255}; +const Color LIGHT_PURPLE = (Color){204, 204, 255, 255}; + struct timespec ts; typedef struct AgentSpawnSettings AgentSpawnSettings; @@ -366,6 +388,7 @@ struct Drive { int reward_randomization; int reward_conditioning; int turn_off_normalization; + int render_mode; RewardBound reward_bounds[NUM_REWARD_COEFS]; float min_avg_speed_to_consider_goal_attempt; float partner_obs_radius; @@ -1468,6 +1491,11 @@ int collision_check(Drive *env, int agent_idx) { if (agent->sim_x == INVALID_POSITION) return -1; + // Skip collision checking for pedestrians because they are often too + // close to other entities at initialization. 
+ if (agent->type == PEDESTRIAN) + return -1; + int car_collided_with_index = -1; if (agent->respawn_timestep != -1) @@ -1561,8 +1589,8 @@ static bool check_offroad(Drive *env, Agent *agent) { RoadMapElement *entity; entity = &env->road_elements[entity_list[i].entity_idx]; - // Check for offroad collision with road edges - if (entity->type == ROAD_EDGE) { + // Check for offroad collision with road edges (only for vehicles and cyclists) + if (entity->type == ROAD_EDGE && agent->type != PEDESTRIAN) { int geometry_idx = entity_list[i].geometry_idx; if (entity->z[geometry_idx] > agent->sim_z + agent->sim_height / 2.0f || entity->z[geometry_idx] < agent->sim_z - agent->sim_height / 2.0f) @@ -2261,7 +2289,13 @@ void init(Drive *env) { env->logs = (Log *)calloc(env->active_agent_count, sizeof(Log)); } +void close_client(Client *client); + void c_close(Drive *env) { + if (env->client != NULL) { + close_client(env->client); + env->client = NULL; + } free_agents(env->agents, env->num_objects); for (int i = 0; i < env->num_roads; i++) { free_road_element(&env->road_elements[i]); @@ -2586,11 +2620,13 @@ void compute_agent_metrics(Drive *env, int agent_idx) { // FORMAT COMES IN agent->metrics_array[SPEED_LIMIT_IDX] = (speed_magnitude > SPEED_LIMIT + 2.0f) ? 
1.0f : 0.0f; - // Check for vehicle collisions - int car_collided_with_index = collision_check(env, agent_idx); - if (car_collided_with_index != -1) - collided = VEHICLE_COLLISION; - + // Check for vehicle collisions (skip for pedestrians) + int car_collided_with_index = -1; + if (agent->type != PEDESTRIAN) { + car_collided_with_index = collision_check(env, agent_idx); + if (car_collided_with_index != -1) + collided = VEHICLE_COLLISION; + } agent->collision_state = collided; if (collided == VEHICLE_COLLISION) { @@ -2756,6 +2792,7 @@ void compute_observations(Drive *env) { obs[13] = fminf(SPEED_LIMIT / MAX_SPEED, 1.0f); obs[14] = lane_center_dist; obs[15] = ego_entity->metrics_array[LANE_ANGLE_IDX]; + obs[16] = ego_entity->type / 3.0f; } else { obs[7] = (ego_entity->respawn_timestep != -1) ? 1 : 0; obs[8] = normalized_goal_speed_min; @@ -2763,6 +2800,7 @@ void compute_observations(Drive *env) { obs[10] = fminf(SPEED_LIMIT / MAX_SPEED, 1.0f); obs[11] = lane_center_dist; obs[12] = ego_entity->metrics_array[LANE_ANGLE_IDX]; + obs[13] = ego_entity->type / 3.0f; } int obs_idx = (env->reward_conditioning == 1) ? 
ego_dim - NUM_REWARD_COEFS : ego_dim; // Placeholder for reward conditioning encoder - @@ -3483,16 +3521,63 @@ struct Client { int car_assignments[MAX_AGENTS]; // To keep car model assignments consistent per vehicle Vector3 default_camera_position; Vector3 default_camera_target; + int recorder_pipefd[2]; + pid_t recorder_pid; }; Client *make_client(Drive *env) { Client *client = (Client *)calloc(1, sizeof(Client)); - client->width = 1280; - client->height = 704; - SetConfigFlags(FLAG_MSAA_4X_HINT); + + if (env->render_mode == RENDER_HEADLESS && getenv("DISPLAY") == NULL) { + // Start Xvfb and set DISPLAY + pid_t xvfb_pid = fork(); + if (xvfb_pid == 0) { + execlp("Xvfb", "Xvfb", ":99", "-screen", "0", "1x1x24", NULL); + _exit(1); + } + setenv("DISPLAY", ":99", 1); + usleep(500000); // Give Xvfb 500ms to start + } + + if (env->render_mode == RENDER_WINDOW) { + client->width = 1280; + client->height = 704; + SetConfigFlags(FLAG_MSAA_4X_HINT); + SetTargetFPS(30); + + // Set up camera for interactive window + Vector3 target_pos = {0, 0, 1}; // Y is up, Z is depth + + client->default_camera_position = (Vector3){ + 0, // Same X as target + 120.0f, // 20 units above target + 175.0f // 20 units behind target + }; + client->default_camera_target = target_pos; + client->camera.position = client->default_camera_position; + client->camera.target = client->default_camera_target; + client->camera.up = (Vector3){0.0f, -1.0f, 0.0f}; // Y is up + client->camera.fovy = 45.0f; + client->camera.projection = CAMERA_PERSPECTIVE; + + } else { // Headless rendering + SetConfigFlags(FLAG_WINDOW_HIDDEN); + SetTargetFPS(6000); + + float map_width = env->grid_map->bottom_right_x - env->grid_map->top_left_x; + float map_height = env->grid_map->top_left_y - env->grid_map->bottom_right_y; + float scale = 6.0f; // Controls the resolution of the output video + int img_width = (int)roundf(map_width * scale / 2.0f) * 2; + int img_height = (int)roundf(map_height * scale / 2.0f) * 2; + + client->width 
= img_width; + client->height = img_height; + } + + SetTraceLogLevel(LOG_WARNING); // Only show warnings and errors InitWindow(client->width, client->height, "PufferDrive"); - SetTargetFPS(30); - client->puffers = LoadTexture("resources/puffers_128.png"); + + // Load assets client->cars[0] = LoadModel("resources/drive/RedCar.glb"); client->cars[1] = LoadModel("resources/drive/WhiteCar.glb"); client->cars[2] = LoadModel("resources/drive/BlueCar.glb"); diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 7e9324692..539db8b81 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -4,15 +4,22 @@ import struct import os import pufferlib +from enum import IntEnum from pufferlib.ocean.drive import binding from multiprocessing import Pool, cpu_count from tqdm import tqdm +class RenderView(IntEnum): + FULL_SIM_STATE = 0 # Orthographic top-down, fully observable simulator state + BEV_AGENT_OBS = 1 # Orthographic top-down, only show what the selected agent can observe + AGENT_PERSP = 2 # Third-person perspective following selected agent + + class Drive(pufferlib.PufferEnv): def __init__( self, - render_mode=None, + render_mode=RenderView.FULL_SIM_STATE, report_interval=1, width=1280, height=1024, @@ -430,6 +437,7 @@ def __init__( spawn_length_min=self.spawn_length_min, spawn_length_max=self.spawn_length_max, spawn_height=self.spawn_height, + render_mode=render_mode, ) env_ids.append(env_id) @@ -593,6 +601,7 @@ def resample_maps(self): spawn_length_min=self.spawn_length_min, spawn_length_max=self.spawn_length_max, spawn_height=self.spawn_height, + render_mode=self.render_mode, ) env_ids.append(env_id) self.c_envs = binding.vectorize(*env_ids) @@ -736,12 +745,17 @@ def get_road_edge_polylines(self): return polylines - def render(self): - binding.vec_render(self.c_envs, 0) + def render(self, view_mode: RenderView = RenderView.FULL_SIM_STATE, draw_traces: bool = True, env_id: int = 0): + binding.vec_render(self.c_envs, 
int(view_mode), draw_traces, env_id) def close(self): binding.vec_close(self.c_envs) + @property + def scenario_ids(self) -> list[str]: + """Return scenario ID string for each env, stripping null padding.""" + return [s.rstrip("\x00") for s in binding.vec_get_scenario_ids(self.c_envs)] + def calculate_area(p1, p2, p3): # 3D triangle area = 0.5 * ||(p2-p1) x (p3-p1)|| diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 44eeacb40..0fdb48428 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -209,13 +209,34 @@ static PyObject *env_step(PyObject *self, PyObject *args) { Py_RETURN_NONE; } -// Python function to step the environment +// Python function to render the environment static PyObject *env_render(PyObject *self, PyObject *args) { + int num_args = PyTuple_Size(args); + if (num_args != 3) { + PyErr_SetString(PyExc_TypeError, "env_render requires 3 arguments (env_handle, view_mode, draw_traces)"); + return NULL; + } Env *env = unpack_env(args); if (!env) { return NULL; } - c_render(env); + + PyObject *view_mode_arg = PyTuple_GetItem(args, 1); + if (!PyObject_TypeCheck(view_mode_arg, &PyLong_Type)) { + PyErr_SetString(PyExc_TypeError, "view_mode must be an integer"); + return NULL; + } + int view_mode = PyLong_AsLong(view_mode_arg); + + PyObject *show_traces_arg = PyTuple_GetItem(args, 2); + if (!PyObject_TypeCheck(show_traces_arg, &PyBool_Type)) { + PyErr_SetString(PyExc_TypeError, "draw_traces must be a boolean"); + return NULL; + } + bool draw_traces = PyObject_IsTrue(show_traces_arg); + + c_render(env, view_mode, draw_traces); + Py_RETURN_NONE; } @@ -531,8 +552,8 @@ static PyObject *vec_step(PyObject *self, PyObject *arg) { static PyObject *vec_render(PyObject *self, PyObject *args) { int num_args = PyTuple_Size(args); - if (num_args != 2) { - PyErr_SetString(PyExc_TypeError, "vec_render requires 2 arguments"); + if (num_args != 4) { + PyErr_SetString(PyExc_TypeError, "vec_render requires 4 arguments"); 
return NULL; } @@ -542,14 +563,24 @@ static PyObject *vec_render(PyObject *self, PyObject *args) { return NULL; } - PyObject *env_id_arg = PyTuple_GetItem(args, 1); - if (!PyObject_TypeCheck(env_id_arg, &PyLong_Type)) { + if (!PyObject_TypeCheck(PyTuple_GetItem(args, 1), &PyLong_Type)) { + PyErr_SetString(PyExc_TypeError, "view_mode must be an integer"); + return NULL; + } + if (!PyObject_TypeCheck(PyTuple_GetItem(args, 2), &PyBool_Type)) { + PyErr_SetString(PyExc_TypeError, "draw_traces must be a boolean"); + return NULL; + } + if (!PyObject_TypeCheck(PyTuple_GetItem(args, 3), &PyLong_Type)) { PyErr_SetString(PyExc_TypeError, "env_id must be an integer"); return NULL; } - int env_id = PyLong_AsLong(env_id_arg); - c_render(vec->envs[env_id]); + int view_mode = PyLong_AsLong(PyTuple_GetItem(args, 1)); + bool draw_traces = PyObject_IsTrue(PyTuple_GetItem(args, 2)); + int env_id = PyLong_AsLong(PyTuple_GetItem(args, 3)); + + c_render(vec->envs[env_id], view_mode, draw_traces); Py_RETURN_NONE; } @@ -684,6 +715,20 @@ static PyObject *get_global_agent_state(PyObject *self, PyObject *args) { Py_RETURN_NONE; } + +static PyObject *vec_get_scenario_ids(PyObject *self, PyObject *args) { + VecEnv *vec = unpack_vecenv(args); + if (!vec) + return NULL; + + PyObject *list = PyList_New(vec->num_envs); + for (int i = 0; i < vec->num_envs; i++) { + // scenario_id is char[16], may not be null-terminated at byte 16 + PyList_SET_ITEM(list, i, PyUnicode_FromStringAndSize(vec->envs[i]->scenario_id, 16)); + } + return list; +} + static PyObject *vec_get_global_agent_state(PyObject *self, PyObject *args) { if (PyTuple_Size(args) != 8) { PyErr_SetString(PyExc_TypeError, "vec_get_global_agent_state requires 8 arguments"); @@ -990,6 +1035,7 @@ static PyMethodDef methods[] = { {"vec_log", vec_log, METH_VARARGS, "Log the vector of environments"}, {"vec_render", vec_render, METH_VARARGS, "Render the vector of environments"}, {"vec_close", vec_close, METH_VARARGS, "Close the vector of 
environments"}, + {"vec_get_scenario_ids", vec_get_scenario_ids, METH_VARARGS, "Get scenario IDs for all envs"}, {"shared", (PyCFunction)my_shared, METH_VARARGS | METH_KEYWORDS, "Shared state"}, {"get_global_agent_state", get_global_agent_state, METH_VARARGS, "Get global agent state"}, {"vec_get_global_agent_state", vec_get_global_agent_state, METH_VARARGS, "Get agent state from vectorized env"}, diff --git a/pufferlib/ocean/env_config.h b/pufferlib/ocean/env_config.h index 8105a40e9..0d83d9762 100644 --- a/pufferlib/ocean/env_config.h +++ b/pufferlib/ocean/env_config.h @@ -8,6 +8,7 @@ // Config struct for parsing INI files - contains all environment configuration typedef struct { + int render_mode; int action_type; int dynamics_model; float reward_vehicle_collision; From bf8e27e7a0c0e339db2160d489c21e1e1251795b Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 19 Mar 2026 23:41:53 -0400 Subject: [PATCH 02/23] Updating drive.h --- pufferlib/ocean/drive/drive.h | 433 +++++++++++++++++++++++++--------- 1 file changed, 321 insertions(+), 112 deletions(-) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index ccd78653d..61722f483 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -215,7 +215,7 @@ const Color PUFF_BACKGROUND2 = (Color){18, 72, 72, 255}; const Color LIGHTGREEN = (Color){152, 255, 152, 255}; const Color LIGHTYELLOW = (Color){255, 255, 152, 255}; const Color SOFT_YELLOW = (Color){245, 245, 220, 255}; - +const Color ROAD_COLOR = (Color){35, 35, 37, 255}; const Color LIGHTBLUE = (Color){167, 204, 255, 255}; const Color DEEPBLUE = (Color){45, 112, 226, 255}; const Color EXPERT_REPLAY = (Color){162, 220, 183, 255}; @@ -3591,26 +3591,43 @@ Client *make_client(Drive *env) { for (int i = 0; i < MAX_AGENTS; i++) { client->car_assignments[i] = (rand() % 4) + 1; } - // Get initial target position from first active agent - Vector3 target_pos = { - 0, - 0, // Y is up - 1 // Z is depth - }; - // Set up 
camera to look at target from above and behind - client->default_camera_position = (Vector3){ - 0, // Same X as target - 120.0f, // 20 units above target - 40.0f // 20 units behind target - }; - client->default_camera_target = target_pos; - client->camera.position = client->default_camera_position; - client->camera.target = client->default_camera_target; - client->camera.up = (Vector3){0.0f, -1.0f, 0.0f}; // Y is up - client->camera.fovy = 45.0f; - client->camera.projection = CAMERA_PERSPECTIVE; - client->camera_zoom = 1.0f; + // Set up ffmpeg process for recording + if (env->render_mode == RENDER_HEADLESS) { + if (pipe(client->recorder_pipefd) == -1) { + fprintf(stderr, "Failed to create pipe\n"); + free(client); + return NULL; + } + + char size_str[64]; + snprintf(size_str, sizeof(size_str), "%dx%d", (int)client->width, (int)client->height); + + char filename[256]; + snprintf(filename, sizeof(filename), "%s.mp4", env->scenario_id); + + client->recorder_pid = fork(); + if (client->recorder_pid == -1) { + fprintf(stderr, "Failed to fork\n"); + free(client); + return NULL; + } + + if (client->recorder_pid == 0) { // Child process + close(client->recorder_pipefd[1]); + dup2(client->recorder_pipefd[0], STDIN_FILENO); + close(client->recorder_pipefd[0]); + for (int fd = 3; fd < 256; fd++) + close(fd); + execlp("ffmpeg", "ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgba", "-s", size_str, "-r", "30", "-i", + "-", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-preset", "ultrafast", "-crf", "23", "-loglevel", + "error", filename, NULL); + fprintf(stderr, "execlp ffmpeg failed\n"); + _exit(1); + } + close(client->recorder_pipefd[0]); + } + return client; } @@ -3717,20 +3734,27 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las float goal_y = agent_obs[1] * 200; float goal_z = agent_obs[2] * 200; + int agent_type = env->agents[active_idx].type; + Color goal_color = LIGHTBLUE; + if (agent_type == PEDESTRIAN) + goal_color = LIGHT_ORANGE; + else 
if (agent_type == CYCLIST) + goal_color = LIGHT_PURPLE; + if (mode == 0) { - DrawSphere((Vector3){goal_x, goal_y, goal_z}, 0.5f, LIGHTGREEN); + DrawSphere((Vector3){goal_x, goal_y, goal_z}, 0.5f, goal_color); DrawCircle3D((Vector3){goal_x, goal_y, goal_z}, env->agents[active_idx].reward_coefs[REWARD_COEF_GOAL_RADIUS], - (Vector3){0, 0, 1}, 90.0f, Fade(LIGHTGREEN, 0.3f)); + (Vector3){0, 0, 1}, 90.0f, Fade(goal_color, 0.3f)); } if (mode == 1) { float goal_x_world = px + (goal_x * heading_self_x - goal_y * heading_self_y); float goal_y_world = py + (goal_x * heading_self_y + goal_y * heading_self_x); float goal_z_world = pz + goal_z; - DrawSphere((Vector3){goal_x_world, goal_y_world, goal_z_world}, 0.5f, LIGHTGREEN); + DrawSphere((Vector3){goal_x_world, goal_y_world, goal_z_world}, 0.5f, goal_color); DrawCircle3D((Vector3){goal_x_world, goal_y_world, goal_z_world}, env->agents[active_idx].reward_coefs[REWARD_COEF_GOAL_RADIUS], (Vector3){0, 0, 1}, 90.0f, - Fade(LIGHTGREEN, 0.3f)); + Fade(goal_color, 0.3f)); } // First draw other agent observations int obs_idx = ego_dim; // Start after ego obs @@ -4040,27 +4064,39 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, continue; } - // --- Draw the car --- - Color car_color = GRAY; // default for static - if (is_expert) - car_color = GOLD; // expert replay - if (is_active_agent) - car_color = BLUE; // policy-controlled - if (is_active_agent && agent->aabb_collision_state > 0) - car_color = LIGHTGREEN; - if (is_active_agent && agent->collision_state > 0) - car_color = RED; - rlSetLineWidth(3.0f); - for (int j = 0; j < 4; j++) { - DrawLine3D(corners[j], corners[(j + 1) % 4], car_color); + // Draw the agent bounding boxes + Color agent_color = GRAY; + if (is_expert) { + if (agent[i].type == PEDESTRIAN || agent[i].type == CYCLIST) + agent_color = EXPERT_REPLAY_SMALL; + else + agent_color = EXPERT_REPLAY; + } + if (is_active_agent) { + if (agent[i].type == PEDESTRIAN) + agent_color = LIGHT_ORANGE; 
+ else if (agent[i].type == CYCLIST) + agent_color = LIGHT_PURPLE; + else + agent_color = BLUE; } + if (is_active_agent && agent[i].collision_state > 0) + agent_color = RED; + + rlPushMatrix(); + rlTranslatef(position.x, position.y, position.z); + rlRotatef(heading * RAD2DEG, 0.0f, 0.0f, 1.0f); + DrawCube((Vector3){0.0f, 0.0f, 0.0f}, size.x, size.y, 1.0f, Fade(agent_color, 0.5f)); + DrawCubeWires((Vector3){0.0f, 0.0f, 0.0f}, size.x, size.y, 1.0f, agent_color); + rlPopMatrix(); + // --- Draw a heading arrow pointing forward --- Vector3 arrowStart = position; Vector3 arrowEnd = {position.x + cos_heading * half_len * 1.5f, // extend arrow beyond car position.y + sin_heading * half_len * 1.5f, position.z}; - DrawLine3D(arrowStart, arrowEnd, car_color); - DrawSphere(arrowEnd, 0.2f, car_color); // arrow tip + DrawLine3D(arrowStart, arrowEnd, agent_color); + DrawSphere(arrowEnd, 0.2f, agent_color); // arrow tip } else { // Agent view rlPushMatrix(); @@ -4126,7 +4162,7 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, }; Color wire_color = GRAY; // static if (!is_active_agent && agent->mark_as_expert == 1) - wire_color = GOLD; // expert replay + wire_color = EXPERT_REPLAY; // expert replay if (is_active_agent) wire_color = BLUE; // policy if (is_active_agent && agent->aabb_collision_state > 0) @@ -4162,13 +4198,20 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, continue; } if (!IsKeyDown(KEY_LEFT_CONTROL) && obs_only == 0) { + + Color goal_color = DEEPBLUE; + if (agent->type == PEDESTRIAN) + goal_color = LIGHT_ORANGE; + else if (agent->type == CYCLIST) + goal_color = LIGHT_PURPLE; + DrawSphere( (Vector3){ agent->goal_position_x, agent->goal_position_y, agent->goal_position_z, }, - 0.5f, DARKGREEN); + 0.5f, goal_color); DrawCircle3D( (Vector3){ @@ -4176,7 +4219,7 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, agent->goal_position_y, agent->goal_position_z, }, - 
agent->reward_coefs[REWARD_COEF_GOAL_RADIUS], (Vector3){0, 0, 1}, 90.0f, Fade(LIGHTGREEN, 0.3f)); + agent->reward_coefs[REWARD_COEF_GOAL_RADIUS], (Vector3){0, 0, 1}, 90.0f, Fade(goal_color, 0.3f)); } } } @@ -4193,7 +4236,7 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, else if (road->type == ROAD_LINE) lineColor = WHITE; else if (road->type == ROAD_EDGE) - lineColor = WHITE; + lineColor = Fade(WHITE, 0.7f); else if (road->type == DRIVEWAY) lineColor = RED; @@ -4240,96 +4283,262 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, } } -void c_render(Drive *env) { +void c_render(Drive *env, int view_mode, int draw_traces) { + + // Create client on first render call if (env->client == NULL) { env->client = make_client(env); } Client *client = env->client; - BeginDrawing(); - Color road = (Color){35, 35, 37, 255}; - ClearBackground(road); - BeginMode3D(client->camera); - handle_camera_controls(env->client); - draw_scene(env, client, 0, 0, 0, 0); - if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { - env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; - } + if (env->render_mode == RENDER_HEADLESS) { // Headless rendering via ffmpeg + float map_width = env->grid_map->bottom_right_x - env->grid_map->top_left_x; + float map_height = env->grid_map->top_left_y - env->grid_map->bottom_right_y; + + Camera3D camera = {0}; + + if (view_mode == VIEW_MODE_SIM_STATE) { + // Orthographic bird's-eye view over the entire map (fully observable) + camera.position = (Vector3){0.0, 0.0, 400.0f}; // Above the scene + camera.target = (Vector3){0.0, 0.0, 0.0}; // Look at origin + camera.up = (Vector3){0.0f, -1.0f, 0.0f}; + camera.projection = CAMERA_ORTHOGRAPHIC; + camera.fovy = map_height; + + BeginDrawing(); + ClearBackground(ROAD_COLOR); + BeginMode3D(camera); + + if (draw_traces) { // Show logged trajectories of active agents and expert static agents + for (int i = 0; i < 
env->active_agent_count; i++) { + int idx = env->active_agent_indices[i]; + for (int t = env->init_steps; t < env->episode_length; t++) { + Color agent_color = LIGHTBLUE; + if (env->agents[idx].type == PEDESTRIAN) { + agent_color = LIGHT_ORANGE; + } else if (env->agents[idx].type == CYCLIST) { + agent_color = LIGHT_PURPLE; + } + DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], + env->agents[idx].log_trajectory_z[t]}, + 0.15f, agent_color); + } + } + + for (int i = 0; i < env->expert_static_agent_count; i++) { + int idx = env->expert_static_agent_indices[i]; + for (int t = env->init_steps; t < env->episode_length; t++) { + DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], + env->agents[idx].log_trajectory_z[t]}, + 0.15f, EXPERT_REPLAY); + } + } + } - // Draw debug info - DrawText(TextFormat("Camera Position: (%.2f, %.2f, %.2f)", client->camera.position.x, client->camera.position.y, - client->camera.position.z), - 10, 10, 20, PUFF_WHITE); - DrawText(TextFormat("Camera Target: (%.2f, %.2f, %.2f)", client->camera.target.x, client->camera.target.y, - client->camera.target.z), - 10, 30, 20, PUFF_WHITE); - DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE); + draw_scene(env, client, 1, 0, 0, 0); - int human_idx = env->active_agent_indices[env->human_agent_idx]; - DrawText(TextFormat("Controlling Agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE); - DrawText(TextFormat("Agent Index: %d", human_idx), 10, 90, 20, PUFF_WHITE); + } else if (view_mode == VIEW_MODE_BEV_AGENT_OBS) { + // Orthographic bird's-eye view centered on the selected agent, + // showing only that agent's observations + int agent_idx = env->active_agent_indices[env->human_agent_idx]; + Agent *agent = &env->agents[agent_idx]; - // Display current action values - yellow when controlling, white otherwise - Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? 
YELLOW : PUFF_WHITE; + Camera3D camera = {0}; + camera.position = (Vector3){agent->sim_x, agent->sim_y, 400.0f}; + camera.target = (Vector3){agent->sim_x, agent->sim_y, 0.0f}; + camera.up = (Vector3){0.0f, -1.0f, 0.0f}; + camera.projection = CAMERA_ORTHOGRAPHIC; + camera.fovy = env->grid_map->vision_range * GRID_CELL_SIZE * 2.0f; - if (env->action_type == 0) { // discrete - int *action_array = (int *)env->actions; - int action_val = action_array[env->human_agent_idx]; + BeginDrawing(); + ClearBackground(ROAD_COLOR); + BeginMode3D(camera); + draw_scene(env, client, 1, 1, 0, 0); - if (env->dynamics_model == CLASSIC) { - int num_steer = 13; - int accel_idx = action_val / num_steer; - int steer_idx = action_val % num_steer; - float accel_value = ACCELERATION_VALUES[accel_idx]; - float steer_value = STEERING_VALUES[steer_idx]; - - DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color); - DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color); - } else if (env->dynamics_model == JERK) { - int num_lat = 3; - int jerk_long_idx = action_val / num_lat; - int jerk_lat_idx = action_val % num_lat; - float jerk_long_value = JERK_LONG[jerk_long_idx]; - float jerk_lat_value = JERK_LAT[jerk_lat_idx]; - - DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color); - DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color); + } else { // First-person perspective from a selected agent + int agent_idx = env->active_agent_indices[env->human_agent_idx]; + Agent *agent = &env->agents[agent_idx]; + + Camera3D camera = {0}; + // Position camera behind and above the agent + camera.position = (Vector3){agent->sim_x - (25.0f * cosf(agent->sim_heading)), + agent->sim_y - (25.0f * sinf(agent->sim_heading)), 15.0f}; + camera.target = (Vector3){agent->sim_x + 40.0f * cosf(agent->sim_heading), + agent->sim_y + 40.0f * sinf(agent->sim_heading), 1.0f}; + camera.up = 
(Vector3){0.0f, 0.0f, 1.0f}; + camera.fovy = 60.0f; + camera.projection = CAMERA_PERSPECTIVE; + + BeginDrawing(); + ClearBackground(ROAD_COLOR); + BeginMode3D(camera); + draw_scene(env, client, 0, 0, 0, 1); } - } else { // continuous - float (*action_array_f)[2] = (float (*)[2])env->actions; - DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, action_color); - DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, action_color); - } - // Show key press status - int status_y = 150; - if (IsKeyDown(KEY_LEFT_SHIFT)) { - DrawText("[shift pressed]", 10, status_y, 20, YELLOW); - status_y += 20; - } - if (IsKeyDown(KEY_SPACE)) { - DrawText("[space pressed]", 10, status_y, 20, YELLOW); - status_y += 20; - } - if (IsKeyDown(KEY_LEFT_CONTROL)) { - DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW); - status_y += 20; + EndDrawing(); + + unsigned char *screen_data = rlReadScreenPixels((int)client->width, (int)client->height); + if (screen_data) { + write(client->recorder_pipefd[1], screen_data, (int)client->width * (int)client->height * 4); + RL_FREE(screen_data); + } + } else { // Pop-up window + BeginDrawing(); + ClearBackground(ROAD_COLOR); + BeginMode3D(client->camera); + handle_camera_controls(env->client); + draw_scene(env, client, 0, 0, 0, 0); + + if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { + env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; + } + + DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE); + DrawText(TextFormat("Controlling agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE); + int human_idx = env->active_agent_indices[env->human_agent_idx]; + + Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? 
YELLOW : PUFF_WHITE; + + if (env->action_type == 0) { // discrete + int *action_array = (int *)env->actions; + int action_val = action_array[env->human_agent_idx]; + + if (env->dynamics_model == CLASSIC) { + int num_steer = 13; + int accel_idx = action_val / num_steer; + int steer_idx = action_val % num_steer; + float accel_value = ACCELERATION_VALUES[accel_idx]; + float steer_value = STEERING_VALUES[steer_idx]; + + DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color); + DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color); + } else if (env->dynamics_model == JERK) { + int num_lat = 3; + int jerk_long_idx = action_val / num_lat; + int jerk_lat_idx = action_val % num_lat; + float jerk_long_value = JERK_LONG[jerk_long_idx]; + float jerk_lat_value = JERK_LAT[jerk_lat_idx]; + + DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color); + DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color); + } + } else { // continuous + float (*action_array_f)[2] = (float (*)[2])env->actions; + DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, + action_color); + DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, action_color); + } + + int status_y = 150; + if (IsKeyDown(KEY_LEFT_SHIFT)) { + DrawText("[shift pressed]", 10, status_y, 20, YELLOW); + status_y += 20; + } + if (IsKeyDown(KEY_SPACE)) { + DrawText("[space pressed]", 10, status_y, 20, YELLOW); + status_y += 20; + } + if (IsKeyDown(KEY_LEFT_CONTROL)) { + DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW); + status_y += 20; + } + + DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10, + client->height - 30, 20, PUFF_WHITE); + DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE); + DrawText(TextFormat("Grid Cols: %d", 
env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); + EndDrawing(); } - // Controls help - DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10, - client->height - 30, 20, PUFF_WHITE); + // BeginDrawing(); + // Color road = (Color){35, 35, 37, 255}; + // ClearBackground(road); + // BeginMode3D(client->camera); + // handle_camera_controls(env->client); + // draw_scene(env, client, 0, 0, 0, 0); + + // if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { + // env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; + // } + + // // Draw debug info + // DrawText(TextFormat("Camera Position: (%.2f, %.2f, %.2f)", client->camera.position.x, client->camera.position.y, + // client->camera.position.z), + // 10, 10, 20, PUFF_WHITE); + // DrawText(TextFormat("Camera Target: (%.2f, %.2f, %.2f)", client->camera.target.x, client->camera.target.y, + // client->camera.target.z), + // 10, 30, 20, PUFF_WHITE); + // DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE); + + // int human_idx = env->active_agent_indices[env->human_agent_idx]; + // DrawText(TextFormat("Controlling Agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE); + // DrawText(TextFormat("Agent Index: %d", human_idx), 10, 90, 20, PUFF_WHITE); + + // // Display current action values - yellow when controlling, white otherwise + // Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? 
YELLOW : PUFF_WHITE; + + // if (env->action_type == 0) { // discrete + // int *action_array = (int *)env->actions; + // int action_val = action_array[env->human_agent_idx]; + + // if (env->dynamics_model == CLASSIC) { + // int num_steer = 13; + // int accel_idx = action_val / num_steer; + // int steer_idx = action_val % num_steer; + // float accel_value = ACCELERATION_VALUES[accel_idx]; + // float steer_value = STEERING_VALUES[steer_idx]; + + // DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color); + // DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color); + // } else if (env->dynamics_model == JERK) { + // int num_lat = 3; + // int jerk_long_idx = action_val / num_lat; + // int jerk_lat_idx = action_val % num_lat; + // float jerk_long_value = JERK_LONG[jerk_long_idx]; + // float jerk_lat_value = JERK_LAT[jerk_lat_idx]; + + // DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color); + // DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color); + // } + // } else { // continuous + // float (*action_array_f)[2] = (float (*)[2])env->actions; + // DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, + // action_color); DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, + // action_color); + // } + + // // Show key press status + // int status_y = 150; + // if (IsKeyDown(KEY_LEFT_SHIFT)) { + // DrawText("[shift pressed]", 10, status_y, 20, YELLOW); + // status_y += 20; + // } + // if (IsKeyDown(KEY_SPACE)) { + // DrawText("[space pressed]", 10, status_y, 20, YELLOW); + // status_y += 20; + // } + // if (IsKeyDown(KEY_LEFT_CONTROL)) { + // DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW); + // status_y += 20; + // } + + // // Controls help + // DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10, + // 
client->height - 30, 20, PUFF_WHITE); - DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE); - DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); - EndDrawing(); + // DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE); + // DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); + // EndDrawing(); } void close_client(Client *client) { + if (client->recorder_pid > 0) { + close(client->recorder_pipefd[1]); + waitpid(client->recorder_pid, NULL, 0); + } for (int i = 0; i < 6; i++) { UnloadModel(client->cars[i]); } - UnloadTexture(client->puffers); CloseWindow(); free(client); } From 4517e7ff4bc0e174eab2afbf0141950728b5e001 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Tue, 24 Mar 2026 16:06:56 -0400 Subject: [PATCH 03/23] Further render related updates --- pufferlib/config/ocean/drive.ini | 70 ++-- pufferlib/ocean/benchmark/evaluator.py | 141 +++++++ pufferlib/ocean/drive/drive.c | 134 +++---- pufferlib/ocean/drive/drive.h | 60 ++- pufferlib/ocean/drive/drive.py | 3 +- pufferlib/ocean/env_binding.h | 2 +- pufferlib/pufferl.py | 508 +++++++++---------------- pufferlib/utils.py | 242 ------------ 8 files changed, 475 insertions(+), 685 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index c5a68e0f9..bc78f4bcb 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -54,19 +54,19 @@ polyline_max_segment_length = 10.0 episode_length = 300 resample_frequency = 300 termination_mode = 1 # 0 - terminate at episode_length, 1 - terminate after all agents have been reset -map_dir = "resources/drive/binaries/carla_2D" +map_dir = "resources/drive/binaries/carla_3D" num_maps = 3 ; If True, allows training with fewer maps than requested (warns instead of erroring) allow_fewer_maps = True ; Determines which step of the trajectory 
to initialize the agents at upon reset init_steps = 0 ; Options: "control_vehicles", "control_agents", "control_wosac", "control_sdc_only" -control_mode = "control_vehicles" +control_mode = "control_agents" ; Options: "create_all_valid", "create_only_controlled", "init_variable_agent_number"(creates random number of controlled agents per env) init_mode = "init_variable_agent_number" ; Below options only valid for "init_variable_agent_number" init_mode min_agents_per_env = 1 -max_agents_per_env = 128 +max_agents_per_env = 32 ; Dimension Ranges for agents spawn_width_min = 1.5 spawn_width_max = 2.5 @@ -74,6 +74,11 @@ spawn_length_min = 2.0 spawn_length_max = 5.5 spawn_height = 1.5 +; Render mode options: +; 0:"window" = pop-up raylib window (original) +; 1:"headless" = off-screen; frames piped to ffmpeg (recommended for training) +render_mode = 1 + turn_off_normalization = 1 ; Options: 0 - Use normalized reward coefs as input to NN, 1 - Dont @@ -171,28 +176,22 @@ vf_clip_coef = 0.1999999999999999 vf_coef = 2 vtrace_c_clip = 1 vtrace_rho_clip = 1 -checkpoint_interval = 250 -; Rendering options -render = True -render_async = True # Render in background process to avoid blocking training -render_interval = 1000 -; If True, show exactly what the agent sees in agent observation -obs_only = True -; Show grid lines -show_grid = True -; Draws lines from ego agent observed ORUs and road elements to show detection range -show_lasers = False -; Display human xy logs in the background -show_human_logs = False -; If True, zoom in on a part of the map. 
Otherwise, show full map -zoom_in = True -; Options: List[str to path], str to path (e.g., "resources/drive/training/binaries/map_001.bin"), None -render_map = none +checkpoint_interval = 1000 [eval] eval_interval = 1000 ; Path to dataset used for evaluation map_dir = "resources/drive/binaries/training" + +num_eval_agents = 64 +; If True, enable self-play evaluation (pair policy-controlled agent with a copy of itself) +self_play_eval = True +; If True, enable human replay evaluation (pair policy-controlled agent with human replays) +human_replay_eval = False +; If True, render random scenarios. Note: Doing this frequency will slow down the training. +render_human_replay_eval = False +render_self_play_eval = True + ; Number of scenarios to process per batch wosac_batch_size = 32 ; Target number of unique scenarios perform evaluation in @@ -271,29 +270,30 @@ steer = 1.0 acc = 1.0 [render] -; Mode to render a bunch of maps with a given policy -; Path to dataset used for rendering -map_dir = "resources/drive/binaries/training" -; Directory to output rendered videos +; Render a batch of maps offline using the in-process c_render (ffmpeg) pipeline. 
+; Path to the .bin map directory +map_dir = "resources/drive/binaries/carla_2D" +; Directory to write output mp4 files output_dir = "resources/drive/render_videos" -; Evaluation will run on the first num_maps maps in the map_dir directory +; Number of maps to render (capped at files available in map_dir) num_maps = 100 -; "both", "topdown", "agent"; Other args are passed from train confs -view_mode = "both" +; View mode: sim_state (top-down, origin-centered), zoom_out (top-down, full map bbox), bev (agent BEV obs), persp (third-person follow-cam) +; NOTE: "persp" and "bev" require active_agent_count > 0 +view_mode = "sim_state" +; Whether to draw agent trajectory traces +draw_traces = True +; Maximum number of frames to render per map +max_frames = 91 +; Override [env] init/control modes for rendering logged-trajectory (carla_2D) maps. +; carla_2D maps carry recorded vehicle trajectories — use create_all_valid + control_vehicles. +init_mode = "init_variable_agent_number" +control_mode = "control_vehicles" ; Policy bin file used for rendering videos policy_path = "resources/drive/puffer_drive_weights_resampling_300.bin" ; Allows more than cpu cores workers for rendering overwork = True -; If True, show exactly what the agent sees in agent observation -obs_only = True -; Show grid lines -show_grid = True -; Draws lines from ego agent observed ORUs and road elements to show detection range -show_lasers = True ; Display human xy logs in the background show_human_logs = False -; If True, zoom in on a part of the map. 
Otherwise, show full map -zoom_in = True [sweep.train.learning_rate] distribution = log_normal diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 15b37208e..9f821cf11 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -1,5 +1,6 @@ """WOSAC evaluation class for PufferDrive.""" +import copy import torch import numpy as np import pandas as pd @@ -818,6 +819,146 @@ def rollout(self, args, puffer_env, policy): return results +class Evaluator: + """Evaluates policies in self_play or human_replay mode, with optional rendering. + + Initializes the eval envs needed based on eval config flags: + - human_replay_eval: creates sp_env + hr_env + - render_eval: creates sp_env (if not already created) + + + + """ + + def __init__(self, configs, logger=None): + self.configs = configs + self.logger = logger + self.sim_steps = 90 + self.self_play_stats = None + self.human_replay_stats = None + self.sp_env = None + self.hr_env = None + + self._unpack_eval_configs(configs) + + def _unpack_eval_configs(self, configs): + eval_config = copy.deepcopy(configs) + # Create separate evaluation environments based on specified configs + eval_config["env"]["termination_mode"] = 0 + backend = eval_config["eval"].get("backend", "PufferEnv") + eval_config["env"]["map_dir"] = eval_config["eval"]["map_dir"] + eval_config["env"]["num_agents"] = eval_config["eval"]["num_eval_agents"] + eval_config["env"]["episode_length"] = 91 # WOMD scenario length + eval_config["vec"] = dict(backend=backend, num_envs=1) + + self.render_sp_rollout = self.configs["eval"]["render_self_play_eval"] + self.render_hr_rollout = self.configs["eval"]["render_human_replay_eval"] + + self.hr_eval_config = copy.deepcopy(eval_config) + self.hr_eval_config["env"]["control_mode"] = "control_sdc_only" + self.hr_eval_config["env"]["render_mode"] = 1 if self.render_hr_rollout else 0 + + self.sp_eval_config = copy.deepcopy(eval_config) + 
self.sp_eval_config["env"]["control_mode"] = "control_agents" + self.sp_eval_config["env"]["render_mode"] = 1 if self.render_sp_rollout else 0 + + def rollout(self, policy, mode="self_play", render_rollout=False): + """Roll out the given policy in the specified eval env and collect statistics.""" + + env = self.hr_env if mode == "human_replay" else self.sp_env + render_eval = self.render_sp_rollout if mode == "self_play" else self.render_hr_rollout + driver = env.driver_env + num_agents = env.observation_space.shape[0] + device = self.configs["train"]["device"] + + # Reset environment + obs, info = env.reset() + + # Initialize RNN state if needed + state = {} + if self.configs["train"]["use_rnn"]: + state = dict( + lstm_h=torch.zeros(num_agents, policy.hidden_size, device=device), + lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), + ) + + # Rollout + for time_idx in range(self.sim_steps): + if render_rollout or render_eval: + driver.render() + + # Get action from policy + with torch.no_grad(): + ob_tensor = torch.as_tensor(obs).to(device) + logits, value = policy.forward_eval(ob_tensor, state) + action, logprob, _ = pufferlib.pytorch.sample_logits(logits) + action_np = action.cpu().numpy().reshape(env.action_space.shape) + + # Clip continuous actions to valid range + if isinstance(logits, torch.distributions.Normal): + action_np = np.clip(action_np, env.action_space.low, env.action_space.high) + + # Step environment + obs, rewards, dones, truncs, info_list = env.step(action_np) + + if truncs.all(): + break + + # Aggregate final statistics + final_info = info_list[0] if info_list else {} + + if mode == "self_play": + self.self_play_stats = final_info + elif mode == "human_replay": + self.human_replay_stats = final_info + + def log_videos(self, eval_mode, epoch): + """Log all mp4s in local path to wandb after env close has flushed ffmpeg pipes.""" + import os + import glob + + if not (self.logger and hasattr(self.logger, "wandb") and 
self.logger.wandb): + # Still clean up even if not logging + for p in glob.glob("*.mp4"): + os.remove(p) + return + + import wandb + + video_files = glob.glob("*.mp4") + if not video_files: + print("Warning: no render videos found in local path") + return + + for p in video_files: + scenario_id = os.path.splitext(os.path.basename(p))[0] + self.logger.wandb.log( + {f"render/{eval_mode}": wandb.Video(p, format="mp4", caption=f"scene_{scenario_id}_epoch_{epoch}")} + ) + + for p in video_files: + os.remove(p) + + def log_stats(self): + if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb): + return + + eval_stats = {} + + if self.human_replay_stats is not None: + eval_stats["eval/hr_collision_rate"] = self.human_replay_stats["collision_rate"] + eval_stats["eval/hr_score"] = self.human_replay_stats["score"] + if self.self_play_stats is not None: + eval_stats["eval/sp_collision_rate"] = self.self_play_stats["collision_rate"] + eval_stats["eval/sp_score"] = self.self_play_stats["score"] + eval_stats["eval/num_agents"] = self.self_play_stats["n"] + + if not eval_stats: + return + + self.logger.wandb.log(eval_stats) + + class SafeEvaluator: """Evaluates policies with fixed safe/law-abiding reward conditioning. 
diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index f3c1cd66b..aaae593bd 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -20,8 +20,7 @@ void test_drivenet() { // Weights* weights = load_weights("resources/drive/puffer_drive_weights.bin"); Weights *weights = load_weights("puffer_drive_weights.bin"); - int reward_conditioning = 0; - DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, reward_conditioning); + DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, 1); forward(net, observations, actions); for (int i = 0; i < num_agents * num_actions; i++) { @@ -35,7 +34,7 @@ void test_drivenet() { free(weights); } -void demo() { +void demo(const char *map_name_arg, const char *policy_name_arg, int view_mode, int draw_traces) { // Read configuration from INI file env_init_config conf = {0}; const char *ini_file = "pufferlib/config/ocean/drive.ini"; @@ -83,18 +82,22 @@ void demo() { }; Drive env = { - .human_agent_idx = 0, - .action_type = 0, // Demo doesn't support continuous action space + .action_type = conf.action_type, .dynamics_model = conf.dynamics_model, .reward_vehicle_collision = conf.reward_vehicle_collision, .reward_offroad_collision = conf.reward_offroad_collision, + .reward_lane_align = conf.reward_lane_align, + .reward_lane_center = conf.reward_lane_center, .reward_goal = conf.reward_goal, .reward_goal_post_respawn = conf.reward_goal_post_respawn, .goal_radius = conf.goal_radius, + .min_goal_speed = conf.min_goal_speed, .goal_behavior = conf.goal_behavior, + .reward_randomization = conf.reward_randomization, + .reward_conditioning = conf.reward_conditioning, + .turn_off_normalization = conf.turn_off_normalization, .min_goal_distance = conf.min_goal_distance, .max_goal_distance = conf.max_goal_distance, - .min_goal_speed = conf.min_goal_speed, .max_goal_speed = conf.max_goal_speed, .dt = conf.dt, .episode_length = conf.episode_length, @@ -108,8 +111,27 @@ void demo() { .init_mode = 
conf.init_mode, .control_mode = conf.control_mode, .spawn_settings = spawn_settings, - .map_name = "resources/drive/binaries/carla_2D/map_001.bin", - .reward_conditioning = conf.reward_conditioning, + .reward_bounds = + { + {conf.reward_bound_goal_radius_min, conf.reward_bound_goal_radius_max}, + {conf.reward_bound_collision_min, conf.reward_bound_collision_max}, + {conf.reward_bound_offroad_min, conf.reward_bound_offroad_max}, + {conf.reward_bound_comfort_min, conf.reward_bound_comfort_max}, + {conf.reward_bound_lane_align_min, conf.reward_bound_lane_align_max}, + {conf.reward_bound_lane_center_min, conf.reward_bound_lane_center_max}, + {conf.reward_bound_velocity_min, conf.reward_bound_velocity_max}, + {conf.reward_bound_traffic_light_min, conf.reward_bound_traffic_light_max}, + {conf.reward_bound_center_bias_min, conf.reward_bound_center_bias_max}, + {conf.reward_bound_vel_align_min, conf.reward_bound_vel_align_max}, + {conf.reward_bound_overspeed_min, conf.reward_bound_overspeed_max}, + {conf.reward_bound_timestep_min, conf.reward_bound_timestep_max}, + {conf.reward_bound_reverse_min, conf.reward_bound_reverse_max}, + {conf.reward_bound_throttle_min, conf.reward_bound_throttle_max}, + {conf.reward_bound_steer_min, conf.reward_bound_steer_max}, + {conf.reward_bound_acc_min, conf.reward_bound_acc_max}, + }, + .map_name = "resources/drive/binaries/carla/carla_3D/map_001.bin", + .render_mode = RENDER_WINDOW, .partner_obs_radius = conf.partner_obs_radius, }; @@ -123,10 +145,11 @@ void demo() { conf.init_mode); free_allocated(&env); return; + // return -1; } c_reset(&env); - c_render(&env); - Weights *weights = load_weights("resources/drive/puffer_drive_weights.bin"); + c_render(&env, view_mode, draw_traces); + Weights *weights = load_weights(policy_name_arg ? 
policy_name_arg : "resources/drive/puffer_drive_weights.bin"); DriveNet *net = init_drivenet(weights, env.active_agent_count, env.dynamics_model, env.reward_conditioning); int accel_delta = 1; @@ -192,13 +215,14 @@ void demo() { } c_step(&env); - c_render(&env); + c_render(&env, view_mode, draw_traces); } close_client(env.client); free_allocated(&env); free_drivenet(net); free(weights); + return; } void performance_test() { @@ -243,61 +267,19 @@ void performance_test() { } int main(int argc, char *argv[]) { - // Visualization-only parameters (not in [env] section) - int show_grid = 0; - int obs_only = 0; - int lasers = 0; - int show_human_logs = 0; - int frame_skip = 1; - int zoom_in = 0; - const char *view_mode = "both"; - - // File paths and num_maps (not in [env] section) const char *map_name = NULL; - const char *policy_name = "resources/drive/puffer_drive_weights.bin"; - const char *output_topdown = NULL; - const char *output_agent = NULL; - int num_maps = 1; + const char *policy_name = NULL; + int view_mode = VIEW_MODE_SIM_STATE; // Default: full sim-state bird's-eye view + int draw_traces = 1; // Default: show logged trajectories // Parse command line arguments for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "--show-grid") == 0) { - show_grid = 1; - } else if (strcmp(argv[i], "--obs-only") == 0) { - obs_only = 1; - } else if (strcmp(argv[i], "--lasers") == 0) { - lasers = 1; - } else if (strcmp(argv[i], "--log-trajectories") == 0) { - show_human_logs = 1; - } else if (strcmp(argv[i], "--frame-skip") == 0) { - if (i + 1 < argc) { - frame_skip = atoi(argv[i + 1]); - i++; - if (frame_skip <= 0) { - frame_skip = 1; - } - } - } else if (strcmp(argv[i], "--zoom-in") == 0) { - zoom_in = 1; - } else if (strcmp(argv[i], "--view") == 0) { - if (i + 1 < argc) { - view_mode = argv[i + 1]; - i++; - if (strcmp(view_mode, "both") != 0 && strcmp(view_mode, "topdown") != 0 && - strcmp(view_mode, "agent") != 0) { - fprintf(stderr, "Error: --view must be 'both', 
'topdown', or 'agent'\n"); - return 1; - } - } else { - fprintf(stderr, "Error: --view option requires a value (both/topdown/agent)\n"); - return 1; - } - } else if (strcmp(argv[i], "--map-name") == 0) { + if (strcmp(argv[i], "--map-name") == 0) { if (i + 1 < argc) { map_name = argv[i + 1]; i++; } else { - fprintf(stderr, "Error: --map-name option requires a map file path\n"); + fprintf(stderr, "Error: --map-name requires a map file path\n"); return 1; } } else if (strcmp(argv[i], "--policy-name") == 0) { @@ -305,30 +287,36 @@ int main(int argc, char *argv[]) { policy_name = argv[i + 1]; i++; } else { - fprintf(stderr, "Error: --policy-name option requires a policy file path\n"); + fprintf(stderr, "Error: --policy-name requires a policy file path\n"); return 1; } - } else if (strcmp(argv[i], "--output-topdown") == 0) { - if (i + 1 < argc) { - output_topdown = argv[i + 1]; - i++; - } - } else if (strcmp(argv[i], "--output-agent") == 0) { - if (i + 1 < argc) { - output_agent = argv[i + 1]; - i++; - } - } else if (strcmp(argv[i], "--num-maps") == 0) { + } else if (strcmp(argv[i], "--view") == 0) { if (i + 1 < argc) { - num_maps = atoi(argv[i + 1]); + const char *v = argv[i + 1]; i++; + if (strcmp(v, "sim_state") == 0 || strcmp(v, "topdown") == 0) { + view_mode = VIEW_MODE_SIM_STATE; + } else if (strcmp(v, "bev") == 0 || strcmp(v, "agent") == 0) { + view_mode = VIEW_MODE_BEV_AGENT_OBS; + } else if (strcmp(v, "persp") == 0) { + view_mode = VIEW_MODE_AGENT_PERSP; + } else if (strcmp(v, "zoom_out") == 0 || strcmp(v, "full_map") == 0) { + view_mode = VIEW_MODE_SIM_STATE_ZOOMED_OUT; + } else { + fprintf(stderr, "Error: --view must be 'sim_state', 'bev', 'persp', or 'zoom_out'\n"); + return 1; + } + } else { + fprintf(stderr, "Error: --view requires a value (sim_state/bev/persp/zoom_out)\n"); + return 1; } + } else if (strcmp(argv[i], "--no-traces") == 0) { + draw_traces = 0; } } // performance_test(); - demo(map_name, policy_name, show_grid, obs_only, lasers, 
show_human_logs, frame_skip, view_mode, output_topdown, - output_agent, num_maps, zoom_in); + demo(map_name, policy_name, view_mode, draw_traces); // test_drivenet(); return 0; } diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 61722f483..33b8719ca 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -21,6 +21,8 @@ #define VIEW_MODE_SIM_STATE 0 #define VIEW_MODE_BEV_AGENT_OBS 1 #define VIEW_MODE_AGENT_PERSP 2 +#define VIEW_MODE_SIM_STATE_ZOOMED_OUT \ + 3 // Full map, camera centered on map bbox (matches old visualize.c zoom_in=false) // // Order of entities in rendering (lower is rendered first) // #define Z_ROAD_SURFACE 0.0f @@ -1091,6 +1093,24 @@ void load_map_binary(const char *filename, Drive *env) { if (!file) return; + // Populate scenario_id from the filename stem (e.g. "/path/to/abc123def456.bin" → "abc123def456") + // This is the string used by make_client to name the output mp4. + { + const char *base = filename; + // Find last '/' or '\\' + for (const char *p = filename; *p; p++) { + if (*p == '/' || *p == '\\') + base = p + 1; + } + // Copy up to SCENARIO_ID_STR_LENGTH chars, stopping at '.' 
or end + int i = 0; + while (i < SCENARIO_ID_STR_LENGTH - 1 && base[i] && base[i] != '.') { + env->scenario_id[i] = base[i]; + i++; + } + env->scenario_id[i] = '\0'; + } + // Read sdc_track_index fread(&env->sdc_track_index, sizeof(int), 1, file); @@ -2082,7 +2102,6 @@ int spawn_active_agents(Drive *env, int num_agents_to_create) { void spawn_agents_with_counts(Drive *env) { // Currently only creates active agents int num_agents_to_create = env->num_agents; - int successfully_created = spawn_active_agents(env, num_agents_to_create); env->num_created_agents = successfully_created; @@ -4337,6 +4356,45 @@ void c_render(Drive *env, int view_mode, int draw_traces) { draw_scene(env, client, 1, 0, 0, 0); + } else if (view_mode == VIEW_MODE_SIM_STATE_ZOOMED_OUT) { + // Orthographic bird's-eye view showing the full map, camera centered on the + // map bounding box — mirrors the old visualize.c renderTopDownView zoom_in=false path. + camera.position = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 500.0f}; + camera.target = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 0.0f}; + camera.up = (Vector3){0.0f, -1.0f, 0.0f}; + camera.projection = CAMERA_ORTHOGRAPHIC; + camera.fovy = map_height * 2.0f; // 2× height shows the full map + + BeginDrawing(); + ClearBackground(ROAD_COLOR); + BeginMode3D(camera); + + if (draw_traces) { + for (int i = 0; i < env->active_agent_count; i++) { + int idx = env->active_agent_indices[i]; + for (int t = env->init_steps; t < env->episode_length; t++) { + Color agent_color = LIGHTBLUE; + if (env->agents[idx].type == PEDESTRIAN) + agent_color = LIGHT_ORANGE; + else if (env->agents[idx].type == CYCLIST) + agent_color = LIGHT_PURPLE; + DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], + env->agents[idx].log_trajectory_z[t]}, + 0.15f, agent_color); + } + } + for (int i = 0; i < env->expert_static_agent_count; i++) { + int idx = env->expert_static_agent_indices[i]; 
+ for (int t = env->init_steps; t < env->episode_length; t++) { + DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], + env->agents[idx].log_trajectory_z[t]}, + 0.15f, EXPERT_REPLAY); + } + } + } + + draw_scene(env, client, 1, 0, 0, 0); + } else if (view_mode == VIEW_MODE_BEV_AGENT_OBS) { // Orthographic bird's-eye view centered on the selected agent, // showing only that agent's observations diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 539db8b81..94d2b8b6b 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -11,9 +11,10 @@ class RenderView(IntEnum): - FULL_SIM_STATE = 0 # Orthographic top-down, fully observable simulator state + FULL_SIM_STATE = 0 # Orthographic top-down, fully observable simulator state (zoomed in, origin-centered) BEV_AGENT_OBS = 1 # Orthographic top-down, only show what the selected agent can observe AGENT_PERSP = 2 # Third-person perspective following selected agent + FULL_SIM_STATE_ZOOMED_OUT = 3 # Orthographic top-down, full map bbox view (mirrors old visualize.c zoom_in=false) class Drive(pufferlib.PufferEnv): diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 0fdb48428..a8db5d955 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -197,7 +197,7 @@ static PyObject *env_reset(PyObject *self, PyObject *args) { static PyObject *env_step(PyObject *self, PyObject *args) { int num_args = PyTuple_Size(args); if (num_args != 1) { - PyErr_SetString(PyExc_TypeError, "vec_render requires 1 argument"); + PyErr_SetString(PyExc_TypeError, "env_step requires 1 argument"); return NULL; } diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 424baf4f6..3d3d3bb52 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -15,6 +15,7 @@ import random import shutil import subprocess +import tempfile import argparse import importlib import configparser @@ -37,6 +38,9 @@ 
import pufferlib.pytorch import pufferlib.utils +from pufferlib.ocean.benchmark.evaluator import Evaluator + + try: from pufferlib import _C except ImportError: @@ -55,7 +59,6 @@ import signal # Aggressively exit on ctrl+c import multiprocessing -import queue signal.signal(signal.SIGINT, lambda sig, frame: os._exit(0)) @@ -64,7 +67,8 @@ class PuffeRL: - def __init__(self, config, vecenv, policy, logger=None): + def __init__(self, config, vecenv, policy, logger=None, full_args=None): + self.full_args = full_args # Backend perf optimization torch.set_float32_matmul_precision("high") torch.backends.cudnn.deterministic = config["torch_deterministic"] @@ -124,17 +128,6 @@ def __init__(self, config, vecenv, policy, logger=None): self.ep_lengths = torch.zeros(total_agents, device=device, dtype=torch.int32) self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32) self.free_idx = total_agents - self.render = config["render"] - self.render_async = config["render_async"] - self.render_interval = config["render_interval"] - - safe_eval_renders = config.get("safe_eval", {}).get("enabled", False) - if self.render or safe_eval_renders: - ensure_drive_binary() - - if self.render_async: - self.render_queue = multiprocessing.Queue() - self.render_processes = [] # LSTM if config["use_rnn"]: @@ -203,9 +196,6 @@ def __init__(self, config, vecenv, policy, logger=None): self.logger = logger if logger is None: self.logger = NoLogger(config) - if self.render_async and hasattr(self.logger, "wandb") and self.logger.wandb: - self.logger.wandb.define_metric("render_step", hidden=True) - self.logger.wandb.define_metric("render/*", step_metric="render_step") # Learning rate scheduler epochs = config["total_timesteps"] // config["batch_size"] @@ -516,124 +506,38 @@ def train(self): self.save_checkpoint() self.msg = f"Checkpoint saved at update {self.epoch}" - if self.render and self.epoch % self.render_interval == 0: - model_dir = os.path.join(self.config["data_dir"], 
f"{self.config['env']}_{self.logger.run_id}") - model_files = glob.glob(os.path.join(model_dir, "model_*.pt")) - - if model_files: - # Take the latest checkpoint - latest_cpt = max(model_files, key=os.path.getctime) - bin_path = f"{model_dir}.bin" - - # Export to .bin for rendering with raylib - try: - export_args = {"env_name": self.config["env"], "load_model_path": latest_cpt, **self.config} - - export( - args=export_args, - env_name=self.config["env"], - vecenv=self.vecenv, - policy=self.uncompiled_policy, - path=bin_path, - silent=True, - ) - - bin_path_epoch = f"{model_dir}_epoch_{self.epoch:06d}.bin" - shutil.copy2(bin_path, bin_path_epoch) - - driver_env = getattr(self.vecenv, "driver_env", None) - render_ini = pufferlib.utils.generate_env_ini( - self.config.get("env_config", {}), prefix="render_" - ) - self._render_videos( - bin_path=bin_path_epoch, - num_maps=getattr(driver_env, "num_maps", None), - map_dir=getattr(driver_env, "map_dir", None), - wandb_prefix="render", - config_path=render_ini, - cleanup_files=[bin_path_epoch, bin_path, render_ini], - ) - - except Exception as e: - print(f"Failed to export model weights: {e}") - - if self.config["eval"]["wosac_realism_eval"] and ( - (self.epoch - 1) % self.config["eval"]["eval_interval"] == 0 or done_training - ): - pufferlib.utils.run_wosac_eval_in_subprocess(self.config, self.logger, self.global_step) + if self.epoch % self.config["eval"]["eval_interval"] == 0 or done_training: + human_replay_eval = self.config["eval"]["human_replay_eval"] + self_play_eval = self.config["eval"]["self_play_eval"] + safe_eval = self.config["eval"]["safe_eval"] + + self.evaluator = Evaluator(self.full_args, self.logger) + if human_replay_eval: + self.evaluator.hr_env = load_env("puffer_drive", self.evaluator.hr_eval_config) + self.evaluator.rollout(self.uncompiled_policy, mode="human_replay") + self.evaluator.hr_env.close() + self.evaluator.log_videos(eval_mode="human_replay", epoch=self.epoch) + if self_play_eval: + 
self.evaluator.sp_env = load_env("puffer_drive", self.evaluator.sp_eval_config)
+                    self.evaluator.rollout(self.uncompiled_policy, mode="self_play")
+                    self.evaluator.sp_env.close()
+                    self.evaluator.log_videos(eval_mode="self_play", epoch=self.epoch)
+                if safe_eval:
+                    self.evaluator.safe_env = load_env("puffer_drive", self.evaluator.safe_eval_config)
+                    self.evaluator.rollout(self.uncompiled_policy, mode="safe_eval")
+                    self.evaluator.safe_env.close()
+                    self.evaluator.log_videos(eval_mode="safe_eval", epoch=self.epoch)
+                if human_replay_eval or self_play_eval or safe_eval:
+                    self.evaluator.log_stats()
+
+                if self.config["eval"]["wosac_realism_eval"]:
+                    pufferlib.utils.run_wosac_eval_in_subprocess(self.config, self.logger, self.global_step)
 
             if self.config["eval"]["human_replay_eval"] and (
                 (self.epoch - 1) % self.config["eval"]["eval_interval"] == 0 or done_training
             ):
                 pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step)
 
-            safe_eval_config = self.config.get("safe_eval", {})
-            safe_eval_enabled = safe_eval_config.get("enabled", False)
-            safe_eval_interval = int(safe_eval_config.get("interval", self.render_interval))
-            is_main = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0
-            if (
-                is_main
-                and safe_eval_enabled
-                and safe_eval_interval > 0
-                and (self.epoch % safe_eval_interval == 0 or done_training)
-            ):
-                self._run_safe_eval()
-
-    def _render_videos(
-        self,
-        bin_path,
-        num_maps=None,
-        map_dir=None,
-        wandb_prefix="render",
-        config_path=None,
-        cleanup_files=None,
-    ):
-        """Render videos, either async (background process) or sync (blocking)."""
-        wandb_log = hasattr(self.logger, "wandb") and self.logger.wandb is not None
-        render_kwargs = dict(
-            config=self.config,
-            run_id=self.logger.run_id,
-            num_maps=num_maps,
-            map_dir=map_dir,
-            wandb_log=wandb_log,
-            epoch=self.epoch,
-            global_step=self.global_step,
-            bin_path=bin_path,
-            config_path=config_path,
-            wandb_prefix=wandb_prefix,
-        )
-
-        if 
self.render_async: - self._reap_render_processes() - max_processes = 3 - if len(self.render_processes) >= max_processes: - print(f"Waiting for render processes to finish ({len(self.render_processes)}/{max_processes})...") - while len(self.render_processes) >= max_processes: - time.sleep(1) - self._reap_render_processes() - - render_proc = multiprocessing.Process( - target=pufferlib.utils.render_videos_and_cleanup, - kwargs={ - **render_kwargs, - "cleanup_files": cleanup_files or [], - "render_async": True, - "render_queue": self.render_queue, - }, - daemon=True, - ) - render_proc.start() - self.render_processes.append(render_proc) - else: - pufferlib.utils.render_videos( - **render_kwargs, - render_async=False, - wandb_run=self.logger.wandb if wandb_log else None, - ) - for f in cleanup_files or []: - if os.path.exists(f): - os.remove(f) - def _run_safe_eval(self): """Run safe eval in-process using SafeEvaluator, then render videos.""" import copy @@ -709,56 +613,7 @@ def _run_safe_eval(self): except OSError: pass - def _reap_render_processes(self): - """Remove finished render processes from the tracking list.""" - if not self.render_async: - return - alive = [] - for p in self.render_processes: - if p.is_alive(): - alive.append(p) - else: - p.join(timeout=1) - self.render_processes = alive - - def check_render_queue(self): - """Check if any async render jobs finished and log them.""" - if not self.render_async: - return - self._reap_render_processes() - - try: - while True: - try: - result = self.render_queue.get_nowait() - except queue.Empty: - break - - step = result["step"] - videos = result["videos"] - prefix = result.get("prefix", "render") - - if hasattr(self.logger, "wandb") and self.logger.wandb: - import wandb - - payload = {} - if videos["output_topdown"]: - payload[f"{prefix}/world_state"] = [ - wandb.Video(p, format="mp4") for p in videos["output_topdown"] - ] - if videos["output_agent"]: - payload[f"{prefix}/agent_view"] = [wandb.Video(p, 
format="mp4") for p in videos["output_agent"]] - - if payload: - payload["train_step"] = step - self.logger.wandb.log(payload) - except Exception as e: - print(f"Error reading render queue: {e}") - def mean_and_log(self): - # Check render queue for finished async jobs - self.check_render_queue() - config = self.config for k in list(self.stats.keys()): v = self.stats[k] @@ -795,22 +650,6 @@ def close(self): self.vecenv.close() self.utilization.stop() - if self.render_async: - # Drain any remaining async render results before closing - self.check_render_queue() - for p in self.render_processes: - try: - if p.is_alive(): - p.terminate() - p.join(timeout=5) - if p.is_alive(): - p.kill() - except Exception: - print(f"Failed to terminate render process {p.pid}") - self.render_processes = [] - self.render_queue.close() - self.render_queue.join_thread() - model_path = self.save_checkpoint() run_id = self.logger.run_id project_name = "puffer_drive" @@ -1274,7 +1113,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): ) if "vec" in args and "num_workers" in args["vec"]: train_config["num_workers"] = args["vec"]["num_workers"] - pufferl = PuffeRL(train_config, vecenv, policy, logger) + pufferl = PuffeRL(train_config, vecenv, policy, logger, full_args=args) all_logs = [] while pufferl.global_step < train_config["total_timesteps"]: @@ -1365,38 +1204,6 @@ def eval(env_name, args=None, vecenv=None, policy=None): vecenv.close() return results_dict - elif human_replay_enabled: - args["env"]["map_dir"] = args["eval"]["map_dir"] - dataset_name = args["env"]["map_dir"].split("/")[-1] - print(f"Running human replay evaluation with {dataset_name} dataset.\n") - from pufferlib.ocean.benchmark.evaluator import HumanReplayEvaluator - - backend = args["eval"].get("backend", "PufferEnv") - args["env"]["map_dir"] = args["eval"]["map_dir"] - args["env"]["num_agents"] = args["eval"]["human_replay_num_agents"] - - args["vec"] = dict(backend=backend, num_envs=1) - 
args["env"]["control_mode"] = args["eval"]["human_replay_control_mode"] - args["env"]["episode_length"] = 91 # WOMD scenario length - - vecenv = vecenv or load_env(env_name, args) - policy = policy or load_policy(args, vecenv, env_name) - - print(f"Effective number of scenarios used: {len(vecenv.driver_env.agent_offsets) - 1}") - - evaluator = HumanReplayEvaluator(args) - - # Run rollouts with human replays - results = evaluator.rollout(args, vecenv, policy) - - import json - - print("HUMAN_REPLAY_METRICS_START") - print(json.dumps(results)) - print("HUMAN_REPLAY_METRICS_END") - - return results - else: # Standard evaluation: Render backend = args["vec"]["backend"] if backend != "PufferEnv": @@ -1418,23 +1225,12 @@ def eval(env_name, args=None, vecenv=None, policy=None): lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), ) - frames = [] + if driver.render_mode == 1: + max_frames = 91 + frame_count = 0 + while True: - render = driver.render() - if len(frames) < args["save_frames"]: - frames.append(render) - - # Screenshot Ocean envs with F12, gifs with control + F12 - if driver.render_mode == "ansi": - print("\033[0;0H" + render + "\n") - time.sleep(1 / args["fps"]) - elif driver.render_mode == "rgb_array": - pass - # import cv2 - # render = cv2.cvtColor(render, cv2.COLOR_RGB2BGR) - # cv2.imshow('frame', render) - # cv2.waitKey(1) - # time.sleep(1/args['fps']) + driver.render() with torch.no_grad(): ob = torch.as_tensor(ob).to(device) @@ -1445,13 +1241,14 @@ def eval(env_name, args=None, vecenv=None, policy=None): if isinstance(logits, torch.distributions.Normal): action = np.clip(action, vecenv.action_space.low, vecenv.action_space.high) - ob = vecenv.step(action)[0] + ob, reward, done, truncated, info = vecenv.step(action) - if len(frames) > 0 and len(frames) == args["save_frames"]: - import imageio + if driver.render_mode == 1: + frame_count += 1 + if frame_count >= max_frames or done.all() or truncated.all(): + break - 
imageio.mimsave(args["gif_path"], frames, fps=args["fps"], loop=0) - frames.append("Done") + vecenv.close() def sweep(args=None, env_name=None): @@ -1641,27 +1438,6 @@ def export(args=None, env_name=None, vecenv=None, policy=None, path=None, silent print(f"Saved {len(weights)} weights to {path}") -def ensure_drive_binary(): - """Delete existing visualize binary and rebuild it. This ensures the - binary is always up-to-date with the latest code changes. - """ - if os.path.exists("./visualize"): - os.remove("./visualize") - - try: - result = subprocess.run( - ["bash", "scripts/build_ocean.sh", "visualize", "fast"], capture_output=True, text=True, timeout=300 - ) - - if result.returncode != 0: - print(f"Build failed: {result.stderr}") - raise RuntimeError("Failed to build visualize binary for rendering") - except subprocess.TimeoutExpired: - raise RuntimeError("Build timed out") - except Exception as e: - raise RuntimeError(f"Build error: {e}") - - def autotune(args=None, env_name=None, vecenv=None, policy=None): package = args["package"] module_name = "pufferlib.ocean" if package == "ocean" else f"pufferlib.environments.{package}" @@ -1731,6 +1507,7 @@ def load_config(env_name, config_dir=None): add_help=False, ) parser.add_argument("--load-model-path", type=str, default=None, help="Path to a pretrained checkpoint") + parser.add_argument("--export-path", type=str, default=None, help="Output path for puffer export (.bin file)") parser.add_argument( "--load-id", type=str, default=None, help="Kickstart/eval from from a finished Wandb/Neptune run" ) @@ -1806,85 +1583,151 @@ def puffer_type(value): def render(env_name, args=None): + """Render rollouts for a batch of maps using the in-process c_render pipeline. + + Each map is loaded as a separate environment with render_mode=1 (headless + ffmpeg). A policy rollout is run for max_frames steps, producing one + {scenario_id}.mp4 per env in the current working directory. The files are + then moved into output_dir. 
+ """ + from pufferlib.ocean.drive.drive import RenderView + args = args or load_config(env_name) render_configs = args.get("render", {}) - # Renders first num_maps from map_dir using visualize binary try: map_dir = render_configs["map_dir"] num_maps = render_configs.get("num_maps", 1) - view_mode = render_configs["view_mode"] - render_policy_path = render_configs["policy_path"] - overwork = render_configs.get("overwork", False) - num_workers = args["vec"]["num_workers"] + view_mode_str = str(render_configs.get("view_mode", "sim_state")).lower().strip('"').strip("'") + draw_traces = render_configs.get("draw_traces", True) + max_frames = render_configs.get("max_frames", 91) output_dir = render_configs["output_dir"] + # Allow [render] to override [env] init/control modes — important for logged-trajectory + # maps (e.g. carla_2D) which need create_all_valid + control_vehicles, not the training + # init_variable_agent_number mode that spawns agents on road lanes. + render_init_mode = ( + str(render_configs["init_mode"]).strip('"').strip("'") if "init_mode" in render_configs else None + ) + render_control_mode = ( + str(render_configs["control_mode"]).strip('"').strip("'") if "control_mode" in render_configs else None + ) except KeyError as e: raise pufferlib.APIUsageError(f"Missing render config: {e}") - cpu_cores = psutil.cpu_count(logical=False) - if num_workers > cpu_cores and not overwork: - raise pufferlib.APIUsageError( - " ".join( - [ - f"num_workers ({num_workers}) > hardware cores ({cpu_cores}) is disallowed by default.", - "PufferLib multiprocessing is heavily optimized for 1 process per hardware core.", - "If you really want to do this, set overwork=True (--vec-overwork in our demo.py).", - ] - ) - ) + # Map config string → RenderView enum + _VIEW_MODE_MAP = { + "sim_state": RenderView.FULL_SIM_STATE, + "topdown": RenderView.FULL_SIM_STATE, # backward compat + "bev": RenderView.BEV_AGENT_OBS, + "agent": RenderView.BEV_AGENT_OBS, # backward compat + "persp": 
RenderView.AGENT_PERSP,
+        "zoom_out": RenderView.FULL_SIM_STATE_ZOOMED_OUT,
+        "full_map": RenderView.FULL_SIM_STATE_ZOOMED_OUT,  # backward compat alias
+    }
+    view_mode = _VIEW_MODE_MAP.get(view_mode_str)
+    if view_mode is None:
+        raise pufferlib.APIUsageError(f"Unknown view_mode '{view_mode_str}'. Choose from: sim_state, bev, persp")
+
+    bin_files = sorted(f for f in os.listdir(map_dir) if f.endswith(".bin"))
+    if num_maps > len(bin_files):
+        num_maps = len(bin_files)
+    render_maps = [os.path.join(map_dir, f) for f in bin_files[:num_maps]]
 
-    if num_maps > len(os.listdir(map_dir)):
-        num_maps = len(os.listdir(map_dir))
-
-    render_maps = [os.path.join(map_dir, f) for f in sorted(os.listdir(map_dir)) if f.endswith(".bin")][:num_maps]
     os.makedirs(output_dir, exist_ok=True)
 
-    # Rebuild visualize binary
-    ensure_drive_binary()
+    # Fall back to CPU if the configured device is unavailable (e.g. no CUDA on this machine)
+    configured_device = args["train"]["device"]
+    if configured_device == "cuda" and not torch.cuda.is_available():
+        print("Warning: CUDA not available, falling back to CPU for render.")
+        configured_device = "cpu"
+    args["train"]["device"] = configured_device
+    device = configured_device
+
+    def render_one_map(map_path):
+        """Render a single map file and move the resulting mp4(s) to output_dir.
+
+        The map binary is copied into a private temp directory so the C
+        loader sees exactly one .bin file (num_maps=1). After env.close()
+        finalizes the ffmpeg pipe, we glob for new *.mp4 files in cwd and
+        move them — no fragile scenario_id filename prediction needed.
+ """ + map_name = os.path.splitext(os.path.basename(map_path))[0] + + with tempfile.TemporaryDirectory() as tmp_map_dir: + # Hard-link (or copy) the single .bin into an isolated dir + tmp_bin = os.path.join(tmp_map_dir, os.path.basename(map_path)) + shutil.copy2(map_path, tmp_bin) + + env_overrides = { + **args["env"], + "num_maps": 1, + "map_dir": tmp_map_dir, + "render_mode": 1, # headless ffmpeg → writes {scenario_id}.mp4 in cwd + } + # Override init/control modes if [render] section specifies them. + # carla_2D logged-trajectory maps need create_all_valid + control_vehicles; + # the training default (init_variable_agent_number) spawns no agents on these maps. + if render_init_mode is not None: + env_overrides["init_mode"] = render_init_mode + if render_control_mode is not None: + env_overrides["control_mode"] = render_control_mode + + map_args = { + **args, + "env": env_overrides, + "vec": {"backend": "Serial", "num_envs": 1}, + } + + env = load_env(env_name, map_args) + policy = load_policy(map_args, env, env_name) + policy.eval() + + ob, _ = env.reset() + driver = env.driver_env + + state = {} + if map_args["train"]["use_rnn"]: + n = env.agents_per_batch if hasattr(env, "agents_per_batch") else ob.shape[0] + state = dict( + lstm_h=torch.zeros(n, policy.hidden_size, device=device), + lstm_c=torch.zeros(n, policy.hidden_size, device=device), + ) + + # Snapshot which mp4s exist before this rollout + before = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) - def render_task(map_path): - base_cmd = ( - ["./visualize"] - if sys.platform == "darwin" - else ["xvfb-run", "-a", "-s", "-screen 0 1280x720x24", "./visualize"] - ) - cmd = base_cmd.copy() - cmd.extend(["--map-name", map_path]) - if render_configs.get("show_grid", False): - cmd.append("--show-grid") - if render_configs.get("obs_only", False): - cmd.append("--obs-only") - if render_configs.get("show_lasers", False): - cmd.append("--lasers") - if render_configs.get("show_human_logs", False): - 
cmd.append("--show-human-logs") - if render_configs.get("zoom_in", False): - cmd.append("--zoom-in") - cmd.extend(["--view", view_mode]) - if render_policy_path is not None: - cmd.extend(["--policy-name", render_policy_path]) - - map_name = os.path.basename(map_path).replace(".bin", "") - - if view_mode == "topdown" or view_mode == "both": - cmd.extend(["--output-topdown", os.path.join(output_dir, f"topdown_{map_name}.mp4")]) - if view_mode == "agent" or view_mode == "both": - cmd.extend(["--output-agent", os.path.join(output_dir, f"agent_{map_name}.mp4")]) - - env_vars = os.environ.copy() - env_vars["ASAN_OPTIONS"] = "exitcode=0" - try: - result = subprocess.run(cmd, cwd=os.getcwd(), capture_output=True, text=True, timeout=600, env=env_vars) - if result.returncode != 0: - print(f"Error rendering {map_name}: {result.stderr}") - except subprocess.TimeoutExpired: - print(f"Timeout rendering {map_name}: exceeded 600 seconds") + for _ in range(max_frames): + driver.render(view_mode=view_mode, draw_traces=draw_traces) + + with torch.no_grad(): + ob_t = torch.as_tensor(ob).to(device) + logits, _ = policy.forward_eval(ob_t, state) + action, _, _ = pufferlib.pytorch.sample_logits(logits) + action = action.cpu().numpy().reshape(env.action_space.shape) + + ob, _, done, truncated, _ = env.step(action) + if done.all() or truncated.all(): + break + + # env.close() finalizes the ffmpeg pipe and flushes the mp4 to disk + env.close() + + # Move any new mp4s that appeared since before the rollout + after = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) + new_mp4s = after - before + if new_mp4s: + for src in sorted(new_mp4s): + dst = os.path.join(output_dir, os.path.basename(src)) + shutil.move(src, dst) + print(f" Saved {dst}") + else: + print(f" Warning: no mp4 produced for map {map_name}") if render_maps: - print(f"Rendering {len(render_maps)} from {map_dir} with {num_workers} workers...") - with ThreadPool(num_workers) as pool: - pool.map(render_task, render_maps) - 
print(f"Finished rendering videos to {output_dir}") + print(f"Rendering {len(render_maps)} map(s) from {map_dir} → {output_dir} ...") + for map_path in render_maps: + render_one_map(map_path) + print(f"Done. Videos written to {output_dir}") def main(): @@ -1907,7 +1750,8 @@ def main(): elif mode == "profile": profile(env_name=env_name) elif mode == "export": - export(env_name=env_name) + args = load_config(env_name) + export(env_name=env_name, args=args, path=args.get("export_path")) elif mode == "sanity": sanity(env_name=env_name) elif mode == "render": diff --git a/pufferlib/utils.py b/pufferlib/utils.py index 9eda5827e..9c6c5185c 100644 --- a/pufferlib/utils.py +++ b/pufferlib/utils.py @@ -62,73 +62,6 @@ def generate_safe_eval_ini(safe_eval_config, base_ini_path="pufferlib/config/oce return tmp_path -def run_human_replay_eval_in_subprocess(config, logger, global_step): - """ - Run human replay evaluation in a subprocess and log metrics to wandb. - - """ - try: - run_id = logger.run_id - model_dir = os.path.join(config["data_dir"], f"{config['env']}_{run_id}") - model_files = glob.glob(os.path.join(model_dir, "model_*.pt")) - - if not model_files: - print("No model files found for human replay evaluation") - return - - latest_cpt = max(model_files, key=os.path.getctime) - - # Prepare evaluation command - eval_config = config["eval"] - cmd = [ - sys.executable, - "-m", - "pufferlib.pufferl", - "eval", - config["env"], - "--load-model-path", - latest_cpt, - "--eval.wosac-realism-eval", - "False", - "--eval.human-replay-eval", - "True", - "--eval.human-replay-num-agents", - str(eval_config["human_replay_num_agents"]), - "--eval.human-replay-control-mode", - str(eval_config["human_replay_control_mode"]), - ] - - # Run human replay evaluation in subprocess - result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, cwd=os.getcwd()) - - if result.returncode == 0: - # Extract JSON from stdout between markers - stdout = result.stdout - if 
"HUMAN_REPLAY_METRICS_START" in stdout and "HUMAN_REPLAY_METRICS_END" in stdout: - start = stdout.find("HUMAN_REPLAY_METRICS_START") + len("HUMAN_REPLAY_METRICS_START") - end = stdout.find("HUMAN_REPLAY_METRICS_END") - json_str = stdout[start:end].strip() - human_replay_metrics = json.loads(json_str) - - # Log to wandb if available - if hasattr(logger, "wandb") and logger.wandb: - logger.wandb.log( - { - "eval/human_replay_collision_rate": human_replay_metrics["collision_rate"], - "eval/human_replay_offroad_rate": human_replay_metrics["offroad_rate"], - "eval/human_replay_completion_rate": human_replay_metrics["completion_rate"], - }, - step=global_step, - ) - else: - print(f"Human replay evaluation failed with exit code {result.returncode}: {result.stderr}") - - except subprocess.TimeoutExpired: - print("Human replay evaluation timed out") - except Exception as e: - print(f"Failed to run human replay evaluation: {e}") - - def run_wosac_eval_in_subprocess(config, logger, global_step): """ Run WOSAC evaluation in a subprocess and log metrics to wandb. @@ -225,178 +158,3 @@ def run_wosac_eval_in_subprocess(config, logger, global_step): print(f"WOSAC evaluation ran out of memory. 
Skipping this evaluation: {e}") except Exception as e: print(f"Failed to run WOSAC evaluation: {type(e).__name__}: {e}") - - -def render_videos( - config, - run_id, - wandb_log, - epoch, - global_step, - bin_path, - render_async, - render_queue=None, - wandb_run=None, - config_path=None, - wandb_prefix="render", - num_maps=None, - map_dir=None, -): - """Generate and log training videos using C-based rendering.""" - if not os.path.exists(bin_path): - print(f"Binary weights file does not exist: {bin_path}") - return - - model_dir = os.path.join(config["data_dir"], f"{config['env']}_{run_id}") - - # Now call the C rendering function - try: - # Create output directory for videos - video_output_dir = os.path.join(model_dir, "videos") - os.makedirs(video_output_dir, exist_ok=True) - - # TODO: Fix memory leaks so that this is not needed - # Suppress AddressSanitizer exit code (temp) - env_vars = os.environ.copy() - env_vars["ASAN_OPTIONS"] = "exitcode=0" - - # Base command with only visualization flags (env config comes from INI) - base_cmd = ["xvfb-run", "-a", "-s", "-screen 0 1280x720x24", "./visualize"] - - if config_path: - base_cmd.extend(["--config", config_path]) - - # Visualization config flags only - if config.get("show_grid", False): - base_cmd.append("--show-grid") - if config.get("obs_only", False): - base_cmd.append("--obs-only") - if config.get("show_lasers", False): - base_cmd.append("--lasers") - if config.get("show_human_logs", False): - base_cmd.append("--log-trajectories") - if config.get("zoom_in", False): - base_cmd.append("--zoom-in") - - # Frame skip for rendering performance - frame_skip = config.get("frame_skip", 1) - if frame_skip > 1: - base_cmd.extend(["--frame-skip", str(frame_skip)]) - - # View mode - view_mode = config.get("view_mode", "both") - base_cmd.extend(["--view", view_mode]) - - if num_maps: - base_cmd.extend(["--num-maps", str(num_maps)]) - - base_cmd.extend(["--policy-name", bin_path]) - - # Handle single or multiple map rendering 
- render_maps = config.get("render_map", None) - if render_maps is None or render_maps == "none": - pass # use map_dir passed as parameter - if map_dir and os.path.isdir(map_dir): - bin_files = [f for f in os.listdir(map_dir) if f.endswith(".bin")] - if bin_files: - render_maps = [os.path.join(map_dir, random.choice(bin_files))] - else: - print(f"Warning: No .bin files found in {map_dir}, skipping render") - return - else: - print(f"Warning: map_dir not found or invalid ({map_dir}), skipping render") - return - elif isinstance(render_maps, (str, os.PathLike)): - render_maps = [render_maps] - else: - render_maps = list(render_maps) - - generated_videos = {"output_topdown": [], "output_agent": []} - output_topdown = f"resources/drive/{wandb_prefix}_output_topdown_{epoch}" - output_agent = f"resources/drive/{wandb_prefix}_output_agent_{epoch}" - - for i, map_path in enumerate(render_maps): - cmd = list(base_cmd) # copy - if os.path.exists(map_path): - cmd.extend(["--map-name", str(map_path)]) - - output_topdown_map = output_topdown + (f"_map{i:02d}.mp4" if len(render_maps) > 1 else ".mp4") - output_agent_map = output_agent + (f"_map{i:02d}.mp4" if len(render_maps) > 1 else ".mp4") - - cmd.extend(["--output-topdown", output_topdown_map]) - cmd.extend(["--output-agent", output_agent_map]) - - print(f"Running render: {' '.join(cmd[:6])}...") - result = subprocess.run(cmd, cwd=os.getcwd(), capture_output=True, text=True, timeout=1200, env=env_vars) - - vids_exist = os.path.exists(output_topdown_map) and os.path.exists(output_agent_map) - print(f"Render exit code: {result.returncode}, vids_exist: {vids_exist}") - if result.returncode != 0 and result.stderr: - print(f"Render stderr: {result.stderr[-500:]}") - - if result.returncode == 0 or (result.returncode == 1 and vids_exist): - videos = [ - ("output_topdown", output_topdown_map, f"epoch_{epoch:06d}_map{i:02d}_topdown.mp4"), - ("output_agent", output_agent_map, f"epoch_{epoch:06d}_map{i:02d}_agent.mp4"), - ] - - for 
vid_type, source_vid, target_filename in videos: - if os.path.exists(source_vid): - target_path = os.path.join(video_output_dir, target_filename) - shutil.move(source_vid, target_path) - generated_videos[vid_type].append(target_path) - else: - print(f"Video generation completed but {source_vid} not found") - if result.stdout: - print(f"StdOUT: {result.stdout}") - if result.stderr: - print(f"StdERR: {result.stderr}") - else: - print(f"C rendering failed (map index {i}) with exit code {result.returncode}: {result.stderr}") - - if render_async: - render_queue.put( - { - "videos": generated_videos, - "step": global_step, - "prefix": wandb_prefix, - } - ) - elif wandb_log and wandb_run: - import wandb - - payload = {} - if generated_videos["output_topdown"]: - payload[f"{wandb_prefix}/world_state"] = [ - wandb.Video(p, format="mp4") for p in generated_videos["output_topdown"] - ] - if generated_videos["output_agent"]: - payload[f"{wandb_prefix}/agent_view"] = [ - wandb.Video(p, format="mp4") for p in generated_videos["output_agent"] - ] - if payload: - print(f"Logging {len(payload)} video keys to wandb: {list(payload.keys())}") - payload["train_step"] = global_step - wandb_run.log(payload) - - except subprocess.TimeoutExpired: - print("C rendering timed out") - except Exception as e: - print(f"Failed to render videos: {e}") - - -def render_videos_and_cleanup(cleanup_files=None, **render_kwargs): - """Wrapper that runs render_videos then cleans up temp files. - - Intended as the target for multiprocessing.Process so that temp files - (bin weights, generated INI) are cleaned up inside the spawned process. 
- """ - try: - render_videos(**render_kwargs) - finally: - for f in cleanup_files or []: - try: - if os.path.exists(f): - os.remove(f) - except OSError: - pass From 33b4c93287aec0cd0d7931efc6764c244cee82e0 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Wed, 25 Mar 2026 16:32:39 -0400 Subject: [PATCH 04/23] Fixing bug with top-down render --- pufferlib/config/ocean/drive.ini | 8 ++--- pufferlib/ocean/drive/drive.c | 2 -- pufferlib/ocean/drive/drive.h | 50 +++----------------------------- pufferlib/ocean/drive/drive.py | 1 - pufferlib/pufferl.py | 8 +++-- 5 files changed, 14 insertions(+), 55 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index bc78f4bcb..11f9cd793 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -65,7 +65,7 @@ control_mode = "control_agents" ; Options: "create_all_valid", "create_only_controlled", "init_variable_agent_number"(creates random number of controlled agents per env) init_mode = "init_variable_agent_number" ; Below options only valid for "init_variable_agent_number" init_mode -min_agents_per_env = 1 +min_agents_per_env = 31 max_agents_per_env = 32 ; Dimension Ranges for agents spawn_width_min = 1.5 @@ -272,14 +272,14 @@ acc = 1.0 [render] ; Render a batch of maps offline using the in-process c_render (ffmpeg) pipeline. 
; Path to the .bin map directory -map_dir = "resources/drive/binaries/carla_2D" +map_dir = "resources/drive/binaries/carla/carla_3D" ; Directory to write output mp4 files output_dir = "resources/drive/render_videos" ; Number of maps to render (capped at files available in map_dir) -num_maps = 100 +num_maps = 3 ; View mode: sim_state (top-down, origin-centered), zoom_out (top-down, full map bbox), bev (agent BEV obs), persp (third-person follow-cam) ; NOTE: "persp" and "bev" require active_agent_count > 0 -view_mode = "sim_state" +view_mode = "bev" ; Whether to draw agent trajectory traces draw_traces = True ; Maximum number of frames to render per map diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index aaae593bd..d1a408b5a 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -300,8 +300,6 @@ int main(int argc, char *argv[]) { view_mode = VIEW_MODE_BEV_AGENT_OBS; } else if (strcmp(v, "persp") == 0) { view_mode = VIEW_MODE_AGENT_PERSP; - } else if (strcmp(v, "zoom_out") == 0 || strcmp(v, "full_map") == 0) { - view_mode = VIEW_MODE_SIM_STATE_ZOOMED_OUT; } else { fprintf(stderr, "Error: --view must be 'sim_state', 'bev', 'persp', or 'zoom_out'\n"); return 1; diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 33b8719ca..8181e60e9 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -21,9 +21,6 @@ #define VIEW_MODE_SIM_STATE 0 #define VIEW_MODE_BEV_AGENT_OBS 1 #define VIEW_MODE_AGENT_PERSP 2 -#define VIEW_MODE_SIM_STATE_ZOOMED_OUT \ - 3 // Full map, camera centered on map bbox (matches old visualize.c zoom_in=false) - // // Order of entities in rendering (lower is rendered first) // #define Z_ROAD_SURFACE 0.0f // #define Z_ROAD_MARKINGS 0.05f // Lane lines, road lines, traces @@ -4086,20 +4083,20 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, // Draw the agent bounding boxes Color agent_color = GRAY; if (is_expert) { - 
if (agent[i].type == PEDESTRIAN || agent[i].type == CYCLIST) + if (agent->type == PEDESTRIAN || agent->type == CYCLIST) agent_color = EXPERT_REPLAY_SMALL; else agent_color = EXPERT_REPLAY; } if (is_active_agent) { - if (agent[i].type == PEDESTRIAN) + if (agent->type == PEDESTRIAN) agent_color = LIGHT_ORANGE; - else if (agent[i].type == CYCLIST) + else if (agent->type == CYCLIST) agent_color = LIGHT_PURPLE; else agent_color = BLUE; } - if (is_active_agent && agent[i].collision_state > 0) + if (is_active_agent && agent->collision_state > 0) agent_color = RED; rlPushMatrix(); @@ -4356,45 +4353,6 @@ void c_render(Drive *env, int view_mode, int draw_traces) { draw_scene(env, client, 1, 0, 0, 0); - } else if (view_mode == VIEW_MODE_SIM_STATE_ZOOMED_OUT) { - // Orthographic bird's-eye view showing the full map, camera centered on the - // map bounding box — mirrors the old visualize.c renderTopDownView zoom_in=false path. - camera.position = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 500.0f}; - camera.target = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 0.0f}; - camera.up = (Vector3){0.0f, -1.0f, 0.0f}; - camera.projection = CAMERA_ORTHOGRAPHIC; - camera.fovy = map_height * 2.0f; // 2× height shows the full map - - BeginDrawing(); - ClearBackground(ROAD_COLOR); - BeginMode3D(camera); - - if (draw_traces) { - for (int i = 0; i < env->active_agent_count; i++) { - int idx = env->active_agent_indices[i]; - for (int t = env->init_steps; t < env->episode_length; t++) { - Color agent_color = LIGHTBLUE; - if (env->agents[idx].type == PEDESTRIAN) - agent_color = LIGHT_ORANGE; - else if (env->agents[idx].type == CYCLIST) - agent_color = LIGHT_PURPLE; - DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], - env->agents[idx].log_trajectory_z[t]}, - 0.15f, agent_color); - } - } - for (int i = 0; i < env->expert_static_agent_count; i++) { - int idx = env->expert_static_agent_indices[i]; - for 
(int t = env->init_steps; t < env->episode_length; t++) { - DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], - env->agents[idx].log_trajectory_z[t]}, - 0.15f, EXPERT_REPLAY); - } - } - } - - draw_scene(env, client, 1, 0, 0, 0); - } else if (view_mode == VIEW_MODE_BEV_AGENT_OBS) { // Orthographic bird's-eye view centered on the selected agent, // showing only that agent's observations diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 94d2b8b6b..e650b4e38 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -14,7 +14,6 @@ class RenderView(IntEnum): FULL_SIM_STATE = 0 # Orthographic top-down, fully observable simulator state (zoomed in, origin-centered) BEV_AGENT_OBS = 1 # Orthographic top-down, only show what the selected agent can observe AGENT_PERSP = 2 # Third-person perspective following selected agent - FULL_SIM_STATE_ZOOMED_OUT = 3 # Orthographic top-down, full map bbox view (mirrors old visualize.c zoom_in=false) class Drive(pufferlib.PufferEnv): diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 3d3d3bb52..cce8bf5ad 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1621,8 +1621,6 @@ def render(env_name, args=None): "bev": RenderView.BEV_AGENT_OBS, "agent": RenderView.BEV_AGENT_OBS, # backward compat "persp": RenderView.AGENT_PERSP, - "zoom_out": RenderView.FULL_SIM_STATE_ZOOMED_OUT, - "full_map": RenderView.FULL_SIM_STATE_ZOOMED_OUT, # backward compat alias } view_mode = _VIEW_MODE_MAP.get(view_mode_str) if view_mode is None: @@ -1693,6 +1691,12 @@ def render_one_map(map_path): lstm_c=torch.zeros(n, policy.hidden_size, device=device), ) + # Remove any stale mp4 with this map's name from a previous run so that + # the set-difference detection below doesn't miss the newly written file. 
+ stale = os.path.join(os.getcwd(), f"{map_name}.mp4") + if os.path.exists(stale): + os.remove(stale) + # Snapshot which mp4s exist before this rollout before = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) From 07e64b8a763a8aff2e0b4ccd43bafb250ccb7907 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Wed, 25 Mar 2026 16:56:16 -0400 Subject: [PATCH 05/23] Updating over latest 2.0 first --- pufferlib/config/ocean/drive.ini | 2 + pufferlib/ocean/benchmark/evaluator.py | 82 +++++++++++++++++++++----- pufferlib/ocean/drive/binding.c | 2 + pufferlib/ocean/drive/drive.h | 54 ++++++++++++++--- pufferlib/ocean/drive/drive.py | 36 +++++++---- pufferlib/ocean/env_binding.h | 37 ++++++++++++ pufferlib/pufferl.py | 2 + 7 files changed, 183 insertions(+), 32 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 11f9cd793..7e7113f10 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -188,6 +188,8 @@ num_eval_agents = 64 self_play_eval = True ; If True, enable human replay evaluation (pair policy-controlled agent with human replays) human_replay_eval = False +; Which env to render during eval. Options: "first" (by index), "worst_collision", "random" +render_select_mode = "first" ; If True, render random scenarios. Note: Doing this frequency will slow down the training. 
render_human_replay_eval = False render_self_play_eval = True diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 9f821cf11..29c4bfe6b 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -830,6 +830,11 @@ class Evaluator: """ + RENDER_FIRST = "first" + RENDER_RANDOM = "random" + RENDER_WORST_SCORE = "worst_score" + RENDER_WORST_COLLISION = "worst_collision" + def __init__(self, configs, logger=None): self.configs = configs self.logger = logger @@ -861,13 +866,65 @@ def _unpack_eval_configs(self, configs): self.sp_eval_config = copy.deepcopy(eval_config) self.sp_eval_config["env"]["control_mode"] = "control_agents" self.sp_eval_config["env"]["render_mode"] = 1 if self.render_sp_rollout else 0 + self.render_select_mode = self.configs["eval"]["render_select_mode"] + + def select_render_env(self, env_logs): + """Select which environment to render based on per-env rollout statistics. + Args: + env_logs: List of dicts, one per environment. Each dict contains + aggregated agent statistics (score, collision_rate, offroad_rate, etc.) + with 'n' being the number of controlled agents in that env. + Empty dicts indicate no data was collected for that env. + + Returns: + int: Index of the environment to render. 
+ """ + mode = self.render_select_mode + if mode == self.RENDER_FIRST: + return 0 + if mode == self.RENDER_RANDOM: + return np.random.randint(len(env_logs)) + + populated = [(i, log) for i, log in enumerate(env_logs) if log] - def rollout(self, policy, mode="self_play", render_rollout=False): + if not populated: + return 0 + + if mode == self.RENDER_WORST_SCORE: + return min(populated, key=lambda x: x[1].get("score", 1.0))[0] + elif mode == self.RENDER_WORST_COLLISION: + return max(populated, key=lambda x: x[1].get("collision_rate", 0.0))[0] + # Add other modes based on desiderata here + return 0 + + def rollout(self, policy, mode="self_play"): """Roll out the given policy in the specified eval env and collect statistics.""" env = self.hr_env if mode == "human_replay" else self.sp_env render_eval = self.render_sp_rollout if mode == "self_play" else self.render_hr_rollout driver = env.driver_env + + needs_stats_first = render_eval and self.render_select_mode not in (self.RENDER_FIRST, self.RENDER_RANDOM) + + if needs_stats_first: + env_logs = self._run_rollout(policy, env, per_env_logs=True) + render_env_idx = self.select_render_env(env_logs) + else: + render_env_idx = self.select_render_env([{}] * driver.num_envs) + + info_list = self._run_rollout(policy, env, render_env_idx if render_eval else None) + + final_info = info_list[0] if info_list else {} + if mode == "self_play": + self.self_play_stats = final_info + self.self_play_stats["render_env_idx"] = render_env_idx + elif mode == "human_replay": + self.human_replay_stats = final_info + self.human_replay_stats["render_env_idx"] = render_env_idx + + def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False): + """Run a single rollout. 
If render_env_idx is not None, render that env.""" + driver = env.driver_env num_agents = env.observation_space.shape[0] device = self.configs["train"]["device"] @@ -882,10 +939,10 @@ def rollout(self, policy, mode="self_play", render_rollout=False): lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), ) - # Rollout + info_list = [] for time_idx in range(self.sim_steps): - if render_rollout or render_eval: - driver.render() + if render_env_idx is not None: + driver.render(env_id=render_env_idx) # Get action from policy with torch.no_grad(): @@ -899,18 +956,12 @@ def rollout(self, policy, mode="self_play", render_rollout=False): action_np = np.clip(action_np, env.action_space.low, env.action_space.high) # Step environment - obs, rewards, dones, truncs, info_list = env.step(action_np) + obs, rewards, dones, truncs, info_list = env.step(action_np, per_env_logs=per_env_logs) if truncs.all(): break - # Aggregate final statistics - final_info = info_list[0] if info_list else {} - - if mode == "self_play": - self.self_play_stats = final_info - elif mode == "human_replay": - self.human_replay_stats = final_info + return info_list def log_videos(self, eval_mode, epoch): """Log all mp4s in local path to wandb after env close has flushed ffmpeg pipes.""" @@ -930,12 +981,13 @@ def log_videos(self, eval_mode, epoch): print("Warning: no render videos found in local path") return + render_mode = self.render_select_mode for p in video_files: scenario_id = os.path.splitext(os.path.basename(p))[0] - self.logger.wandb.log( - {f"render/{eval_mode}": wandb.Video(p, format="mp4", caption=f"scene_{scenario_id}_epoch_{epoch}")} - ) + caption = f"scene_{scenario_id}_epoch_{epoch}_select_{render_mode}" + self.logger.wandb.log({f"render/{eval_mode}": wandb.Video(p, format="mp4", caption=caption)}) + # Clean up for p in video_files: os.remove(p) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 8de367de4..1db5ee0e9 100644 --- 
a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -264,6 +264,7 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { free(env->active_agent_indices); free(env->static_agent_indices); free(env->expert_static_agent_indices); + free(env->tracks_to_predict_indices); free(env); Py_DECREF(agent_offsets); Py_DECREF(map_ids); @@ -285,6 +286,7 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { free(env->active_agent_indices); free(env->static_agent_indices); free(env->expert_static_agent_indices); + free(env->tracks_to_predict_indices); free(env); continue; } diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 8181e60e9..c6ecb8510 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -1,3 +1,4 @@ +#include #include #include #include @@ -2341,6 +2342,8 @@ void c_close(Drive *env) { free(env->expert_static_agent_indices); free(env->tracks_to_predict_indices); free(env->ini_file); + free(env->tracks_to_predict_indices); + env->tracks_to_predict_indices = NULL; } void allocate(Drive *env) { @@ -3539,20 +3542,49 @@ struct Client { Vector3 default_camera_target; int recorder_pipefd[2]; pid_t recorder_pid; + pid_t xvfb_pid; + int xvfb_display_num; }; Client *make_client(Drive *env) { Client *client = (Client *)calloc(1, sizeof(Client)); if (env->render_mode == RENDER_HEADLESS && getenv("DISPLAY") == NULL) { - // Start Xvfb and set DISPLAY - pid_t xvfb_pid = fork(); - if (xvfb_pid == 0) { - execlp("Xvfb", "Xvfb", ":99", "-screen", "0", "1x1x24", NULL); + + // Kill any existing Xvfb first + system("pkill -9 Xvfb"); + usleep(200000); + unlink("/tmp/.X99-lock"); + unlink("/tmp/.X11-unix/X99"); + + // Hardcode to single display because we only run this in one process at once + client->xvfb_display_num = 99; + + // Clean up stale lock if process is dead + FILE *f = fopen("/tmp/.X99-lock", "r"); + if (f) { + pid_t pid = -1; + fscanf(f, "%d", &pid); 
+ fclose(f); + if (pid > 0 && kill(pid, 0) != 0) + unlink("/tmp/.X99-lock"); + } + + client->xvfb_pid = fork(); + if (client->xvfb_pid == 0) { + close(STDOUT_FILENO); + close(STDERR_FILENO); + execlp("Xvfb", "Xvfb", ":99", "-screen", "0", "1280x720x24", "+extension", "GLX", "-ac", "-noreset", NULL); _exit(1); } + setenv("DISPLAY", ":99", 1); - usleep(500000); // Give Xvfb 500ms to start + // Xvfb starts asynchronously after fork(), so we poll until it creates its + // lock file (max 2s) then wait an extra 200ms for GLX to finish initializing. + // Without this, raylib's InitWindow() would try to connect before Xvfb is ready. + for (int i = 0; i < 20 && access("/tmp/.X99-lock", F_OK) != 0; i++) + usleep(100000); + usleep(200000); } if (env->render_mode == RENDER_WINDOW) { @@ -4552,10 +4584,18 @@ void close_client(Client *client) { close(client->recorder_pipefd[1]); waitpid(client->recorder_pid, NULL, 0); } - for (int i = 0; i < 6; i++) { + for (int i = 0; i < 6; i++) UnloadModel(client->cars[i]); - } + UnloadModel(client->cyclist); + UnloadModel(client->pedestrian); CloseWindow(); + if (client->xvfb_pid > 0) { + kill(client->xvfb_pid, SIGTERM); + waitpid(client->xvfb_pid, NULL, 0); + unlink("/tmp/.X99-lock"); + unsetenv("DISPLAY"); + } + free(client); } diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index e650b4e38..c464fddb6 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -352,7 +352,7 @@ def __init__( self.map_ids = map_ids self.num_envs = num_envs super().__init__(buf=buf) - env_ids = [] + self.env_ids = [] for i in range(num_envs): cur = agent_offsets[i] nxt = agent_offsets[i + 1] @@ -439,9 +439,9 @@ def __init__( spawn_height=self.spawn_height, render_mode=render_mode, ) - env_ids.append(env_id) + self.env_ids.append(env_id) - self.c_envs = binding.vectorize(*env_ids) + self.c_envs = binding.vectorize(*self.env_ids) def reset(self, seed=0): binding.vec_reset(self.c_envs, seed) @@ -516,7 +516,7 @@ 
def resample_maps(self): self.agent_offsets = agent_offsets self.map_ids = map_ids self.num_envs = num_envs - env_ids = [] + self.env_ids = [] seed = np.random.randint(0, 2**32 - 1) for i in range(num_envs): cur = agent_offsets[i] @@ -603,22 +603,29 @@ def resample_maps(self): spawn_height=self.spawn_height, render_mode=self.render_mode, ) - env_ids.append(env_id) - self.c_envs = binding.vectorize(*env_ids) + self.env_ids.append(env_id) + self.c_envs = binding.vectorize(*self.env_ids) + + self.c_envs = binding.vectorize(*self.env_ids) binding.vec_reset(self.c_envs, seed) self.truncations[:] = 1 - def step(self, actions): + def step(self, actions, per_env_logs=False): self.terminals[:] = 0 self.actions[:] = actions binding.vec_step(self.c_envs) self.tick += 1 info = [] if self.tick % self.report_interval == 0: - log = binding.vec_log(self.c_envs, self.num_agents) - if log: - info.append(log) + if per_env_logs: # Get the stats for every separate env + logs = self.get_env_logs() + if any(logs): + info = logs + else: # Default: Aggregate across vectorized envs + log = binding.vec_log(self.c_envs, self.num_agents) + if log: + info.append(log) if self.tick > 0 and self.resample_frequency > 0 and self.tick % self.resample_frequency == 0: self.resample_maps() @@ -751,6 +758,15 @@ def render(self, view_mode: RenderView = RenderView.FULL_SIM_STATE, draw_traces: def close(self): binding.vec_close(self.c_envs) + def env_log(self, env_idx): + """Get log statistics for a single environment.""" + num_agents = self.agent_offsets[env_idx + 1] - self.agent_offsets[env_idx] + return binding.env_log(self.env_ids[env_idx], num_agents) + + def get_env_logs(self): + """Get log statistics for all environments (unaggregated).""" + return [self.env_log(i) for i in range(self.num_envs)] + @property def scenario_ids(self) -> list[str]: """Return scenario ID string for each env, stripping null padding.""" diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 
a8db5d955..e6bc576c2 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -660,6 +660,43 @@ static PyObject *vec_log(PyObject *self, PyObject *args) { return dict; } +static PyObject *env_log(PyObject *self, PyObject *args) { + int num_args = PyTuple_Size(args); + if (num_args != 2) { + PyErr_SetString(PyExc_TypeError, "env_log requires 2 arguments"); + return NULL; + } + + Env *env = unpack_env(args); + if (!env) { + return NULL; + } + + // Aggregate this env's per-agent logs (same as vec_log but for one env) + // Note: breaks horribly if you don't use floats + Log aggregate = {0}; + int num_keys = sizeof(Log) / sizeof(float); + for (int j = 0; j < num_keys; j++) { + ((float *)&aggregate)[j] += ((float *)&env->log)[j]; + } + + PyObject *dict = PyDict_New(); + if (aggregate.n == 0.0f) { + return dict; + } + + // Average across agents in env + float n = aggregate.n; + for (int i = 0; i < num_keys; i++) { + ((float *)&aggregate)[i] /= n; + } + aggregate.n = (float)env->active_agent_count; + + my_log(dict, &aggregate); + + return dict; +} + static PyObject *vec_close(PyObject *self, PyObject *args) { VecEnv *vec = unpack_vecenv(args); if (!vec) { diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index cce8bf5ad..f9fa08e83 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -530,6 +530,8 @@ def train(self): if human_replay_eval or self_play_eval or safe_eval: self.evaluator.log_stats() + del self.evaluator + if self.config["eval"]["wosac_realism_eval"]: pufferlib.utils.run_wosac_eval_in_subprocess(self.config, self.logger, self.global_step) From 7be0267456b569ed3826dbb37cb714617094cf3d Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 26 Mar 2026 13:49:55 -0400 Subject: [PATCH 06/23] Rebased over latest 3.0 with some minor additional changes --- pufferlib/config/ocean/drive.ini | 5 +- pufferlib/ocean/benchmark/evaluator.py | 205 ++++++------------------- pufferlib/pufferl.py | 79 +--------- 3 files changed, 54 
insertions(+), 235 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 7e7113f10..b85f953ba 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -188,11 +188,14 @@ num_eval_agents = 64 self_play_eval = True ; If True, enable human replay evaluation (pair policy-controlled agent with human replays) human_replay_eval = False +; If True, enable safe eval (policy runs with fixed safe reward conditioning) +safe_eval = False ; Which env to render during eval. Options: "first" (by index), "worst_collision", "random" render_select_mode = "first" ; If True, render random scenarios. Note: Doing this frequency will slow down the training. render_human_replay_eval = False render_self_play_eval = True +render_safe_eval = False ; Number of scenarios to process per batch wosac_batch_size = 32 @@ -223,8 +226,6 @@ wosac_sanity_check = False wosac_aggregate_results = True ; Evaluation mode: "policy", "ground_truth" wosac_eval_mode = "policy" -; If True, enable human replay evaluation (pair policy-controlled agent with human replays) -human_replay_eval = False ; Control only the self-driving car human_replay_control_mode = "control_sdc_only" ; Number of scenarios for human replay evaluation equals the number of agents diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 29c4bfe6b..2971d95b6 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -820,14 +820,12 @@ def rollout(self, args, puffer_env, policy): class Evaluator: - """Evaluates policies in self_play or human_replay mode, with optional rendering. + """Evaluates policies in self_play, human_replay, or safe_eval mode, with optional rendering. 
Initializes the eval envs needed based on eval config flags: - - human_replay_eval: creates sp_env + hr_env - - render_eval: creates sp_env (if not already created) - - - + - human_replay_eval: creates hr_env (control_sdc_only) + - self_play_eval: creates sp_env (control_agents) + - safe_eval: creates safe_env (control_agents + fixed reward conditioning) """ RENDER_FIRST = "first" @@ -841,8 +839,10 @@ def __init__(self, configs, logger=None): self.sim_steps = 90 self.self_play_stats = None self.human_replay_stats = None + self.safe_eval_stats = None self.sp_env = None self.hr_env = None + self.safe_env = None self._unpack_eval_configs(configs) @@ -858,6 +858,7 @@ def _unpack_eval_configs(self, configs): self.render_sp_rollout = self.configs["eval"]["render_self_play_eval"] self.render_hr_rollout = self.configs["eval"]["render_human_replay_eval"] + self.render_safe_rollout = self.configs["eval"].get("render_safe_eval", False) self.hr_eval_config = copy.deepcopy(eval_config) self.hr_eval_config["env"]["control_mode"] = "control_sdc_only" @@ -866,6 +867,33 @@ def _unpack_eval_configs(self, configs): self.sp_eval_config = copy.deepcopy(eval_config) self.sp_eval_config["env"]["control_mode"] = "control_agents" self.sp_eval_config["env"]["render_mode"] = 1 if self.render_sp_rollout else 0 + + # safe_eval: control_agents like self_play, but with fixed reward conditioning + # from [safe_eval] config section (map_dir, num_agents, reward bounds, etc.) 
+ safe_cfg = configs.get("safe_eval", {}) + self.safe_eval_config = copy.deepcopy(eval_config) + self.safe_eval_config["env"]["control_mode"] = "control_agents" + self.safe_eval_config["env"]["render_mode"] = 1 if self.render_safe_rollout else 0 + self.safe_eval_config["env"]["reward_randomization"] = 1 + self.safe_eval_config["env"]["reward_conditioning"] = 1 + self.safe_eval_config["env"]["resample_frequency"] = 0 + # Override map/episode settings from [safe_eval] section + for key in ("map_dir", "num_maps", "min_goal_distance", "max_goal_distance", "num_agents", "episode_length"): + if key in safe_cfg: + self.safe_eval_config["env"][key] = safe_cfg[key] + # Pin reward bounds: set min=max for each conditioning value + import re + + valid_bounds = { + re.match(r"reward_bound_(.+)_min$", k).group(1) + for k in eval_config["env"] + if re.match(r"reward_bound_(.+)_min$", k) + } + for key, val in safe_cfg.items(): + if key in valid_bounds: + self.safe_eval_config["env"][f"reward_bound_{key}_min"] = float(val) + self.safe_eval_config["env"][f"reward_bound_{key}_max"] = float(val) + self.render_select_mode = self.configs["eval"]["render_select_mode"] def select_render_env(self, env_logs): @@ -900,8 +928,15 @@ def select_render_env(self, env_logs): def rollout(self, policy, mode="self_play"): """Roll out the given policy in the specified eval env and collect statistics.""" - env = self.hr_env if mode == "human_replay" else self.sp_env - render_eval = self.render_sp_rollout if mode == "self_play" else self.render_hr_rollout + if mode == "human_replay": + env = self.hr_env + render_eval = self.render_hr_rollout + elif mode == "safe_eval": + env = self.safe_env + render_eval = self.render_safe_rollout + else: # self_play + env = self.sp_env + render_eval = self.render_sp_rollout driver = env.driver_env needs_stats_first = render_eval and self.render_select_mode not in (self.RENDER_FIRST, self.RENDER_RANDOM) @@ -921,6 +956,9 @@ def rollout(self, policy, mode="self_play"): 
elif mode == "human_replay": self.human_replay_stats = final_info self.human_replay_stats["render_env_idx"] = render_env_idx + elif mode == "safe_eval": + self.safe_eval_stats = final_info + self.safe_eval_stats["render_env_idx"] = render_env_idx def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False): """Run a single rollout. If render_env_idx is not None, render that env.""" @@ -1004,156 +1042,11 @@ def log_stats(self): eval_stats["eval/sp_collision_rate"] = self.self_play_stats["collision_rate"] eval_stats["eval/sp_score"] = self.self_play_stats["score"] eval_stats["eval/num_agents"] = self.self_play_stats["n"] + if self.safe_eval_stats is not None: + eval_stats["eval/safe_collision_rate"] = self.safe_eval_stats["collision_rate"] + eval_stats["eval/safe_score"] = self.safe_eval_stats["score"] if not eval_stats: return self.logger.wandb.log(eval_stats) - - -class SafeEvaluator: - """Evaluates policies with fixed safe/law-abiding reward conditioning. - - Runs the policy with deterministic reward bounds (reward_randomization=1, - min=max for each bound) so the conditioning observation vector is fixed to - safe driving values. Collects metrics over multiple episodes. - - Re-parses the INI config internally so it doesn't need the full training - args dict — only the env_name, safe_eval config, and device. - """ - - def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger=None): - self.env_name = env_name - self.logger = logger - self.safe_eval_config = safe_eval_config - self.num_episodes = safe_eval_config.get("num_episodes", 100) - self.num_agents = safe_eval_config.get("num_agents", 64) - self.episode_length = safe_eval_config.get("episode_length", 1000) - if isinstance(device, int): - device = f"cuda:{device}" - self.device = device - self.stats = None - - def _build_eval_env_config(self): - """Build env config with safe reward conditioning values applied. 
- - Re-parses the INI file to get a fresh full config, then applies - safe_eval overrides for env, vec, and reward bounds. - """ - import re - from pufferlib.pufferl import load_config - - # Re-parse INI to get full config (env, vec, policy, rnn, etc.) - import sys - - original_argv = sys.argv - sys.argv = ["pufferl"] - try: - eval_config = load_config(self.env_name) - finally: - sys.argv = original_argv - - eval_config["vec"] = dict(backend="PufferEnv", num_envs=1) - eval_config["train"]["device"] = self.device - eval_config["env"]["num_agents"] = self.num_agents - eval_config["env"]["episode_length"] = self.episode_length - eval_config["env"]["resample_frequency"] = 0 - eval_config["env"]["reward_randomization"] = 1 - eval_config["env"]["reward_conditioning"] = 1 - - # Apply safe_eval overrides for map_dir, goal distances, etc. - for override_key in ("map_dir", "num_maps", "min_goal_distance", "max_goal_distance"): - if override_key in self.safe_eval_config: - eval_config["env"][override_key] = self.safe_eval_config[override_key] - - # Discover valid reward bound names from env config - valid_bounds = set() - for key in eval_config["env"]: - m = re.match(r"reward_bound_(.+)_min$", key) - if m: - valid_bounds.add(m.group(1)) - - # Set min=max for each reward bound to fix the conditioning values - for key, val in self.safe_eval_config.items(): - if key not in valid_bounds: - continue - eval_config["env"][f"reward_bound_{key}_min"] = float(val) - eval_config["env"][f"reward_bound_{key}_max"] = float(val) - - return eval_config - - def evaluate(self, vecenv, policy): - """Run evaluation with safe reward conditioning and collect metrics. 
- - Args: - vecenv: Vectorized environment (created with safe eval config) - policy: Trained policy to evaluate - - Returns: - dict: Averaged metrics over collected episodes - """ - from collections import defaultdict - - policy.eval() - num_agents = vecenv.observation_space.shape[0] - use_rnn = hasattr(policy, "hidden_size") - - ob, _ = vecenv.reset() - state = {} - dones = torch.zeros(num_agents, device=self.device) - prev_rewards = torch.zeros(num_agents, device=self.device) - if use_rnn: - state = dict( - lstm_h=torch.zeros(num_agents, policy.hidden_size, device=self.device), - lstm_c=torch.zeros(num_agents, policy.hidden_size, device=self.device), - ) - - all_stats = defaultdict(list) - episodes_collected = 0 - max_steps = (self.num_episodes // max(num_agents, 1) + 2) * self.episode_length - - for step in range(max_steps): - if episodes_collected >= self.num_episodes: - break - - with torch.no_grad(): - ob_t = torch.as_tensor(ob).to(self.device) - if use_rnn: - state["reward"] = prev_rewards - state["done"] = dones - logits, value = policy.forward_eval(ob_t, state) - action, logprob, _ = pufferlib.pytorch.sample_logits(logits) - action = action.cpu().numpy().reshape(vecenv.action_space.shape) - - if isinstance(logits, torch.distributions.Normal): - action = np.clip(action, vecenv.action_space.low, vecenv.action_space.high) - - ob, rewards, terminals, truncations, infos = vecenv.step(action) - prev_rewards = torch.as_tensor(rewards).float().to(self.device) - dones = torch.as_tensor(np.maximum(terminals, truncations)).float().to(self.device) - - for entry in infos: - if isinstance(entry, dict): - episodes_collected += int(entry.get("n", 1)) - for k, v in entry.items(): - try: - float(v) - all_stats[k].append(v) - except (TypeError, ValueError): - pass - - self.stats = {k: float(np.mean(v)) for k, v in all_stats.items() if len(v) > 0} - return self.stats - - def log_stats(self, global_step=None): - """Log collected metrics to wandb.""" - if self.stats is None: - 
return - - if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb): - return - - payload = {f"eval/safe_{k}": v for k, v in self.stats.items()} - if global_step is not None: - payload["train_step"] = global_step - self.logger.wandb.log(payload) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index f9fa08e83..1c76a221d 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -509,7 +509,7 @@ def train(self): if self.epoch % self.config["eval"]["eval_interval"] == 0 or done_training: human_replay_eval = self.config["eval"]["human_replay_eval"] self_play_eval = self.config["eval"]["self_play_eval"] - safe_eval = self.config["eval"]["safe_eval"] + safe_eval = self.config.get("safe_eval", {}).get("enabled", False) self.evaluator = Evaluator(self.full_args, self.logger) if human_replay_eval: @@ -525,7 +525,7 @@ def train(self): if safe_eval: self.evaluator.safe_env = load_env("puffer_drive", self.evaluator.safe_eval_config) self.evaluator.rollout(self.uncompiled_policy, mode="safe_eval") - self.evaluator.sp_env.close() + self.evaluator.safe_env.close() self.evaluator.log_videos(eval_mode="safe_eval", epoch=self.epoch) if human_replay_eval or self_play_eval or safe_eval: self.evaluator.log_stats() @@ -540,81 +540,6 @@ def train(self): ): pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step) - def _run_safe_eval(self): - """Run safe eval in-process using SafeEvaluator, then render videos.""" - import copy - import traceback - - vecenv = None - bin_path = None - safe_ini_path = None - render_handed_off = False - try: - from pufferlib.ocean.benchmark.evaluator import SafeEvaluator - - self.msg = "Running safe eval..." 
- env_name = self.config["env"] - safe_eval_config = self.config.get("safe_eval", {}) - evaluator = SafeEvaluator(env_name, safe_eval_config, device=self.config["device"], logger=self.logger) - eval_config = evaluator._build_eval_env_config() - - vecenv = load_env(env_name, eval_config) - policy = load_policy(eval_config, vecenv, env_name) - - # Copy weights from in-memory policy (no checkpoint dependency) - state_dict = copy.deepcopy(self.uncompiled_policy.state_dict()) - # Strip DDP "module." prefix if present - state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} - policy.load_state_dict(state_dict) - - metrics = evaluator.evaluate(vecenv, policy) - evaluator.log_stats(global_step=self.global_step) - - self.msg = f"Safe eval: {len(metrics)} metrics logged" - - self.msg = "Spawning safe eval render..." - model_dir = os.path.join(self.config["data_dir"], f"{self.config['env']}_{self.logger.run_id}") - bin_path = f"{model_dir}_safe_eval_epoch_{self.epoch:06d}.bin" - - export( - args={"env_name": env_name, "load_model_path": "unused", **self.config}, - env_name=env_name, - vecenv=self.vecenv, - policy=self.uncompiled_policy, - path=bin_path, - silent=True, - ) - - safe_ini_path = pufferlib.utils.generate_safe_eval_ini(safe_eval_config) - - self._render_videos( - bin_path=bin_path, - num_maps=safe_eval_config.get("num_maps"), - map_dir=safe_eval_config.get("map_dir"), - wandb_prefix="eval", - config_path=safe_ini_path, - cleanup_files=[bin_path, safe_ini_path], - ) - render_handed_off = True - self.msg = f"Safe eval complete: {len(metrics)} metrics logged" - - except Exception as e: - self.msg = f"Safe eval failed: {e}" - traceback.print_exc(file=sys.stderr) - finally: - if vecenv is not None: - try: - vecenv.close() - except Exception: - pass - if not render_handed_off: - for f in [bin_path, safe_ini_path]: - if f and os.path.exists(f): - try: - os.remove(f) - except OSError: - pass - def mean_and_log(self): config = self.config for k in 
list(self.stats.keys()): From 615fcde4f11937c3e48b715d76ce237007cb2f7c Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 26 Mar 2026 14:14:49 -0400 Subject: [PATCH 07/23] Adding some render options --- pufferlib/config/ocean/drive.ini | 1 + pufferlib/ocean/benchmark/evaluator.py | 25 ++-- pufferlib/ocean/drive/drive.h | 3 +- pufferlib/pufferl.py | 185 +++++++++++++------------ 4 files changed, 115 insertions(+), 99 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index b85f953ba..09456de2d 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -281,6 +281,7 @@ output_dir = "resources/drive/render_videos" ; Number of maps to render (capped at files available in map_dir) num_maps = 3 ; View mode: sim_state (top-down, origin-centered), zoom_out (top-down, full map bbox), bev (agent BEV obs), persp (third-person follow-cam) +; Multi-view: "both" = sim_state + persp, "all" = sim_state + persp + bev (runs a separate rollout per view) ; NOTE: "persp" and "bev" require active_agent_count > 0 view_mode = "bev" ; Whether to draw agent trajectory traces diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 2971d95b6..60414959e 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -877,8 +877,10 @@ def _unpack_eval_configs(self, configs): self.safe_eval_config["env"]["reward_randomization"] = 1 self.safe_eval_config["env"]["reward_conditioning"] = 1 self.safe_eval_config["env"]["resample_frequency"] = 0 + self.safe_eval_config["env"]["report_interval"] = 1 # ensure logs fire within 90-step rollout # Override map/episode settings from [safe_eval] section - for key in ("map_dir", "num_maps", "min_goal_distance", "max_goal_distance", "num_agents", "episode_length"): + # Note: episode_length is intentionally NOT overridden here — eval rollout is always 91 steps + for key in ("map_dir", "num_maps", 
"min_goal_distance", "max_goal_distance", "num_agents"): if key in safe_cfg: self.safe_eval_config["env"][key] = safe_cfg[key] # Pin reward bounds: set min=max for each conditioning value @@ -1036,15 +1038,22 @@ def log_stats(self): eval_stats = {} if self.human_replay_stats is not None: - eval_stats["eval/hr_collision_rate"] = self.human_replay_stats["collision_rate"] - eval_stats["eval/hr_score"] = self.human_replay_stats["score"] + if "collision_rate" in self.human_replay_stats: + eval_stats["eval/hr_collision_rate"] = self.human_replay_stats["collision_rate"] + if "score" in self.human_replay_stats: + eval_stats["eval/hr_score"] = self.human_replay_stats["score"] if self.self_play_stats is not None: - eval_stats["eval/sp_collision_rate"] = self.self_play_stats["collision_rate"] - eval_stats["eval/sp_score"] = self.self_play_stats["score"] - eval_stats["eval/num_agents"] = self.self_play_stats["n"] + if "collision_rate" in self.self_play_stats: + eval_stats["eval/sp_collision_rate"] = self.self_play_stats["collision_rate"] + if "score" in self.self_play_stats: + eval_stats["eval/sp_score"] = self.self_play_stats["score"] + if "n" in self.self_play_stats: + eval_stats["eval/num_agents"] = self.self_play_stats["n"] if self.safe_eval_stats is not None: - eval_stats["eval/safe_collision_rate"] = self.safe_eval_stats["collision_rate"] - eval_stats["eval/safe_score"] = self.safe_eval_stats["score"] + if "collision_rate" in self.safe_eval_stats: + eval_stats["eval/safe_collision_rate"] = self.safe_eval_stats["collision_rate"] + if "score" in self.safe_eval_stats: + eval_stats["eval/safe_score"] = self.safe_eval_stats["score"] if not eval_stats: return diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index c6ecb8510..5c9127727 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2341,9 +2341,8 @@ void c_close(Drive *env) { free(env->static_agent_indices); free(env->expert_static_agent_indices); 
free(env->tracks_to_predict_indices); - free(env->ini_file); - free(env->tracks_to_predict_indices); env->tracks_to_predict_indices = NULL; + free(env->ini_file); } void allocate(Drive *env) { diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 1c76a221d..6709ea066 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1541,17 +1541,22 @@ def render(env_name, args=None): except KeyError as e: raise pufferlib.APIUsageError(f"Missing render config: {e}") - # Map config string → RenderView enum + # Map config string → list of RenderView enums to render + # "both" = sim_state + persp, "all" = sim_state + persp + bev _VIEW_MODE_MAP = { - "sim_state": RenderView.FULL_SIM_STATE, - "topdown": RenderView.FULL_SIM_STATE, # backward compat - "bev": RenderView.BEV_AGENT_OBS, - "agent": RenderView.BEV_AGENT_OBS, # backward compat - "persp": RenderView.AGENT_PERSP, + "sim_state": [RenderView.FULL_SIM_STATE], + "topdown": [RenderView.FULL_SIM_STATE], # backward compat + "bev": [RenderView.BEV_AGENT_OBS], + "agent": [RenderView.BEV_AGENT_OBS], # backward compat + "persp": [RenderView.AGENT_PERSP], + "both": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP], + "all": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP, RenderView.BEV_AGENT_OBS], } - view_mode = _VIEW_MODE_MAP.get(view_mode_str) - if view_mode is None: - raise pufferlib.APIUsageError(f"Unknown view_mode '{view_mode_str}'. Choose from: sim_state, bev, persp") + view_modes = _VIEW_MODE_MAP.get(view_mode_str) + if view_modes is None: + raise pufferlib.APIUsageError( + f"Unknown view_mode '{view_mode_str}'. Choose from: sim_state, bev, persp, both, all" + ) bin_files = sorted(f for f in os.listdir(map_dir) if f.endswith(".bin")) if num_maps > len(bin_files): @@ -1569,90 +1574,92 @@ def render(env_name, args=None): device = configured_device def render_one_map(map_path): - """Render a single map file and move the resulting mp4(s) to output_dir. 
+ """Render a single map file in one or more view modes, moving resulting mp4(s) to output_dir. - The map binary is symlinked into a private temp directory so the C - loader sees exactly one .bin file (num_maps=1). After env.close() - finalizes the ffmpeg pipe, we glob for new *.mp4 files in cwd and - move them — no fragile scenario_id filename prediction needed. + Each view mode runs a separate rollout so that each gets its own ffmpeg + pipe and output file. Output files are named {scenario_id}_{view}.mp4 + when multiple views are requested, or {scenario_id}.mp4 for a single view. """ map_name = os.path.splitext(os.path.basename(map_path))[0] - with tempfile.TemporaryDirectory() as tmp_map_dir: - # Hard-link (or copy) the single .bin into an isolated dir - tmp_bin = os.path.join(tmp_map_dir, os.path.basename(map_path)) - shutil.copy2(map_path, tmp_bin) - - env_overrides = { - **args["env"], - "num_maps": 1, - "map_dir": tmp_map_dir, - "render_mode": 1, # headless ffmpeg → writes {scenario_id}.mp4 in cwd - } - # Override init/control modes if [render] section specifies them. - # carla_2D logged-trajectory maps need create_all_valid + control_vehicles; - # the training default (init_variable_agent_number) spawns no agents on these maps. 
- if render_init_mode is not None: - env_overrides["init_mode"] = render_init_mode - if render_control_mode is not None: - env_overrides["control_mode"] = render_control_mode - - map_args = { - **args, - "env": env_overrides, - "vec": {"backend": "Serial", "num_envs": 1}, - } - - env = load_env(env_name, map_args) - policy = load_policy(map_args, env, env_name) - policy.eval() - - ob, _ = env.reset() - driver = env.driver_env - - state = {} - if map_args["train"]["use_rnn"]: - n = env.agents_per_batch if hasattr(env, "agents_per_batch") else ob.shape[0] - state = dict( - lstm_h=torch.zeros(n, policy.hidden_size, device=device), - lstm_c=torch.zeros(n, policy.hidden_size, device=device), - ) - - # Remove any stale mp4 with this map's name from a previous run so that - # the set-difference detection below doesn't miss the newly written file. - stale = os.path.join(os.getcwd(), f"{map_name}.mp4") - if os.path.exists(stale): - os.remove(stale) - - # Snapshot which mp4s exist before this rollout - before = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) - - for _ in range(max_frames): - driver.render(view_mode=view_mode, draw_traces=draw_traces) - - with torch.no_grad(): - ob_t = torch.as_tensor(ob).to(device) - logits, _ = policy.forward_eval(ob_t, state) - action, _, _ = pufferlib.pytorch.sample_logits(logits) - action = action.cpu().numpy().reshape(env.action_space.shape) - - ob, _, done, truncated, _ = env.step(action) - if done.all() or truncated.all(): - break - - # env.close() finalizes the ffmpeg pipe and flushes the mp4 to disk - env.close() - - # Move any new mp4s that appeared since before the rollout - after = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) - new_mp4s = after - before - if new_mp4s: - for src in sorted(new_mp4s): - dst = os.path.join(output_dir, os.path.basename(src)) - shutil.move(src, dst) - print(f" Saved {dst}") - else: - print(f" Warning: no mp4 produced for map {map_name}") + # View-mode suffix labels used in output filenames + 
_VIEW_SUFFIX = { + RenderView.FULL_SIM_STATE: "sim_state", + RenderView.AGENT_PERSP: "persp", + RenderView.BEV_AGENT_OBS: "bev", + } + multi = len(view_modes) > 1 + + for view_mode in view_modes: + with tempfile.TemporaryDirectory() as tmp_map_dir: + tmp_bin = os.path.join(tmp_map_dir, os.path.basename(map_path)) + shutil.copy2(map_path, tmp_bin) + + env_overrides = { + **args["env"], + "num_maps": 1, + "map_dir": tmp_map_dir, + "render_mode": 1, # headless ffmpeg → writes {scenario_id}.mp4 in cwd + } + if render_init_mode is not None: + env_overrides["init_mode"] = render_init_mode + if render_control_mode is not None: + env_overrides["control_mode"] = render_control_mode + + map_args = { + **args, + "env": env_overrides, + "vec": {"backend": "Serial", "num_envs": 1}, + } + + env = load_env(env_name, map_args) + policy = load_policy(map_args, env, env_name) + policy.eval() + + ob, _ = env.reset() + driver = env.driver_env + + state = {} + if map_args["train"]["use_rnn"]: + n = env.agents_per_batch if hasattr(env, "agents_per_batch") else ob.shape[0] + state = dict( + lstm_h=torch.zeros(n, policy.hidden_size, device=device), + lstm_c=torch.zeros(n, policy.hidden_size, device=device), + ) + + # Remove any stale mp4 from a previous run + stale = os.path.join(os.getcwd(), f"{map_name}.mp4") + if os.path.exists(stale): + os.remove(stale) + + before = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) + + for _ in range(max_frames): + driver.render(view_mode=view_mode, draw_traces=draw_traces) + + with torch.no_grad(): + ob_t = torch.as_tensor(ob).to(device) + logits, _ = policy.forward_eval(ob_t, state) + action, _, _ = pufferlib.pytorch.sample_logits(logits) + action = action.cpu().numpy().reshape(env.action_space.shape) + + ob, _, done, truncated, _ = env.step(action) + if done.all() or truncated.all(): + break + + env.close() + + after = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) + new_mp4s = after - before + if new_mp4s: + for src in sorted(new_mp4s): + 
base = os.path.splitext(os.path.basename(src))[0] + suffix = f"_{_VIEW_SUFFIX[view_mode]}" if multi else "" + dst = os.path.join(output_dir, f"{base}{suffix}.mp4") + shutil.move(src, dst) + print(f" Saved {dst}") + else: + print(f" Warning: no mp4 produced for map {map_name} view {_VIEW_SUFFIX[view_mode]}") if render_maps: print(f"Rendering {len(render_maps)} map(s) from {map_dir} → {output_dir} ...") From 54f2792910c44cb53cc238e17be7979237798d87 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 26 Mar 2026 14:25:08 -0400 Subject: [PATCH 08/23] Adding some render options --- pufferlib/config/ocean/drive.ini | 3 ++ pufferlib/ocean/benchmark/evaluator.py | 75 +++++++++++++++++++++++--- 2 files changed, 70 insertions(+), 8 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 09456de2d..0d0f11064 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -192,6 +192,9 @@ human_replay_eval = False safe_eval = False ; Which env to render during eval. Options: "first" (by index), "worst_collision", "random" render_select_mode = "first" +; View mode(s) for eval rendering. Options: sim_state, bev, persp, both (sim_state+persp), all (sim_state+persp+bev) +; Multi-view options run a separate rollout per view, each producing its own wandb video under render/{mode}/{view} +render_view_mode = "sim_state" ; If True, render random scenarios. Note: Doing this frequency will slow down the training. 
render_human_replay_eval = False render_self_play_eval = True diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 60414959e..8ddff8a39 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -847,6 +847,23 @@ def __init__(self, configs, logger=None): self._unpack_eval_configs(configs) def _unpack_eval_configs(self, configs): + from pufferlib.ocean.drive.drive import RenderView + + _VIEW_MODE_MAP = { + "sim_state": [RenderView.FULL_SIM_STATE], + "topdown": [RenderView.FULL_SIM_STATE], + "bev": [RenderView.BEV_AGENT_OBS], + "agent": [RenderView.BEV_AGENT_OBS], + "persp": [RenderView.AGENT_PERSP], + "both": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP], + "all": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP, RenderView.BEV_AGENT_OBS], + } + _VIEW_SUFFIX = { + RenderView.FULL_SIM_STATE: "sim_state", + RenderView.AGENT_PERSP: "persp", + RenderView.BEV_AGENT_OBS: "bev", + } + eval_config = copy.deepcopy(configs) # Create separate evaluation environments based on specified configs eval_config["env"]["termination_mode"] = 0 @@ -860,6 +877,10 @@ def _unpack_eval_configs(self, configs): self.render_hr_rollout = self.configs["eval"]["render_human_replay_eval"] self.render_safe_rollout = self.configs["eval"].get("render_safe_eval", False) + view_mode_str = str(self.configs["eval"].get("render_view_mode", "sim_state")).lower().strip('"').strip("'") + self.render_view_modes = _VIEW_MODE_MAP.get(view_mode_str, [RenderView.FULL_SIM_STATE]) + self.render_view_suffix = _VIEW_SUFFIX + self.hr_eval_config = copy.deepcopy(eval_config) self.hr_eval_config["env"]["control_mode"] = "control_sdc_only" self.hr_eval_config["env"]["render_mode"] = 1 if self.render_hr_rollout else 0 @@ -928,7 +949,12 @@ def select_render_env(self, env_logs): return 0 def rollout(self, policy, mode="self_play"): - """Roll out the given policy in the specified eval env and collect statistics.""" + """Roll out 
the given policy in the specified eval env and collect statistics. + + If rendering is enabled and multiple view modes are configured, a separate + rollout is run for each view mode. The first pass also collects stats; + subsequent passes are render-only (stats already captured). + """ if mode == "human_replay": env = self.hr_env @@ -949,9 +975,21 @@ def rollout(self, policy, mode="self_play"): else: render_env_idx = self.select_render_env([{}] * driver.num_envs) - info_list = self._run_rollout(policy, env, render_env_idx if render_eval else None) + view_modes = self.render_view_modes if render_eval else [None] + multi_view = len(view_modes) > 1 + + final_info = {} + for i, view_mode in enumerate(view_modes): + info_list = self._run_rollout( + policy, + env, + render_env_idx=render_env_idx if render_eval else None, + view_mode=view_mode, + view_suffix=self.render_view_suffix.get(view_mode, "") if multi_view else "", + ) + if i == 0: # capture stats from the first pass only + final_info = info_list[0] if info_list else {} - final_info = info_list[0] if info_list else {} if mode == "self_play": self.self_play_stats = final_info self.self_play_stats["render_env_idx"] = render_env_idx @@ -962,11 +1000,19 @@ def rollout(self, policy, mode="self_play"): self.safe_eval_stats = final_info self.safe_eval_stats["render_env_idx"] = render_env_idx - def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False): - """Run a single rollout. If render_env_idx is not None, render that env.""" + def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False, view_mode=None, view_suffix=""): + """Run a single rollout. If render_env_idx is not None, render that env. + + view_mode: RenderView enum to use when rendering (defaults to FULL_SIM_STATE). + view_suffix: appended to the mp4 filename when logging multiple views, e.g. "_bev". 
+ """ + from pufferlib.ocean.drive.drive import RenderView + driver = env.driver_env num_agents = env.observation_space.shape[0] device = self.configs["train"]["device"] + if view_mode is None: + view_mode = RenderView.FULL_SIM_STATE # Reset environment obs, info = env.reset() @@ -982,7 +1028,7 @@ def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False): info_list = [] for time_idx in range(self.sim_steps): if render_env_idx is not None: - driver.render(env_id=render_env_idx) + driver.render(view_mode=view_mode, env_id=render_env_idx) # Get action from policy with torch.no_grad(): @@ -1022,10 +1068,23 @@ def log_videos(self, eval_mode, epoch): return render_mode = self.render_select_mode + multi_view = len(self.render_view_modes) > 1 + _known_suffixes = {"_sim_state", "_persp", "_bev"} + for p in video_files: - scenario_id = os.path.splitext(os.path.basename(p))[0] + stem = os.path.splitext(os.path.basename(p))[0] + # Extract view suffix from filename if present (e.g. 
"abc123_bev" → view="bev") + view_tag = "" + if multi_view: + for s in _known_suffixes: + if stem.endswith(s): + view_tag = s[1:] # strip leading "_" + stem = stem[: -len(s)] + break + scenario_id = stem caption = f"scene_{scenario_id}_epoch_{epoch}_select_{render_mode}" - self.logger.wandb.log({f"render/{eval_mode}": wandb.Video(p, format="mp4", caption=caption)}) + wandb_key = f"render/{eval_mode}/{view_tag}" if view_tag else f"render/{eval_mode}" + self.logger.wandb.log({wandb_key: wandb.Video(p, format="mp4", caption=caption)}) # Clean up for p in video_files: From e01df36bf9d9abca82efb5f26a0b1ffff71a2342 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Fri, 27 Mar 2026 21:51:28 -0400 Subject: [PATCH 09/23] Adding full safe_eval functionality --- pufferlib/config/ocean/drive.ini | 9 +- pufferlib/ocean/benchmark/evaluator.py | 256 ++++++++++++++++++------- pufferlib/ocean/drive/drive.h | 8 +- pufferlib/ocean/drive/drive.py | 8 + pufferlib/ocean/env_binding.h | 31 +++ pufferlib/pufferl.py | 59 +++++- 6 files changed, 290 insertions(+), 81 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 0d0f11064..494ee9a4d 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -188,17 +188,15 @@ num_eval_agents = 64 self_play_eval = True ; If True, enable human replay evaluation (pair policy-controlled agent with human replays) human_replay_eval = False -; If True, enable safe eval (policy runs with fixed safe reward conditioning) -safe_eval = False ; Which env to render during eval. Options: "first" (by index), "worst_collision", "random" render_select_mode = "first" ; View mode(s) for eval rendering. Options: sim_state, bev, persp, both (sim_state+persp), all (sim_state+persp+bev) ; Multi-view options run a separate rollout per view, each producing its own wandb video under render/{mode}/{view} -render_view_mode = "sim_state" +render_view_mode = "all" ; If True, render random scenarios. 
Note: Doing this frequency will slow down the training. render_human_replay_eval = False render_self_play_eval = True -render_safe_eval = False + ; Number of scenarios to process per batch wosac_batch_size = 32 @@ -239,6 +237,7 @@ human_replay_num_agents = 16 enabled = True ; How often to run safe eval (in training epochs). Defaults to render_interval. interval = 250 +render_safe_eval = False ; Number of agents to run in the eval environment num_agents = 50 ; Number of episodes to collect metrics over @@ -286,7 +285,7 @@ num_maps = 3 ; View mode: sim_state (top-down, origin-centered), zoom_out (top-down, full map bbox), bev (agent BEV obs), persp (third-person follow-cam) ; Multi-view: "both" = sim_state + persp, "all" = sim_state + persp + bev (runs a separate rollout per view) ; NOTE: "persp" and "bev" require active_agent_count > 0 -view_mode = "bev" +view_mode = "persp" ; Whether to draw agent trajectory traces draw_traces = True ; Maximum number of frames to render per map diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 8ddff8a39..7b158368d 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -820,12 +820,11 @@ def rollout(self, args, puffer_env, policy): class Evaluator: - """Evaluates policies in self_play, human_replay, or safe_eval mode, with optional rendering. + """Evaluates policies in self_play, human_replay, with optional rendering. 
Initializes the eval envs needed based on eval config flags: - human_replay_eval: creates hr_env (control_sdc_only) - self_play_eval: creates sp_env (control_agents) - - safe_eval: creates safe_env (control_agents + fixed reward conditioning) """ RENDER_FIRST = "first" @@ -839,10 +838,8 @@ def __init__(self, configs, logger=None): self.sim_steps = 90 self.self_play_stats = None self.human_replay_stats = None - self.safe_eval_stats = None self.sp_env = None self.hr_env = None - self.safe_env = None self._unpack_eval_configs(configs) @@ -870,12 +867,12 @@ def _unpack_eval_configs(self, configs): backend = eval_config["eval"].get("backend", "PufferEnv") eval_config["env"]["map_dir"] = eval_config["eval"]["map_dir"] eval_config["env"]["num_agents"] = eval_config["eval"]["num_eval_agents"] - eval_config["env"]["episode_length"] = 91 # WOMD scenario length + if self.configs["eval"]["human_replay_eval"]: + eval_config["env"]["episode_length"] = 91 # WOMD scenario length eval_config["vec"] = dict(backend=backend, num_envs=1) self.render_sp_rollout = self.configs["eval"]["render_self_play_eval"] self.render_hr_rollout = self.configs["eval"]["render_human_replay_eval"] - self.render_safe_rollout = self.configs["eval"].get("render_safe_eval", False) view_mode_str = str(self.configs["eval"].get("render_view_mode", "sim_state")).lower().strip('"').strip("'") self.render_view_modes = _VIEW_MODE_MAP.get(view_mode_str, [RenderView.FULL_SIM_STATE]) @@ -883,39 +880,15 @@ def _unpack_eval_configs(self, configs): self.hr_eval_config = copy.deepcopy(eval_config) self.hr_eval_config["env"]["control_mode"] = "control_sdc_only" - self.hr_eval_config["env"]["render_mode"] = 1 if self.render_hr_rollout else 0 + self.hr_eval_config["env"]["render_mode"] = ( + 0 # primary env: stats only; render envs created per-view in rollout() + ) self.sp_eval_config = copy.deepcopy(eval_config) self.sp_eval_config["env"]["control_mode"] = "control_agents" - self.sp_eval_config["env"]["render_mode"] = 1 if
self.render_sp_rollout else 0 - - # safe_eval: control_agents like self_play, but with fixed reward conditioning - # from [safe_eval] config section (map_dir, num_agents, reward bounds, etc.) - safe_cfg = configs.get("safe_eval", {}) - self.safe_eval_config = copy.deepcopy(eval_config) - self.safe_eval_config["env"]["control_mode"] = "control_agents" - self.safe_eval_config["env"]["render_mode"] = 1 if self.render_safe_rollout else 0 - self.safe_eval_config["env"]["reward_randomization"] = 1 - self.safe_eval_config["env"]["reward_conditioning"] = 1 - self.safe_eval_config["env"]["resample_frequency"] = 0 - self.safe_eval_config["env"]["report_interval"] = 1 # ensure logs fire within 90-step rollout - # Override map/episode settings from [safe_eval] section - # Note: episode_length is intentionally NOT overridden here — eval rollout is always 91 steps - for key in ("map_dir", "num_maps", "min_goal_distance", "max_goal_distance", "num_agents"): - if key in safe_cfg: - self.safe_eval_config["env"][key] = safe_cfg[key] - # Pin reward bounds: set min=max for each conditioning value - import re - - valid_bounds = { - re.match(r"reward_bound_(.+)_min$", k).group(1) - for k in eval_config["env"] - if re.match(r"reward_bound_(.+)_min$", k) - } - for key, val in safe_cfg.items(): - if key in valid_bounds: - self.safe_eval_config["env"][f"reward_bound_{key}_min"] = float(val) - self.safe_eval_config["env"][f"reward_bound_{key}_max"] = float(val) + self.sp_eval_config["env"]["render_mode"] = ( + 0 # primary env: stats only; render envs created per-view in rollout() + ) self.render_select_mode = self.configs["eval"]["render_select_mode"] @@ -951,19 +924,19 @@ def select_render_env(self, env_logs): def rollout(self, policy, mode="self_play"): """Roll out the given policy in the specified eval env and collect statistics. - If rendering is enabled and multiple view modes are configured, a separate - rollout is run for each view mode. 
The first pass also collects stats; - subsequent passes are render-only (stats already captured). + Stats are collected using the primary env (already loaded by pufferl.py). + If rendering is enabled, each view mode gets its own temporary env with + render_mode=1, so each view has its own ffmpeg pipe and uniquely named mp4. """ + from pufferlib.pufferl import load_env if mode == "human_replay": env = self.hr_env + eval_config = self.hr_eval_config render_eval = self.render_hr_rollout - elif mode == "safe_eval": - env = self.safe_env - render_eval = self.render_safe_rollout else: # self_play env = self.sp_env + eval_config = self.sp_eval_config render_eval = self.render_sp_rollout driver = env.driver_env @@ -975,20 +948,30 @@ def rollout(self, policy, mode="self_play"): else: render_env_idx = self.select_render_env([{}] * driver.num_envs) - view_modes = self.render_view_modes if render_eval else [None] - multi_view = len(view_modes) > 1 - - final_info = {} - for i, view_mode in enumerate(view_modes): - info_list = self._run_rollout( - policy, - env, - render_env_idx=render_env_idx if render_eval else None, - view_mode=view_mode, - view_suffix=self.render_view_suffix.get(view_mode, "") if multi_view else "", - ) - if i == 0: # capture stats from the first pass only - final_info = info_list[0] if info_list else {} + # Collect stats from the primary env (no rendering) + info_list = self._run_rollout(policy, env) + final_info = info_list[0] if info_list else {} + + # Render each view in its own temporary env so each gets its own ffmpeg pipe + # and uniquely named mp4 (e.g. {scenario_id}_bev.mp4). 
+ if render_eval: + view_modes = self.render_view_modes + multi_view = len(view_modes) > 1 + for view_mode in view_modes: + suffix = f"_{self.render_view_suffix[view_mode]}" if multi_view else "" + render_cfg = copy.deepcopy(eval_config) + render_cfg["env"]["render_mode"] = 1 + render_env = load_env("puffer_drive", render_cfg) + try: + self._run_rollout( + policy, + render_env, + render_env_idx=render_env_idx, + view_mode=view_mode, + view_suffix=suffix, + ) + finally: + render_env.close() if mode == "self_play": self.self_play_stats = final_info @@ -996,9 +979,6 @@ def rollout(self, policy, mode="self_play"): elif mode == "human_replay": self.human_replay_stats = final_info self.human_replay_stats["render_env_idx"] = render_env_idx - elif mode == "safe_eval": - self.safe_eval_stats = final_info - self.safe_eval_stats["render_env_idx"] = render_env_idx def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False, view_mode=None, view_suffix=""): """Run a single rollout. If render_env_idx is not None, render that env. @@ -1014,6 +994,11 @@ def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False, vie if view_mode is None: view_mode = RenderView.FULL_SIM_STATE + # Set video filename suffix in C before the first render call of this rollout. + # This ensures each view produces a uniquely named mp4 (e.g. {scenario_id}_bev.mp4). 
+ if render_env_idx is not None and view_suffix: + driver.set_video_suffix(view_suffix, env_id=render_env_idx) + # Reset environment obs, info = env.reset() @@ -1108,13 +1093,156 @@ def log_stats(self): eval_stats["eval/sp_score"] = self.self_play_stats["score"] if "n" in self.self_play_stats: eval_stats["eval/num_agents"] = self.self_play_stats["n"] - if self.safe_eval_stats is not None: - if "collision_rate" in self.safe_eval_stats: - eval_stats["eval/safe_collision_rate"] = self.safe_eval_stats["collision_rate"] - if "score" in self.safe_eval_stats: - eval_stats["eval/safe_score"] = self.safe_eval_stats["score"] if not eval_stats: return self.logger.wandb.log(eval_stats) + + +class SafeEvaluator: + """Evaluates policies with fixed safe/law-abiding reward conditioning. + + Runs the policy with deterministic reward bounds (reward_randomization=1, + min=max for each bound) so the conditioning observation vector is fixed to + safe driving values. Collects metrics over multiple episodes. + + Re-parses the INI config internally so it doesn't need the full training + args dict — only the env_name, safe_eval config, and device. + """ + + def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger=None): + self.env_name = env_name + self.logger = logger + self.safe_eval_config = safe_eval_config + self.num_episodes = safe_eval_config.get("num_episodes", 100) + self.num_agents = safe_eval_config.get("num_agents", 64) + self.episode_length = safe_eval_config.get("episode_length", 1000) + if isinstance(device, int): + device = f"cuda:{device}" + self.device = device + self.stats = None + + def _build_eval_env_config(self): + """Build env config with safe reward conditioning values applied. + + Re-parses the INI file to get a fresh full config, then applies + safe_eval overrides for env, vec, and reward bounds. + """ + import re + from pufferlib.pufferl import load_config + + # Re-parse INI to get full config (env, vec, policy, rnn, etc.) 
+ import sys + + original_argv = sys.argv + sys.argv = ["pufferl"] + try: + eval_config = load_config(self.env_name) + finally: + sys.argv = original_argv + + eval_config["vec"] = dict(backend="PufferEnv", num_envs=1) + eval_config["train"]["device"] = self.device + eval_config["env"]["num_agents"] = self.num_agents + eval_config["env"]["episode_length"] = self.episode_length + eval_config["env"]["resample_frequency"] = 0 + eval_config["env"]["reward_randomization"] = 1 + eval_config["env"]["reward_conditioning"] = 1 + + # Apply safe_eval overrides for map_dir, goal distances, etc. + for override_key in ("map_dir", "num_maps", "min_goal_distance", "max_goal_distance"): + if override_key in self.safe_eval_config: + eval_config["env"][override_key] = self.safe_eval_config[override_key] + + # Discover valid reward bound names from env config + valid_bounds = set() + for key in eval_config["env"]: + m = re.match(r"reward_bound_(.+)_min$", key) + if m: + valid_bounds.add(m.group(1)) + + # Set min=max for each reward bound to fix the conditioning values + for key, val in self.safe_eval_config.items(): + if key not in valid_bounds: + continue + eval_config["env"][f"reward_bound_{key}_min"] = float(val) + eval_config["env"][f"reward_bound_{key}_max"] = float(val) + + return eval_config + + def evaluate(self, vecenv, policy): + """Run evaluation with safe reward conditioning and collect metrics. 
+ + Args: + vecenv: Vectorized environment (created with safe eval config) + policy: Trained policy to evaluate + + Returns: + dict: Averaged metrics over collected episodes + """ + from collections import defaultdict + + policy.eval() + num_agents = vecenv.observation_space.shape[0] + use_rnn = hasattr(policy, "hidden_size") + + ob, _ = vecenv.reset() + state = {} + dones = torch.zeros(num_agents, device=self.device) + prev_rewards = torch.zeros(num_agents, device=self.device) + if use_rnn: + state = dict( + lstm_h=torch.zeros(num_agents, policy.hidden_size, device=self.device), + lstm_c=torch.zeros(num_agents, policy.hidden_size, device=self.device), + ) + + all_stats = defaultdict(list) + episodes_collected = 0 + max_steps = (self.num_episodes // max(num_agents, 1) + 2) * self.episode_length + + for step in range(max_steps): + if episodes_collected >= self.num_episodes: + break + + with torch.no_grad(): + ob_t = torch.as_tensor(ob).to(self.device) + if use_rnn: + state["reward"] = prev_rewards + state["done"] = dones + logits, value = policy.forward_eval(ob_t, state) + action, logprob, _ = pufferlib.pytorch.sample_logits(logits) + action = action.cpu().numpy().reshape(vecenv.action_space.shape) + + if isinstance(logits, torch.distributions.Normal): + action = np.clip(action, vecenv.action_space.low, vecenv.action_space.high) + + ob, rewards, terminals, truncations, infos = vecenv.step(action) + prev_rewards = torch.as_tensor(rewards).float().to(self.device) + dones = torch.as_tensor(np.maximum(terminals, truncations)).float().to(self.device) + + for entry in infos: + if isinstance(entry, dict): + episodes_collected += int(entry.get("n", 1)) + for k, v in entry.items(): + try: + float(v) + all_stats[k].append(v) + except (TypeError, ValueError): + pass + + self.stats = {k: float(np.mean(v)) for k, v in all_stats.items() if len(v) > 0} + return self.stats + + def log_stats(self, global_step=None): + """Log collected metrics to wandb.""" + if self.stats is None: + 
return + + if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb): + return + + payload = {f"eval/safe_{k}": v for k, v in self.stats.items()} + if global_step is not None: + payload["train_step"] = global_step + self.logger.wandb.log(payload) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 5c9127727..4e4e06142 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -375,6 +375,7 @@ struct Drive { float max_goal_distance; char *ini_file; char scenario_id[SCENARIO_ID_STR_LENGTH]; + char video_suffix[64]; // Optional suffix appended to mp4 filename (e.g. "_bev") int collision_behavior; int offroad_behavior; float observation_window_size; @@ -3650,8 +3651,11 @@ Client *make_client(Drive *env) { char size_str[64]; snprintf(size_str, sizeof(size_str), "%dx%d", (int)client->width, (int)client->height); - char filename[256]; - snprintf(filename, sizeof(filename), "%s.mp4", env->scenario_id); + char filename[320]; + if (env->video_suffix[0] != '\0') + snprintf(filename, sizeof(filename), "%s%s.mp4", env->scenario_id, env->video_suffix); + else + snprintf(filename, sizeof(filename), "%s.mp4", env->scenario_id); client->recorder_pid = fork(); if (client->recorder_pid == -1) { diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index c464fddb6..6ca81b885 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -755,6 +755,14 @@ def get_road_edge_polylines(self): def render(self, view_mode: RenderView = RenderView.FULL_SIM_STATE, draw_traces: bool = True, env_id: int = 0): binding.vec_render(self.c_envs, int(view_mode), draw_traces, env_id) + def set_video_suffix(self, suffix: str, env_id: int = 0): + """Set the suffix appended to the mp4 filename for headless rendering. + + Must be called before the first render() call of a rollout. + E.g. 
set_video_suffix("_bev", env_id=0) → {scenario_id}_bev.mp4 + """ + binding.vec_set_video_suffix(self.c_envs, env_id, suffix) + def close(self): binding.vec_close(self.c_envs) diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index e6bc576c2..b6c89712a 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -550,6 +550,36 @@ static PyObject *vec_step(PyObject *self, PyObject *arg) { Py_RETURN_NONE; } +static PyObject *vec_set_video_suffix(PyObject *self, PyObject *args) { + // Set a suffix appended to the mp4 filename for headless rendering. + // Call this before the first vec_render of each rollout when using multi-view. + // Args: (vec_env_ptr, env_id, suffix_str) + int num_args = PyTuple_Size(args); + if (num_args != 3) { + PyErr_SetString(PyExc_TypeError, "vec_set_video_suffix requires 3 arguments: (vec, env_id, suffix)"); + return NULL; + } + VecEnv *vec = (VecEnv *)PyLong_AsVoidPtr(PyTuple_GetItem(args, 0)); + if (!vec) { + PyErr_SetString(PyExc_ValueError, "Invalid vec_env handle"); + return NULL; + } + int env_id = (int)PyLong_AsLong(PyTuple_GetItem(args, 1)); + const char *suffix = PyUnicode_AsUTF8(PyTuple_GetItem(args, 2)); + if (!suffix) { + PyErr_SetString(PyExc_TypeError, "suffix must be a string"); + return NULL; + } + if (env_id < 0 || env_id >= vec->num_envs) { + PyErr_SetString(PyExc_IndexError, "env_id out of range"); + return NULL; + } + Drive *env = (Drive *)vec->envs[env_id]; + strncpy(env->video_suffix, suffix, sizeof(env->video_suffix) - 1); + env->video_suffix[sizeof(env->video_suffix) - 1] = '\0'; + Py_RETURN_NONE; +} + static PyObject *vec_render(PyObject *self, PyObject *args) { int num_args = PyTuple_Size(args); if (num_args != 4) { @@ -1071,6 +1101,7 @@ static PyMethodDef methods[] = { {"vec_step", vec_step, METH_VARARGS, "Step the vector of environments"}, {"vec_log", vec_log, METH_VARARGS, "Log the vector of environments"}, {"vec_render", vec_render, METH_VARARGS, "Render the vector of 
environments"}, + {"vec_set_video_suffix", vec_set_video_suffix, METH_VARARGS, "Set mp4 filename suffix for the given env"}, {"vec_close", vec_close, METH_VARARGS, "Close the vector of environments"}, {"vec_get_scenario_ids", vec_get_scenario_ids, METH_VARARGS, "Get scenario IDs for all envs"}, {"shared", (PyCFunction)my_shared, METH_VARARGS | METH_KEYWORDS, "Shared state"}, diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 6709ea066..f44ead043 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -509,7 +509,6 @@ def train(self): if self.epoch % self.config["eval"]["eval_interval"] == 0 or done_training: human_replay_eval = self.config["eval"]["human_replay_eval"] self_play_eval = self.config["eval"]["self_play_eval"] - safe_eval = self.config.get("safe_eval", {}).get("enabled", False) self.evaluator = Evaluator(self.full_args, self.logger) if human_replay_eval: @@ -522,12 +521,7 @@ def train(self): self.evaluator.rollout(self.uncompiled_policy, mode="self_play") self.evaluator.sp_env.close() self.evaluator.log_videos(eval_mode="self_play", epoch=self.epoch) - if safe_eval: - self.evaluator.safe_env = load_env("puffer_drive", self.evaluator.safe_eval_config) - self.evaluator.rollout(self.uncompiled_policy, mode="safe_eval") - self.evaluator.safe_env.close() - self.evaluator.log_videos(eval_mode="safe_eval", epoch=self.epoch) - if human_replay_eval or self_play_eval or safe_eval: + if human_replay_eval or self_play_eval: self.evaluator.log_stats() del self.evaluator @@ -535,10 +529,55 @@ def train(self): if self.config["eval"]["wosac_realism_eval"]: pufferlib.utils.run_wosac_eval_in_subprocess(self.config, self.logger, self.global_step) - if self.config["eval"]["human_replay_eval"] and ( - (self.epoch - 1) % self.config["eval"]["eval_interval"] == 0 or done_training + safe_eval_config = self.config.get("safe_eval", {}) + safe_eval_enabled = safe_eval_config.get("enabled", False) + safe_eval_interval = int(safe_eval_config.get("interval", 
self.render_interval)) + is_main = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 + if ( + is_main + and safe_eval_enabled + and safe_eval_interval > 0 + and (self.epoch % safe_eval_interval == 0 or done_training) ): - pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step) + self._run_safe_eval() + + def _run_safe_eval(self): + """Run safe eval in-process using SafeEvaluator.""" + import copy + import traceback + + vecenv = None + try: + from pufferlib.ocean.benchmark.evaluator import SafeEvaluator + + self.msg = "Running safe eval..." + env_name = self.config["env"] + safe_eval_config = self.config.get("safe_eval", {}) + evaluator = SafeEvaluator(env_name, safe_eval_config, device=self.config["device"], logger=self.logger) + eval_config = evaluator._build_eval_env_config() + + vecenv = load_env(env_name, eval_config) + policy = load_policy(eval_config, vecenv, env_name) + + # Copy weights from in-memory policy (no checkpoint dependency) + state_dict = copy.deepcopy(self.uncompiled_policy.state_dict()) + # Strip DDP "module." 
prefix if present + state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} + policy.load_state_dict(state_dict) + + metrics = evaluator.evaluate(vecenv, policy) + evaluator.log_stats(global_step=self.global_step) + + self.msg = f"Safe eval: {len(metrics)} metrics logged" + except Exception as e: + self.msg = f"Safe eval failed: {e}" + traceback.print_exc(file=sys.stderr) + finally: + if vecenv is not None: + try: + vecenv.close() + except Exception: + pass def mean_and_log(self): config = self.config From ee80a1df2843ef35518ae4357999e28aa9cd85ea Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Sat, 28 Mar 2026 00:10:41 -0400 Subject: [PATCH 10/23] Further bug fixes --- pufferlib/ocean/benchmark/evaluator.py | 2 +- pufferlib/pufferl.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 7b158368d..49ac86f97 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -867,7 +867,7 @@ def _unpack_eval_configs(self, configs): backend = eval_config["eval"].get("backend", "PufferEnv") eval_config["env"]["map_dir"] = eval_config["eval"]["map_dir"] eval_config["env"]["num_agents"] = eval_config["eval"]["num_eval_agents"] - if self.config["eval"]["human_replay_eval"]: + if self.configs["eval"]["human_replay_eval"]: eval_config["env"]["episode_length"] = 91 # WOMD scenario length eval_config["vec"] = dict(backend=backend, num_envs=1) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index f44ead043..5fd632808 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -128,6 +128,7 @@ def __init__(self, config, vecenv, policy, logger=None, full_args=None): self.ep_lengths = torch.zeros(total_agents, device=device, dtype=torch.int32) self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32) self.free_idx = total_agents + self.render_interval = config["eval"]["eval_interval"] # LSTM if 
config["use_rnn"]: From 2099537f746b084187ba97b500e37d4d44e0a86c Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Sat, 28 Mar 2026 10:29:45 -0400 Subject: [PATCH 11/23] Cleaning up some commented dead code --- pufferlib/config/ocean/drive.ini | 10 ++-- pufferlib/ocean/drive/drive.c | 1 - pufferlib/ocean/drive/drive.h | 85 -------------------------------- 3 files changed, 5 insertions(+), 91 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 494ee9a4d..a94c974b4 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -54,19 +54,19 @@ polyline_max_segment_length = 10.0 episode_length = 300 resample_frequency = 300 termination_mode = 1 # 0 - terminate at episode_length, 1 - terminate after all agents have been reset -map_dir = "resources/drive/binaries/carla_3D" +map_dir = "resources/drive/binaries/carla_2D" num_maps = 3 ; If True, allows training with fewer maps than requested (warns instead of erroring) allow_fewer_maps = True ; Determines which step of the trajectory to initialize the agents at upon reset init_steps = 0 ; Options: "control_vehicles", "control_agents", "control_wosac", "control_sdc_only" -control_mode = "control_agents" +control_mode = "control_vehicles" ; Options: "create_all_valid", "create_only_controlled", "init_variable_agent_number"(creates random number of controlled agents per env) init_mode = "init_variable_agent_number" ; Below options only valid for "init_variable_agent_number" init_mode -min_agents_per_env = 31 -max_agents_per_env = 32 +min_agents_per_env = 1 +max_agents_per_env = 128 ; Dimension Ranges for agents spawn_width_min = 1.5 spawn_width_max = 2.5 @@ -295,7 +295,7 @@ max_frames = 91 init_mode = "init_variable_agent_number" control_mode = "control_vehicles" ; Policy bin file used for rendering videos -policy_path = "resources/drive/puffer_drive_weights_resampling_300.bin" +policy_path = "resources/drive/puffer_drive_weights.bin" ; Allows more than 
cpu cores workers for rendering overwork = True ; Display human xy logs in the background diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index d1a408b5a..57f38623e 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -145,7 +145,6 @@ void demo(const char *map_name_arg, const char *policy_name_arg, int view_mode, conf.init_mode); free_allocated(&env); return; - // return -1; } c_reset(&env); c_render(&env, view_mode, draw_traces); diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 4e4e06142..5f48e5bd6 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -22,11 +22,6 @@ #define VIEW_MODE_SIM_STATE 0 #define VIEW_MODE_BEV_AGENT_OBS 1 #define VIEW_MODE_AGENT_PERSP 2 -// // Order of entities in rendering (lower is rendered first) -// #define Z_ROAD_SURFACE 0.0f -// #define Z_ROAD_MARKINGS 0.05f // Lane lines, road lines, traces -// #define Z_AGENT_DETAILS 0.4f // Arrow, goal markers, obs overlays -// #define Z_AGENTS 0.6f // Vehicles, cyclists, pedestrians // constants for strings, data etc. 
#define SCENARIO_ID_STR_LENGTH 16 @@ -4500,86 +4495,6 @@ void c_render(Drive *env, int view_mode, int draw_traces) { DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); EndDrawing(); } - - // BeginDrawing(); - // Color road = (Color){35, 35, 37, 255}; - // ClearBackground(road); - // BeginMode3D(client->camera); - // handle_camera_controls(env->client); - // draw_scene(env, client, 0, 0, 0, 0); - - // if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { - // env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; - // } - - // // Draw debug info - // DrawText(TextFormat("Camera Position: (%.2f, %.2f, %.2f)", client->camera.position.x, client->camera.position.y, - // client->camera.position.z), - // 10, 10, 20, PUFF_WHITE); - // DrawText(TextFormat("Camera Target: (%.2f, %.2f, %.2f)", client->camera.target.x, client->camera.target.y, - // client->camera.target.z), - // 10, 30, 20, PUFF_WHITE); - // DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE); - - // int human_idx = env->active_agent_indices[env->human_agent_idx]; - // DrawText(TextFormat("Controlling Agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE); - // DrawText(TextFormat("Agent Index: %d", human_idx), 10, 90, 20, PUFF_WHITE); - - // // Display current action values - yellow when controlling, white otherwise - // Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? 
YELLOW : PUFF_WHITE; - - // if (env->action_type == 0) { // discrete - // int *action_array = (int *)env->actions; - // int action_val = action_array[env->human_agent_idx]; - - // if (env->dynamics_model == CLASSIC) { - // int num_steer = 13; - // int accel_idx = action_val / num_steer; - // int steer_idx = action_val % num_steer; - // float accel_value = ACCELERATION_VALUES[accel_idx]; - // float steer_value = STEERING_VALUES[steer_idx]; - - // DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color); - // DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color); - // } else if (env->dynamics_model == JERK) { - // int num_lat = 3; - // int jerk_long_idx = action_val / num_lat; - // int jerk_lat_idx = action_val % num_lat; - // float jerk_long_value = JERK_LONG[jerk_long_idx]; - // float jerk_lat_value = JERK_LAT[jerk_lat_idx]; - - // DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color); - // DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color); - // } - // } else { // continuous - // float (*action_array_f)[2] = (float (*)[2])env->actions; - // DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, - // action_color); DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, - // action_color); - // } - - // // Show key press status - // int status_y = 150; - // if (IsKeyDown(KEY_LEFT_SHIFT)) { - // DrawText("[shift pressed]", 10, status_y, 20, YELLOW); - // status_y += 20; - // } - // if (IsKeyDown(KEY_SPACE)) { - // DrawText("[space pressed]", 10, status_y, 20, YELLOW); - // status_y += 20; - // } - // if (IsKeyDown(KEY_LEFT_CONTROL)) { - // DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW); - // status_y += 20; - // } - - // // Controls help - // DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10, - // 
client->height - 30, 20, PUFF_WHITE); - - // DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE); - // DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); - // EndDrawing(); } void close_client(Client *client) { From ab8adf0390f2e210b748271897935550506ec431 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Sun, 29 Mar 2026 20:01:50 -0400 Subject: [PATCH 12/23] Fixing bugs based on PR comments --- pufferlib/config/ocean/drive.ini | 8 +++--- pufferlib/ocean/drive/drive.h | 44 ++++++++++++++++++-------------- pufferlib/ocean/drive/drive.py | 2 -- pufferlib/ocean/env_binding.h | 1 + pufferlib/pufferl.py | 6 ++--- 5 files changed, 32 insertions(+), 29 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index a94c974b4..2a3eab604 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -193,7 +193,7 @@ render_select_mode = "first" ; View mode(s) for eval rendering. Options: sim_state, bev, persp, both (sim_state+persp), all (sim_state+persp+bev) ; Multi-view options run a separate rollout per view, each producing its own wandb video under render/{mode}/{view} render_view_mode = "all" -; If True, render random scenarios. Note: Doing this frequency will slow down the training. +; If True, render random scenarios. Note: Doing this frequently will slow down the training. render_human_replay_eval = False render_self_play_eval = True @@ -276,8 +276,8 @@ acc = 1.0 [render] ; Render a batch of maps offline using the in-process c_render (ffmpeg) pipeline. 
-; Path to the .bin map directory
-map_dir = "resources/drive/binaries/carla/carla_3D"
+; Path to the .bin map directory
+map_dir = "resources/drive/binaries/carla_2D"
 ; Directory to write output mp4 files
 output_dir = "resources/drive/render_videos"
 ; Number of maps to render (capped at files available in map_dir)
@@ -294,8 +294,6 @@ max_frames = 91
 ; carla_2D maps carry recorded vehicle trajectories — use create_all_valid + control_vehicles.
 init_mode = "init_variable_agent_number"
 control_mode = "control_vehicles"
-; Policy bin file used for rendering videos
-policy_path = "resources/drive/puffer_drive_weights.bin"
 ; Allows more than cpu cores workers for rendering
 overwork = True
 ; Display human xy logs in the background
diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index 5f48e5bd6..79f06e940 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -2636,10 +2636,9 @@ void compute_agent_metrics(Drive *env, int agent_idx) {
 
     // Check for vehicle collisions (skip for pedestrians)
     int car_collided_with_index = -1;
-    if (agent->type != PEDESTRIAN) {
-        car_collided_with_index = collision_check(env, agent_idx);
-        if (car_collided_with_index != -1)
-            collided = VEHICLE_COLLISION;
+    car_collided_with_index = collision_check(env, agent_idx);
+    if (car_collided_with_index != -1) {
+        collided = VEHICLE_COLLISION;
     }
 
     agent->collision_state = collided;
@@ -3546,38 +3545,41 @@ Client *make_client(Drive *env) {
 
     if (env->render_mode == RENDER_HEADLESS && getenv("DISPLAY") == NULL) {
-        // Kill any existing Xvfb first
-        system("pkill -9 Xvfb");
-        usleep(200000);
-        unlink("/tmp/.X99-lock");
-        unlink("/tmp/.X11-unix/X99");
+        // Use a per-process display number so multiple rendering jobs on the same
+        // node don't collide. Range :100-:999 avoids the system default :0.
+ client->xvfb_display_num = 100 + (getpid() % 900); - // Hardcode to single display because we only run this in one process at once - client->xvfb_display_num = 99; + char lock_file[32], socket_file[32], display_str[16]; + snprintf(display_str, sizeof(display_str), ":%d", client->xvfb_display_num); + snprintf(lock_file, sizeof(lock_file), "/tmp/.X%d-lock", client->xvfb_display_num); + snprintf(socket_file, sizeof(socket_file), "/tmp/.X11-unix/X%d", client->xvfb_display_num); - // Clean up stale lock if process is dead - FILE *f = fopen("/tmp/.X99-lock", "r"); + // Clean up a stale lock only if the owning process is already dead + FILE *f = fopen(lock_file, "r"); if (f) { pid_t pid = -1; fscanf(f, "%d", &pid); fclose(f); - if (pid > 0 && kill(pid, 0) != 0) - unlink("/tmp/.X99-lock"); + if (pid > 0 && kill(pid, 0) != 0) { + unlink(lock_file); + unlink(socket_file); + } } client->xvfb_pid = fork(); if (client->xvfb_pid == 0) { close(STDOUT_FILENO); close(STDERR_FILENO); - execlp("Xvfb", "Xvfb", ":99", "-screen", "0", "1280x720x24", "+extension", "GLX", "-ac", "-noreset", NULL); + execlp("Xvfb", "Xvfb", display_str, "-screen", "0", "1280x720x24", "+extension", "GLX", "-ac", "-noreset", + NULL); _exit(1); } - setenv("DISPLAY", ":99", 1); + setenv("DISPLAY", display_str, 1); // Xvfb starts asynchronously after fork(), so we poll until it creates its // lock file (max 2s) then wait an extra 200ms for GLX to finish initializing. // Without this, raylib's InitWindow() would try to connect before Xvfb is ready. 
- for (int i = 0; i < 20 && access("/tmp/.X99-lock", F_OK) != 0; i++) + for (int i = 0; i < 20 && access(lock_file, F_OK) != 0; i++) usleep(100000); usleep(200000); } @@ -4510,7 +4512,11 @@ void close_client(Client *client) { if (client->xvfb_pid > 0) { kill(client->xvfb_pid, SIGTERM); waitpid(client->xvfb_pid, NULL, 0); - unlink("/tmp/.X99-lock"); + char lock_file[32], socket_file[32]; + snprintf(lock_file, sizeof(lock_file), "/tmp/.X%d-lock", client->xvfb_display_num); + snprintf(socket_file, sizeof(socket_file), "/tmp/.X11-unix/X%d", client->xvfb_display_num); + unlink(lock_file); + unlink(socket_file); unsetenv("DISPLAY"); } diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 6ca81b885..46726a942 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -606,8 +606,6 @@ def resample_maps(self): self.env_ids.append(env_id) self.c_envs = binding.vectorize(*self.env_ids) - self.c_envs = binding.vectorize(*self.env_ids) - binding.vec_reset(self.c_envs, seed) self.truncations[:] = 1 diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index b6c89712a..86be97971 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -1114,6 +1114,7 @@ static PyMethodDef methods[] = { "Get road edge polyline counts from vectorized env"}, {"vec_get_road_edge_polylines", vec_get_road_edge_polylines, METH_VARARGS, "Get road edge polylines from vectorized env"}, + {"env_log", env_log, METH_VARARGS, "Log a single environment"}, MY_METHODS, {NULL, NULL, 0, NULL}}; diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 5fd632808..31dbda7d8 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -128,7 +128,7 @@ def __init__(self, config, vecenv, policy, logger=None, full_args=None): self.ep_lengths = torch.zeros(total_agents, device=device, dtype=torch.int32) self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32) self.free_idx = total_agents - 
self.render_interval = config["eval"]["eval_interval"] + self.eval_interval = config["eval"]["eval_interval"] # LSTM if config["use_rnn"]: @@ -532,7 +532,7 @@ def train(self): safe_eval_config = self.config.get("safe_eval", {}) safe_eval_enabled = safe_eval_config.get("enabled", False) - safe_eval_interval = int(safe_eval_config.get("interval", self.render_interval)) + safe_eval_interval = int(safe_eval_config.get("interval", self.eval_interval)) is_main = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 if ( is_main @@ -1215,7 +1215,7 @@ def eval(env_name, args=None, vecenv=None, policy=None): if frame_count >= max_frames or done.all() or truncated.all(): break - vecenv.close() + vecenv.close() def sweep(args=None, env_name=None): From e61ccecda440b514f8715f84adf0a8312574e3b5 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 29 Mar 2026 20:13:24 -0400 Subject: [PATCH 13/23] Update pufferlib/pufferl.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pufferlib/pufferl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 31dbda7d8..d573ef37c 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -128,7 +128,8 @@ def __init__(self, config, vecenv, policy, logger=None, full_args=None): self.ep_lengths = torch.zeros(total_agents, device=device, dtype=torch.int32) self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32) self.free_idx = total_agents - self.eval_interval = config["eval"]["eval_interval"] + # Use a safe default when eval settings are absent + self.eval_interval = config.get("eval", {}).get("eval_interval", 0) # LSTM if config["use_rnn"]: From da25579e0871d53e52378d03dc568fc884fa6371 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Sun, 5 Apr 2026 10:03:16 -0400 Subject: [PATCH 14/23] Some bug fixes --- pufferlib/ocean/env_config.h | 2 ++ pufferlib/pufferl.py | 8 +++++--- 2 files changed, 7 
insertions(+), 3 deletions(-) diff --git a/pufferlib/ocean/env_config.h b/pufferlib/ocean/env_config.h index 0d83d9762..8aebe15ea 100644 --- a/pufferlib/ocean/env_config.h +++ b/pufferlib/ocean/env_config.h @@ -111,6 +111,8 @@ static int handler(void *config, const char *section, const char *name, const ch printf("Warning: Unknown dynamics_model value '%s', defaulting to JERK\n", value); env_config->dynamics_model = 1; // Default to JERK } + } else if (MATCH("env", "render_mode")) { + env_config->render_mode = atoi(value); } else if (MATCH("env", "goal_behavior")) { env_config->goal_behavior = atoi(value); } else if (MATCH("env", "reward_randomization")) { diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index d573ef37c..23a5025ac 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -514,12 +514,12 @@ def train(self): self.evaluator = Evaluator(self.full_args, self.logger) if human_replay_eval: - self.evaluator.hr_env = load_env("puffer_drive", self.evaluator.hr_eval_config) + self.evaluator.hr_env = load_env(self.config["env"], self.evaluator.hr_eval_config) self.evaluator.rollout(self.uncompiled_policy, mode="human_replay") self.evaluator.hr_env.close() self.evaluator.log_videos(eval_mode="human_replay", epoch=self.epoch) if self_play_eval: - self.evaluator.sp_env = load_env("puffer_drive", self.evaluator.sp_eval_config) + self.evaluator.sp_env = load_env(self.config["env"], self.evaluator.sp_eval_config) self.evaluator.rollout(self.uncompiled_policy, mode="self_play") self.evaluator.sp_env.close() self.evaluator.log_videos(eval_mode="self_play", epoch=self.epoch) @@ -528,7 +528,9 @@ def train(self): del self.evaluator - if self.config["eval"]["wosac_realism_eval"]: + if self.config["eval"]["wosac_realism_eval"] and ( + (self.epoch) % self.config["eval"]["eval_interval"] == 0 or done_training + ): pufferlib.utils.run_wosac_eval_in_subprocess(self.config, self.logger, self.global_step) safe_eval_config = self.config.get("safe_eval", {}) From 
04f9304c7e30f31401f1025679061ad618e9f95f Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Sun, 5 Apr 2026 10:26:57 -0400 Subject: [PATCH 15/23] Updating over latest 3.0 and some minor bug fixes --- pufferlib/ocean/drive/drive.h | 13 +++---------- pufferlib/ocean/env_binding.h | 21 +++++++++++++++++++-- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 79f06e940..4336b0161 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -128,8 +128,8 @@ #define MAX_CHECKED_LANES 32 // Ego features depend on dynamics model -#define EGO_FEATURES_CLASSIC 14 -#define EGO_FEATURES_JERK 17 +#define EGO_FEATURES_CLASSIC 13 +#define EGO_FEATURES_JERK 16 // Observation normalization constants #define MAX_SPEED 100.0f @@ -1505,11 +1505,6 @@ int collision_check(Drive *env, int agent_idx) { if (agent->sim_x == INVALID_POSITION) return -1; - // Skip collision checking for pedestrians because they are often too - // close to other entities at initialization. - if (agent->type == PEDESTRIAN) - return -1; - int car_collided_with_index = -1; if (agent->respawn_timestep != -1) @@ -1604,7 +1599,7 @@ static bool check_offroad(Drive *env, Agent *agent) { entity = &env->road_elements[entity_list[i].entity_idx]; // Check for offroad collision with road edges (only for vehicles and cyclists) - if (entity->type == ROAD_EDGE && agent->type != PEDESTRIAN) { + if (entity->type == ROAD_EDGE) { int geometry_idx = entity_list[i].geometry_idx; if (entity->z[geometry_idx] > agent->sim_z + agent->sim_height / 2.0f || entity->z[geometry_idx] < agent->sim_z - agent->sim_height / 2.0f) @@ -2805,7 +2800,6 @@ void compute_observations(Drive *env) { obs[13] = fminf(SPEED_LIMIT / MAX_SPEED, 1.0f); obs[14] = lane_center_dist; obs[15] = ego_entity->metrics_array[LANE_ANGLE_IDX]; - obs[16] = ego_entity->type / 3.0f; } else { obs[7] = (ego_entity->respawn_timestep != -1) ? 
1 : 0; obs[8] = normalized_goal_speed_min; @@ -2813,7 +2807,6 @@ void compute_observations(Drive *env) { obs[10] = fminf(SPEED_LIMIT / MAX_SPEED, 1.0f); obs[11] = lane_center_dist; obs[12] = ego_entity->metrics_array[LANE_ANGLE_IDX]; - obs[13] = ego_entity->type / 3.0f; } int obs_idx = (env->reward_conditioning == 1) ? ego_dim - NUM_REWARD_COEFS : ego_dim; // Placeholder for reward conditioning encoder - diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 86be97971..d4090ea46 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -702,6 +702,9 @@ static PyObject *env_log(PyObject *self, PyObject *args) { return NULL; } + PyObject *num_agents_arg = PyTuple_GetItem(args, 1); + float num_agents = (float)PyLong_AsLong(num_agents_arg); + // Aggregate this env's per-agent logs (same as vec_log but for one env) // Note: breaks horribly if you don't use floats Log aggregate = {0}; @@ -711,18 +714,32 @@ static PyObject *env_log(PyObject *self, PyObject *args) { } PyObject *dict = PyDict_New(); - if (aggregate.n == 0.0f) { + + // Mirror vec_log: only report when enough data has accumulated + if (aggregate.n < num_agents) { return dict; } + // Reset log now that we've consumed it, mirroring vec_log + for (int j = 0; j < num_keys; j++) { + ((float *)&env->log)[j] = 0.0f; + } + // Average across agents in env float n = aggregate.n; for (int i = 0; i < num_keys; i++) { ((float *)&aggregate)[i] /= n; } - aggregate.n = (float)env->active_agent_count; + + // Compute completion_rate from raw counts (mirrors vec_log) + if (aggregate.goals_attempted_this_episode > 0.0f) { + aggregate.completion_rate = aggregate.goals_reached_this_episode / aggregate.goals_attempted_this_episode; + } else { + aggregate.completion_rate = 0.0f; + } my_log(dict, &aggregate); + assign_to_dict(dict, "n", n); return dict; } From 293f9d2b1ffcef7f29888da11e40ec3adc17c5b1 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Tue, 7 Apr 2026 11:20:40 -0400 
Subject: [PATCH 16/23] Adding safe eval renders --- pufferlib/ocean/benchmark/evaluator.py | 105 ++++++++++++++++++++++++- pufferlib/pufferl.py | 9 ++- 2 files changed, 112 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 49ac86f97..8a416d2e3 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -1111,7 +1111,7 @@ class SafeEvaluator: args dict — only the env_name, safe_eval config, and device. """ - def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger=None): + def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger=None, full_config=None): self.env_name = env_name self.logger = logger self.safe_eval_config = safe_eval_config @@ -1122,6 +1122,30 @@ def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger= device = f"cuda:{device}" self.device = device self.stats = None + self.render_safe_eval = safe_eval_config.get("render_safe_eval", False) + # Resolve view modes from [eval].render_view_mode + from pufferlib.ocean.drive.drive import RenderView + + _VIEW_MODE_MAP = { + "sim_state": [RenderView.FULL_SIM_STATE], + "topdown": [RenderView.FULL_SIM_STATE], + "bev": [RenderView.BEV_AGENT_OBS], + "agent": [RenderView.BEV_AGENT_OBS], + "persp": [RenderView.AGENT_PERSP], + "both": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP], + "all": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP, RenderView.BEV_AGENT_OBS], + } + self._view_suffix = { + RenderView.FULL_SIM_STATE: "sim_state", + RenderView.AGENT_PERSP: "persp", + RenderView.BEV_AGENT_OBS: "bev", + } + view_mode_str = "sim_state" + if full_config is not None: + view_mode_str = ( + str(full_config.get("eval", {}).get("render_view_mode", "sim_state")).lower().strip('"').strip("'") + ) + self.render_view_modes = _VIEW_MODE_MAP.get(view_mode_str, [RenderView.FULL_SIM_STATE]) def _build_eval_env_config(self): """Build env 
config with safe reward conditioning values applied. @@ -1234,6 +1258,85 @@ def evaluate(self, vecenv, policy): self.stats = {k: float(np.mean(v)) for k, v in all_stats.items() if len(v) > 0} return self.stats + def render(self, eval_config, policy): + """Run a single rollout with rendering enabled, one env per view mode. + + Always renders env index 0 (first env). Uses [eval].render_view_mode. + Produces .mp4 files on disk (flushed when each render_env is closed). + """ + from pufferlib.ocean.drive.drive import RenderView + from pufferlib.pufferl import load_env + import copy + + multi_view = len(self.render_view_modes) > 1 + for view_mode in self.render_view_modes: + suffix = f"_{self._view_suffix[view_mode]}" if multi_view else "" + render_cfg = copy.deepcopy(eval_config) + render_cfg["env"]["render_mode"] = 1 + render_env = load_env(self.env_name, render_cfg) + driver = render_env.driver_env + if suffix: + driver.set_video_suffix(suffix, env_id=0) + try: + num_agents = render_env.observation_space.shape[0] + use_rnn = hasattr(policy, "hidden_size") + ob, _ = render_env.reset() + state = {} + if use_rnn: + state = dict( + lstm_h=torch.zeros(num_agents, policy.hidden_size, device=self.device), + lstm_c=torch.zeros(num_agents, policy.hidden_size, device=self.device), + ) + for _ in range(self.episode_length): + driver.render(view_mode=view_mode, env_id=0) + with torch.no_grad(): + ob_t = torch.as_tensor(ob).to(self.device) + logits, value = policy.forward_eval(ob_t, state) + action, _, _ = pufferlib.pytorch.sample_logits(logits) + action_np = action.cpu().numpy().reshape(render_env.action_space.shape) + if isinstance(logits, torch.distributions.Normal): + action_np = np.clip(action_np, render_env.action_space.low, render_env.action_space.high) + ob, _, _, truncs, _ = render_env.step(action_np) + if truncs.all(): + break + finally: + render_env.close() + + def log_videos(self, epoch): + """Glob .mp4 files produced by render() and log them to wandb under 
render/safe_eval.""" + import os + import glob + + if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb): + for p in glob.glob("*.mp4"): + os.remove(p) + return + + import wandb + + video_files = glob.glob("*.mp4") + if not video_files: + print("Warning: safe_eval render produced no mp4 files") + return + + multi_view = len(self.render_view_modes) > 1 + _known_suffixes = {"_sim_state", "_persp", "_bev"} + for p in video_files: + stem = os.path.splitext(os.path.basename(p))[0] + view_tag = "" + if multi_view: + for s in _known_suffixes: + if stem.endswith(s): + view_tag = s[1:] + stem = stem[: -len(s)] + break + caption = f"scene_{stem}_epoch_{epoch}_safe_eval" + wandb_key = f"render/safe_eval/{view_tag}" if view_tag else "render/safe_eval" + self.logger.wandb.log({wandb_key: wandb.Video(p, format="mp4", caption=caption)}) + + for p in video_files: + os.remove(p) + def log_stats(self, global_step=None): """Log collected metrics to wandb.""" if self.stats is None: diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 23a5025ac..6aba814b7 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -557,7 +557,9 @@ def _run_safe_eval(self): self.msg = "Running safe eval..." env_name = self.config["env"] safe_eval_config = self.config.get("safe_eval", {}) - evaluator = SafeEvaluator(env_name, safe_eval_config, device=self.config["device"], logger=self.logger) + evaluator = SafeEvaluator( + env_name, safe_eval_config, device=self.config["device"], logger=self.logger, full_config=self.config + ) eval_config = evaluator._build_eval_env_config() vecenv = load_env(env_name, eval_config) @@ -572,6 +574,11 @@ def _run_safe_eval(self): metrics = evaluator.evaluate(vecenv, policy) evaluator.log_stats(global_step=self.global_step) + if evaluator.render_safe_eval: + self.msg = "Rendering safe eval..." 
+ evaluator.render(eval_config, policy) + evaluator.log_videos(epoch=self.epoch) + self.msg = f"Safe eval: {len(metrics)} metrics logged" except Exception as e: self.msg = f"Safe eval failed: {e}" From 90361f54ada6cc17358667a4621c7962a663ec95 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Tue, 7 Apr 2026 12:07:34 -0400 Subject: [PATCH 17/23] Fixing some episoden length related bugs --- pufferlib/ocean/benchmark/evaluator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 8a416d2e3..c54ce2406 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -835,7 +835,6 @@ class Evaluator: def __init__(self, configs, logger=None): self.configs = configs self.logger = logger - self.sim_steps = 90 self.self_play_stats = None self.human_replay_stats = None self.sp_env = None @@ -867,8 +866,6 @@ def _unpack_eval_configs(self, configs): backend = eval_config["eval"].get("backend", "PufferEnv") eval_config["env"]["map_dir"] = eval_config["eval"]["map_dir"] eval_config["env"]["num_agents"] = eval_config["eval"]["num_eval_agents"] - if self.configs["eval"]["human_replay_eval"]: - eval_config["env"]["episode_length"] = 91 # WOMD scenario length eval_config["vec"] = dict(backend=backend, num_envs=1) self.render_sp_rollout = self.configs["eval"]["render_self_play_eval"] @@ -883,6 +880,8 @@ def _unpack_eval_configs(self, configs): self.hr_eval_config["env"]["render_mode"] = ( 0 # primary env: stats only; render envs created per-view in rollout() ) + if self.configs["eval"]["human_replay_eval"]: + self.hr_eval_config["env"]["episode_length"] = 91 # WOMD scenario length self.sp_eval_config = copy.deepcopy(eval_config) self.sp_eval_config["env"]["control_mode"] = "control_agents" @@ -1011,7 +1010,8 @@ def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False, vie ) info_list = [] - for time_idx in 
range(self.sim_steps): + episode_length = env.driver_env.episode_length + for time_idx in range(episode_length): if render_env_idx is not None: driver.render(view_mode=view_mode, env_id=render_env_idx) From eb2c53038500e082bfcdc8bf8bb2a370bf993204 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Tue, 7 Apr 2026 12:44:58 -0400 Subject: [PATCH 18/23] Fixing bugs with human replay with variable agent mode --- pufferlib/config/ocean/drive.ini | 15 +++++++++------ pufferlib/ocean/benchmark/evaluator.py | 24 +++++++++++++++++++++--- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 2a3eab604..5da2a7f15 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -180,14 +180,21 @@ checkpoint_interval = 1000 [eval] eval_interval = 1000 -; Path to dataset used for evaluation -map_dir = "resources/drive/binaries/training" +; Map directory for self-play evaluation (Carla maps) +sp_map_dir = "resources/drive/binaries/carla_2D" +; Map directory for human replay evaluation (WOMD training scenarios) +hr_map_dir = "resources/drive/binaries/training" +; Number of agents for self-play evaluation num_eval_agents = 64 +; Number of agents for human replay evaluation (one SDC per scenario) +human_replay_num_agents = 16 ; If True, enable self-play evaluation (pair policy-controlled agent with a copy of itself) self_play_eval = True ; If True, enable human replay evaluation (pair policy-controlled agent with human replays) human_replay_eval = False +; Control mode for human replay: "control_sdc_only" controls only the SDC; others replay logged trajectories +human_replay_control_mode = "control_sdc_only" ; Which env to render during eval. Options: "first" (by index), "worst_collision", "random" render_select_mode = "first" ; View mode(s) for eval rendering. 
Options: sim_state, bev, persp, both (sim_state+persp), all (sim_state+persp+bev) @@ -227,10 +234,6 @@ wosac_sanity_check = False wosac_aggregate_results = True ; Evaluation mode: "policy", "ground_truth" wosac_eval_mode = "policy" -; Control only the self-driving car -human_replay_control_mode = "control_sdc_only" -; Number of scenarios for human replay evaluation equals the number of agents -human_replay_num_agents = 16 [safe_eval] ; If True, periodically run policy with safe/law-abiding reward conditioning and log metrics diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index c54ce2406..3ebe97c38 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -864,8 +864,6 @@ def _unpack_eval_configs(self, configs): # Create separate evaluation environments based on specified configs eval_config["env"]["termination_mode"] = 0 backend = eval_config["eval"].get("backend", "PufferEnv") - eval_config["env"]["map_dir"] = eval_config["eval"]["map_dir"] - eval_config["env"]["num_agents"] = eval_config["eval"]["num_eval_agents"] eval_config["vec"] = dict(backend=backend, num_envs=1) self.render_sp_rollout = self.configs["eval"]["render_self_play_eval"] @@ -875,15 +873,35 @@ def _unpack_eval_configs(self, configs): self.render_view_modes = _VIEW_MODE_MAP.get(view_mode_str, [RenderView.FULL_SIM_STATE]) self.render_view_suffix = _VIEW_SUFFIX + # --- Human replay config --- + hr_control_mode = ( + str(self.configs["eval"].get("human_replay_control_mode", "control_sdc_only")).strip('"').strip("'") + ) + hr_num_agents = int(self.configs["eval"].get("human_replay_num_agents", 16)) + hr_map_dir = ( + str(self.configs["eval"].get("hr_map_dir", "resources/drive/binaries/training")).strip('"').strip("'") + ) + self.hr_eval_config = copy.deepcopy(eval_config) - self.hr_eval_config["env"]["control_mode"] = "control_sdc_only" + self.hr_eval_config["env"]["map_dir"] = hr_map_dir + 
self.hr_eval_config["env"]["num_agents"] = hr_num_agents + self.hr_eval_config["env"]["control_mode"] = hr_control_mode + self.hr_eval_config["env"]["init_mode"] = "create_all_valid" self.hr_eval_config["env"]["render_mode"] = ( 0 # primary env: stats only; render envs created per-view in rollout() ) if self.configs["eval"]["human_replay_eval"]: self.hr_eval_config["env"]["episode_length"] = 91 # WOMD scenario length + # --- Self-play config --- + sp_map_dir = ( + str(self.configs["eval"].get("sp_map_dir", "resources/drive/binaries/carla_2D")).strip('"').strip("'") + ) + sp_num_agents = int(self.configs["eval"].get("num_eval_agents", 64)) + self.sp_eval_config = copy.deepcopy(eval_config) + self.sp_eval_config["env"]["map_dir"] = sp_map_dir + self.sp_eval_config["env"]["num_agents"] = sp_num_agents self.sp_eval_config["env"]["control_mode"] = "control_agents" self.sp_eval_config["env"]["render_mode"] = ( 0 # primary env: stats only; render envs created per-view in rollout() From be85a6f6a236c4e319ed0b283ef93135624d96eb Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 11 Apr 2026 10:26:57 -0400 Subject: [PATCH 19/23] Update drive.ini nit: spell check --- pufferlib/config/ocean/drive.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 5da2a7f15..6453d0bb9 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -279,7 +279,7 @@ acc = 1.0 [render] ; Render a batch of maps offline using the in-process c_render (ffmpeg) pipeline. 
-; Path to the .bin map directoryz +; Path to the .bin map directory map_dir = "resources/drive/binaries/carla_2D" ; Directory to write output mp4 files output_dir = "resources/drive/render_videos" From 849be1f92290bb463f6993a0cf9a0ac2163999e5 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 11 Apr 2026 10:27:46 -0400 Subject: [PATCH 20/23] Update drive.ini turn on safe eval render --- pufferlib/config/ocean/drive.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 6453d0bb9..9ff2fa63d 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -240,7 +240,7 @@ wosac_eval_mode = "policy" enabled = True ; How often to run safe eval (in training epochs). Defaults to render_interval. interval = 250 -render_safe_eval = False +render_safe_eval = True ; Number of agents to run in the eval environment num_agents = 50 ; Number of episodes to collect metrics over From f418fc10f4eee30e35777c4e482762571a94d23d Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 11 Apr 2026 10:29:25 -0400 Subject: [PATCH 21/23] Update drive.h remove stale comment --- pufferlib/ocean/drive/drive.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 4336b0161..741a9307e 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2629,7 +2629,7 @@ void compute_agent_metrics(Drive *env, int agent_idx) { // FORMAT COMES IN agent->metrics_array[SPEED_LIMIT_IDX] = (speed_magnitude > SPEED_LIMIT + 2.0f) ? 
1.0f : 0.0f; - // Check for vehicle collisions (skip for pedestrians) + // Check for vehicle collisions int car_collided_with_index = -1; car_collided_with_index = collision_check(env, agent_idx); if (car_collided_with_index != -1) { From face788be77a6f03631bae4acb220568589446b3 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 11 Apr 2026 10:31:28 -0400 Subject: [PATCH 22/23] Update drive.ini render should not override training settings --- pufferlib/config/ocean/drive.ini | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 9ff2fa63d..e68a53185 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -291,12 +291,6 @@ num_maps = 3 view_mode = "persp" ; Whether to draw agent trajectory traces draw_traces = True -; Maximum number of frames to render per map -max_frames = 91 -; Override [env] init/control modes for rendering logged-trajectory (carla_2D) maps. -; carla_2D maps carry recorded vehicle trajectories — use create_all_valid + control_vehicles. 
-init_mode = "init_variable_agent_number" -control_mode = "control_vehicles" ; Allows more than cpu cores workers for rendering overwork = True ; Display human xy logs in the background From 02d8ecf89895597b70095da634876c923f7b79e0 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 11 Apr 2026 14:10:56 -0400 Subject: [PATCH 23/23] Unify rollout loop across Evaluator, render(), and SafeEvaluator (#395) Unify rollout loop across Evaluator, render(), and SafeEvaluator --- pufferlib/ocean/benchmark/evaluator.py | 119 ++++++++------------- pufferlib/ocean/drive/rollout.py | 142 +++++++++++++++++++++++++ pufferlib/pufferl.py | 64 ++++++----- 3 files changed, 217 insertions(+), 108 deletions(-) create mode 100644 pufferlib/ocean/drive/rollout.py diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 3ebe97c38..55b0f5a3e 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -1000,57 +1000,29 @@ def rollout(self, policy, mode="self_play"): def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False, view_mode=None, view_suffix=""): """Run a single rollout. If render_env_idx is not None, render that env. - view_mode: RenderView enum to use when rendering (defaults to FULL_SIM_STATE). - view_suffix: appended to the mp4 filename when logging multiple views, e.g. "_bev". + Thin wrapper around :func:`pufferlib.ocean.drive.rollout.rollout_loop`. + The shared helper owns the actual forward-sample-step-break loop; this + method just builds the RenderContext when rendering is requested. """ from pufferlib.ocean.drive.drive import RenderView - - driver = env.driver_env - num_agents = env.observation_space.shape[0] - device = self.configs["train"]["device"] - if view_mode is None: - view_mode = RenderView.FULL_SIM_STATE - - # Set video filename suffix in C before the first render call of this rollout. - # This ensures each view produces a uniquely named mp4 (e.g. 
{scenario_id}_bev.mp4). - if render_env_idx is not None and view_suffix: - driver.set_video_suffix(view_suffix, env_id=render_env_idx) - - # Reset environment - obs, info = env.reset() - - # Initialize RNN state if needed - state = {} - if self.configs["train"]["use_rnn"]: - state = dict( - lstm_h=torch.zeros(num_agents, policy.hidden_size, device=device), - lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), + from pufferlib.ocean.drive.rollout import RenderContext, rollout_loop + + render_ctx = None + if render_env_idx is not None: + render_ctx = RenderContext( + view_mode=view_mode if view_mode is not None else RenderView.FULL_SIM_STATE, + env_id=render_env_idx, + video_suffix=view_suffix, ) - info_list = [] - episode_length = env.driver_env.episode_length - for time_idx in range(episode_length): - if render_env_idx is not None: - driver.render(view_mode=view_mode, env_id=render_env_idx) - - # Get action from policy - with torch.no_grad(): - ob_tensor = torch.as_tensor(obs).to(device) - logits, value = policy.forward_eval(ob_tensor, state) - action, logprob, _ = pufferlib.pytorch.sample_logits(logits) - action_np = action.cpu().numpy().reshape(env.action_space.shape) - - # Clip continuous actions to valid range - if isinstance(logits, torch.distributions.Normal): - action_np = np.clip(action_np, env.action_space.low, env.action_space.high) - - # Step environment - obs, rewards, dones, truncs, info_list = env.step(action_np, per_env_logs=per_env_logs) - - if truncs.all(): - break - - return info_list + return rollout_loop( + policy=policy, + env=env, + device=self.configs["train"]["device"], + use_rnn=self.configs["train"]["use_rnn"], + render_ctx=render_ctx, + per_env_logs=per_env_logs, + ) def log_videos(self, eval_mode, epoch): """Log all mp4s in local path to wandb after env close has flushed ffmpeg pipes.""" @@ -1165,6 +1137,14 @@ def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger= ) self.render_view_modes = 
_VIEW_MODE_MAP.get(view_mode_str, [RenderView.FULL_SIM_STATE]) + # Authoritative RNN flag comes from `full_config`, which PuffeRL passes + # in flattened form for this evaluator. In this code path the flag + # lives at `full_config["use_rnn"]` (no `train` nesting) — it is set + # during config loading from `rnn_name`. We previously used + # hasattr(policy, "hidden_size") as a proxy, which is fragile because + # non-RNN policies can also expose hidden_size. + self.use_rnn = bool(full_config.get("use_rnn", False)) if full_config is not None else False + def _build_eval_env_config(self): """Build env config with safe reward conditioning values applied. @@ -1227,7 +1207,7 @@ def evaluate(self, vecenv, policy): policy.eval() num_agents = vecenv.observation_space.shape[0] - use_rnn = hasattr(policy, "hidden_size") + use_rnn = self.use_rnn ob, _ = vecenv.reset() state = {} @@ -1281,42 +1261,33 @@ def render(self, eval_config, policy): Always renders env index 0 (first env). Uses [eval].render_view_mode. Produces .mp4 files on disk (flushed when each render_env is closed). + + Thin wrapper around :func:`pufferlib.ocean.drive.rollout.rollout_loop`. 
""" - from pufferlib.ocean.drive.drive import RenderView - from pufferlib.pufferl import load_env import copy + from pufferlib.pufferl import load_env + from pufferlib.ocean.drive.rollout import RenderContext, rollout_loop + multi_view = len(self.render_view_modes) > 1 for view_mode in self.render_view_modes: suffix = f"_{self._view_suffix[view_mode]}" if multi_view else "" render_cfg = copy.deepcopy(eval_config) render_cfg["env"]["render_mode"] = 1 render_env = load_env(self.env_name, render_cfg) - driver = render_env.driver_env - if suffix: - driver.set_video_suffix(suffix, env_id=0) try: - num_agents = render_env.observation_space.shape[0] - use_rnn = hasattr(policy, "hidden_size") - ob, _ = render_env.reset() - state = {} - if use_rnn: - state = dict( - lstm_h=torch.zeros(num_agents, policy.hidden_size, device=self.device), - lstm_c=torch.zeros(num_agents, policy.hidden_size, device=self.device), - ) - for _ in range(self.episode_length): - driver.render(view_mode=view_mode, env_id=0) - with torch.no_grad(): - ob_t = torch.as_tensor(ob).to(self.device) - logits, value = policy.forward_eval(ob_t, state) - action, _, _ = pufferlib.pytorch.sample_logits(logits) - action_np = action.cpu().numpy().reshape(render_env.action_space.shape) - if isinstance(logits, torch.distributions.Normal): - action_np = np.clip(action_np, render_env.action_space.low, render_env.action_space.high) - ob, _, _, truncs, _ = render_env.step(action_np) - if truncs.all(): - break + rollout_loop( + policy=policy, + env=render_env, + device=self.device, + use_rnn=self.use_rnn, + max_steps=self.episode_length, + render_ctx=RenderContext( + view_mode=view_mode, + env_id=0, + video_suffix=suffix, + ), + ) finally: render_env.close() diff --git a/pufferlib/ocean/drive/rollout.py b/pufferlib/ocean/drive/rollout.py new file mode 100644 index 000000000..5c5c1a29c --- /dev/null +++ b/pufferlib/ocean/drive/rollout.py @@ -0,0 +1,142 @@ +"""Shared rollout loop for Drive evaluation and rendering. 
+ +Single source of truth for the forward-sample-step-break cycle. Used by: + - ``Evaluator._run_rollout`` — periodic training eval, optional env-selective render + - ``pufferl.render`` — offline batch rendering, one video per map + - ``SafeEvaluator.render`` — safe-eval-time rendering + +Extracting this eliminates three hand-maintained copies of the same loop that +had drifted in subtle ways: + * ``render_one_map`` was missing continuous-action clipping + * ``render_one_map`` used ``done.all() or truncated.all()`` while the others used ``truncs.all()`` + * ``render_one_map`` inferred video filenames via a ``glob`` diff instead of calling ``set_video_suffix`` + * ``SafeEvaluator`` was using ``hasattr(policy, "hidden_size")`` as a proxy + for "is an RNN policy", which is fragile (many non-RNN policies also + expose ``hidden_size``). Callers now pass the authoritative ``use_rnn`` + flag from the training config. + +Callers pass a ``RenderContext`` to turn on rendering; pass ``None`` for a pure +stats rollout. +""" + +from dataclasses import dataclass +from typing import Optional + +import numpy as np +import torch + +import pufferlib.pytorch + + +@dataclass +class RenderContext: + """Enables rendering inside :func:`rollout_loop`. + + Attributes: + view_mode: ``RenderView`` enum value passed to ``driver.render``. + env_id: which sub-env in the vecenv to record from (default 0). + draw_traces: whether the C renderer should overlay trajectory traces. + video_suffix: appended to the mp4 filename; applied once before the + first render via ``set_video_suffix`` so multi-view rollouts don't + collide on output paths. + """ + + view_mode: int + env_id: int = 0 + draw_traces: bool = True + video_suffix: str = "" + + +def rollout_loop( + policy, + env, + device, + use_rnn: bool, + max_steps: Optional[int] = None, + render_ctx: Optional[RenderContext] = None, + per_env_logs: bool = False, +): + """Run a single policy rollout in a Drive vecenv. 
+ + Args: + policy: the policy to run. Caller is responsible for calling ``.eval()``. + env: a ``PufferEnv``-compatible vecenv wrapping one or more Drive sub-envs. + device: torch device for observation / state tensors. + use_rnn: whether to allocate and carry LSTM hidden state. + max_steps: loop iteration cap. Defaults to ``env.driver_env.episode_length``. + render_ctx: if set, render the specified env/view every step before + sampling actions. Filename suffix is applied via ``set_video_suffix``. + per_env_logs: passed through to ``env.step`` so callers can get + unaggregated per-env logs instead of the default ``vec_log`` aggregate. + + Returns: + The last ``info`` returned by ``env.step``. For stats rollouts this is + the aggregated ``vec_log`` dict or per-env logs list; for render + rollouts callers typically ignore it. + """ + driver = env.driver_env + num_agents = env.observation_space.shape[0] + + # Set (or clear) the video filename suffix before the first render call so + # the C binding names the mp4 correctly up front (e.g. {scenario_id}_bev.mp4). + # Without this, callers have to glob-diff cwd and rename post hoc. We call + # this unconditionally whenever render_ctx is provided — including the + # empty-string case — so reused envs can't leak a stale suffix from a + # prior rollout into the default-filename path. + if render_ctx is not None: + driver.set_video_suffix(render_ctx.video_suffix, env_id=render_ctx.env_id) + + obs, _ = env.reset() + + state = {} + if use_rnn: + state = dict( + lstm_h=torch.zeros(num_agents, policy.hidden_size, device=device), + lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), + ) + + if max_steps is None: + max_steps = driver.episode_length + + info = [] + for _ in range(max_steps): + # Render BEFORE the step so each frame shows the state the policy was + # conditioned on. 
+ if render_ctx is not None: + driver.render( + view_mode=render_ctx.view_mode, + draw_traces=render_ctx.draw_traces, + env_id=render_ctx.env_id, + ) + + with torch.no_grad(): + ob_t = torch.as_tensor(obs).to(device) + logits, _ = policy.forward_eval(ob_t, state) + action, _, _ = pufferlib.pytorch.sample_logits(logits) + action_np = action.cpu().numpy().reshape(env.action_space.shape) + + # Clip continuous actions to the valid range. This was previously + # missing in render_one_map, so continuous policies could emit OOB + # actions during offline rendering. + if isinstance(logits, torch.distributions.Normal): + action_np = np.clip(action_np, env.action_space.low, env.action_space.high) + + # Only pass per_env_logs when the caller explicitly asked for it. + # pufferlib.vector.Serial.step == pufferlib.vector.step, whose + # signature is (vecenv, actions) with no kwargs — passing per_env_logs + # unconditionally would raise TypeError on render_one_map's Serial + # backend. The per_env_logs=True path is only exercised by the + # Evaluator with a native PufferEnv backend where Drive.step does + # accept the kwarg. + if per_env_logs: + obs, _, _, truncs, info = env.step(action_np, per_env_logs=True) + else: + obs, _, _, truncs, info = env.step(action_np) + + # truncs.all() is set in the single c_step where the env auto-resets + # (time limit or early termination). Breaking here exits the loop + # exactly when the current episode wraps. + if truncs.all(): + break + + return info diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 6aba814b7..e9d84b41f 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1626,13 +1626,17 @@ def render(env_name, args=None): def render_one_map(map_path): """Render a single map file in one or more view modes, moving resulting mp4(s) to output_dir. - Each view mode runs a separate rollout so that each gets its own ffmpeg - pipe and output file. 
Output files are named {scenario_id}_{view}.mp4 - when multiple views are requested, or {scenario_id}.mp4 for a single view. + Each view mode runs a separate rollout via the shared + :func:`pufferlib.ocean.drive.rollout.rollout_loop` helper, so that each + gets its own ffmpeg pipe and output file. Output files are named + ``{scenario_id}_{view}.mp4`` for multi-view runs or ``{scenario_id}.mp4`` + for single-view runs — the C binding writes the correct name directly + via ``set_video_suffix``, no post-hoc renaming required. """ + from pufferlib.ocean.drive.rollout import RenderContext, rollout_loop + map_name = os.path.splitext(os.path.basename(map_path))[0] - # View-mode suffix labels used in output filenames _VIEW_SUFFIX = { RenderView.FULL_SIM_STATE: "sim_state", RenderView.AGENT_PERSP: "persp", @@ -1649,7 +1653,7 @@ def render_one_map(map_path): **args["env"], "num_maps": 1, "map_dir": tmp_map_dir, - "render_mode": 1, # headless ffmpeg → writes {scenario_id}.mp4 in cwd + "render_mode": 1, # headless ffmpeg → writes {scenario_id}[suffix].mp4 in cwd } if render_init_mode is not None: env_overrides["init_mode"] = render_init_mode @@ -1666,46 +1670,38 @@ def render_one_map(map_path): policy = load_policy(map_args, env, env_name) policy.eval() - ob, _ = env.reset() - driver = env.driver_env - - state = {} - if map_args["train"]["use_rnn"]: - n = env.agents_per_batch if hasattr(env, "agents_per_batch") else ob.shape[0] - state = dict( - lstm_h=torch.zeros(n, policy.hidden_size, device=device), - lstm_c=torch.zeros(n, policy.hidden_size, device=device), - ) - - # Remove any stale mp4 from a previous run - stale = os.path.join(os.getcwd(), f"{map_name}.mp4") + # Clean up any stale mp4 from a previous run and snapshot cwd so + # we can find the new file(s) created during this rollout. 
+ suffix = f"_{_VIEW_SUFFIX[view_mode]}" if multi else "" + stale = os.path.join(os.getcwd(), f"{map_name}{suffix}.mp4") if os.path.exists(stale): os.remove(stale) - before = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) - for _ in range(max_frames): - driver.render(view_mode=view_mode, draw_traces=draw_traces) - - with torch.no_grad(): - ob_t = torch.as_tensor(ob).to(device) - logits, _ = policy.forward_eval(ob_t, state) - action, _, _ = pufferlib.pytorch.sample_logits(logits) - action = action.cpu().numpy().reshape(env.action_space.shape) - - ob, _, done, truncated, _ = env.step(action) - if done.all() or truncated.all(): - break + rollout_loop( + policy=policy, + env=env, + device=device, + use_rnn=map_args["train"]["use_rnn"], + max_steps=max_frames, + render_ctx=RenderContext( + view_mode=view_mode, + env_id=0, + draw_traces=draw_traces, + video_suffix=suffix, + ), + ) env.close() + # Move the newly produced mp4(s) to output_dir. With + # set_video_suffix, filenames are correct by construction — no + # post-hoc renaming needed. after = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) new_mp4s = after - before if new_mp4s: for src in sorted(new_mp4s): - base = os.path.splitext(os.path.basename(src))[0] - suffix = f"_{_VIEW_SUFFIX[view_mode]}" if multi else "" - dst = os.path.join(output_dir, f"{base}{suffix}.mp4") + dst = os.path.join(output_dir, os.path.basename(src)) shutil.move(src, dst) print(f" Saved {dst}") else: