From 8c154bb700319e185a472968265912bb236a9ed7 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 19 Mar 2026 16:33:32 -0400 Subject: [PATCH 01/23] Making changes --- pufferlib/ocean/drive/binding.c | 1 + pufferlib/ocean/drive/drive.h | 113 ++++++++++++++++++++++++++++---- pufferlib/ocean/drive/drive.py | 20 +++++- pufferlib/ocean/env_binding.h | 62 +++++++++++++++--- pufferlib/ocean/env_config.h | 1 + 5 files changed, 172 insertions(+), 25 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 75189f8dd..8de367de4 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -369,6 +369,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { env->min_goal_distance = (float)unpack(kwargs, "min_goal_distance"); env->max_goal_distance = (float)unpack(kwargs, "max_goal_distance"); env->goal_radius = (float)unpack(kwargs, "goal_radius"); + env->render_mode = (int)unpack(kwargs, "render_mode"); env->min_goal_speed = (float)unpack(kwargs, "min_goal_speed"); env->max_goal_speed = (float)unpack(kwargs, "max_goal_speed"); env->min_avg_speed_to_consider_goal_attempt = (float)unpack(kwargs, "min_avg_speed_to_consider_goal_attempt"); diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 7953617eb..ccd78653d 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -1,3 +1,4 @@ +#include #include #include #include @@ -13,6 +14,20 @@ #include "error.h" #include "datatypes.h" +#define RENDER_WINDOW 0 +#define RENDER_HEADLESS 1 + +// View modes +#define VIEW_MODE_SIM_STATE 0 +#define VIEW_MODE_BEV_AGENT_OBS 1 +#define VIEW_MODE_AGENT_PERSP 2 + +// // Order of entities in rendering (lower is rendered first) +// #define Z_ROAD_SURFACE 0.0f +// #define Z_ROAD_MARKINGS 0.05f // Lane lines, road lines, traces +// #define Z_AGENT_DETAILS 0.4f // Arrow, goal markers, obs overlays +// #define Z_AGENTS 0.6f // Vehicles, cyclists, pedestrians + // constants 
for strings, data etc. #define SCENARIO_ID_STR_LENGTH 16 @@ -118,8 +133,8 @@ #define MAX_CHECKED_LANES 32 // Ego features depend on dynamics model -#define EGO_FEATURES_CLASSIC 13 -#define EGO_FEATURES_JERK 16 +#define EGO_FEATURES_CLASSIC 14 +#define EGO_FEATURES_JERK 17 // Observation normalization constants #define MAX_SPEED 100.0f @@ -201,6 +216,13 @@ const Color LIGHTGREEN = (Color){152, 255, 152, 255}; const Color LIGHTYELLOW = (Color){255, 255, 152, 255}; const Color SOFT_YELLOW = (Color){245, 245, 220, 255}; +const Color LIGHTBLUE = (Color){167, 204, 255, 255}; +const Color DEEPBLUE = (Color){45, 112, 226, 255}; +const Color EXPERT_REPLAY = (Color){162, 220, 183, 255}; +const Color EXPERT_REPLAY_SMALL = (Color){95, 112, 93, 255}; +const Color LIGHT_ORANGE = (Color){255, 160, 80, 255}; +const Color LIGHT_PURPLE = (Color){204, 204, 255, 255}; + struct timespec ts; typedef struct AgentSpawnSettings AgentSpawnSettings; @@ -366,6 +388,7 @@ struct Drive { int reward_randomization; int reward_conditioning; int turn_off_normalization; + int render_mode; RewardBound reward_bounds[NUM_REWARD_COEFS]; float min_avg_speed_to_consider_goal_attempt; float partner_obs_radius; @@ -1468,6 +1491,11 @@ int collision_check(Drive *env, int agent_idx) { if (agent->sim_x == INVALID_POSITION) return -1; + // Skip collision checking for pedestrians because they are often too + // close to other entities at initialization. 
+ if (agent->type == PEDESTRIAN) + return -1; + int car_collided_with_index = -1; if (agent->respawn_timestep != -1) @@ -1561,8 +1589,8 @@ static bool check_offroad(Drive *env, Agent *agent) { RoadMapElement *entity; entity = &env->road_elements[entity_list[i].entity_idx]; - // Check for offroad collision with road edges - if (entity->type == ROAD_EDGE) { + // Check for offroad collision with road edges (only for vehicles and cyclists) + if (entity->type == ROAD_EDGE && agent->type != PEDESTRIAN) { int geometry_idx = entity_list[i].geometry_idx; if (entity->z[geometry_idx] > agent->sim_z + agent->sim_height / 2.0f || entity->z[geometry_idx] < agent->sim_z - agent->sim_height / 2.0f) @@ -2261,7 +2289,13 @@ void init(Drive *env) { env->logs = (Log *)calloc(env->active_agent_count, sizeof(Log)); } +void close_client(Client *client); + void c_close(Drive *env) { + if (env->client != NULL) { + close_client(env->client); + env->client = NULL; + } free_agents(env->agents, env->num_objects); for (int i = 0; i < env->num_roads; i++) { free_road_element(&env->road_elements[i]); @@ -2586,11 +2620,13 @@ void compute_agent_metrics(Drive *env, int agent_idx) { // FORMAT COMES IN agent->metrics_array[SPEED_LIMIT_IDX] = (speed_magnitude > SPEED_LIMIT + 2.0f) ? 
1.0f : 0.0f; - // Check for vehicle collisions - int car_collided_with_index = collision_check(env, agent_idx); - if (car_collided_with_index != -1) - collided = VEHICLE_COLLISION; - + // Check for vehicle collisions (skip for pedestrians) + int car_collided_with_index = -1; + if (agent->type != PEDESTRIAN) { + car_collided_with_index = collision_check(env, agent_idx); + if (car_collided_with_index != -1) + collided = VEHICLE_COLLISION; + } agent->collision_state = collided; if (collided == VEHICLE_COLLISION) { @@ -2756,6 +2792,7 @@ void compute_observations(Drive *env) { obs[13] = fminf(SPEED_LIMIT / MAX_SPEED, 1.0f); obs[14] = lane_center_dist; obs[15] = ego_entity->metrics_array[LANE_ANGLE_IDX]; + obs[16] = ego_entity->type / 3.0f; } else { obs[7] = (ego_entity->respawn_timestep != -1) ? 1 : 0; obs[8] = normalized_goal_speed_min; @@ -2763,6 +2800,7 @@ void compute_observations(Drive *env) { obs[10] = fminf(SPEED_LIMIT / MAX_SPEED, 1.0f); obs[11] = lane_center_dist; obs[12] = ego_entity->metrics_array[LANE_ANGLE_IDX]; + obs[13] = ego_entity->type / 3.0f; } int obs_idx = (env->reward_conditioning == 1) ? 
ego_dim - NUM_REWARD_COEFS : ego_dim; // Placeholder for reward conditioning encoder - @@ -3483,16 +3521,63 @@ struct Client { int car_assignments[MAX_AGENTS]; // To keep car model assignments consistent per vehicle Vector3 default_camera_position; Vector3 default_camera_target; + int recorder_pipefd[2]; + pid_t recorder_pid; }; Client *make_client(Drive *env) { Client *client = (Client *)calloc(1, sizeof(Client)); - client->width = 1280; - client->height = 704; - SetConfigFlags(FLAG_MSAA_4X_HINT); + + if (env->render_mode == RENDER_HEADLESS && getenv("DISPLAY") == NULL) { + // Start Xvfb and set DISPLAY + pid_t xvfb_pid = fork(); + if (xvfb_pid == 0) { + execlp("Xvfb", "Xvfb", ":99", "-screen", "0", "1x1x24", NULL); + _exit(1); + } + setenv("DISPLAY", ":99", 1); + usleep(500000); // Give Xvfb 500ms to start + } + + if (env->render_mode == RENDER_WINDOW) { + client->width = 1280; + client->height = 704; + SetConfigFlags(FLAG_MSAA_4X_HINT); + SetTargetFPS(30); + + // Set up camera for interactive window + Vector3 target_pos = {0, 0, 1}; // Y is up, Z is depth + + client->default_camera_position = (Vector3){ + 0, // Same X as target + 120.0f, // 20 units above target + 175.0f // 20 units behind target + }; + client->default_camera_target = target_pos; + client->camera.position = client->default_camera_position; + client->camera.target = client->default_camera_target; + client->camera.up = (Vector3){0.0f, -1.0f, 0.0f}; // Y is up + client->camera.fovy = 45.0f; + client->camera.projection = CAMERA_PERSPECTIVE; + + } else { // Headless rendering + SetConfigFlags(FLAG_WINDOW_HIDDEN); + SetTargetFPS(6000); + + float map_width = env->grid_map->bottom_right_x - env->grid_map->top_left_x; + float map_height = env->grid_map->top_left_y - env->grid_map->bottom_right_y; + float scale = 6.0f; // Controls the resolution of the output video + int img_width = (int)roundf(map_width * scale / 2.0f) * 2; + int img_height = (int)roundf(map_height * scale / 2.0f) * 2; + + client->width 
= img_width; + client->height = img_height; + } + + SetTraceLogLevel(LOG_WARNING); // Only show warnings and errors InitWindow(client->width, client->height, "PufferDrive"); - SetTargetFPS(30); - client->puffers = LoadTexture("resources/puffers_128.png"); + + // Load assets client->cars[0] = LoadModel("resources/drive/RedCar.glb"); client->cars[1] = LoadModel("resources/drive/WhiteCar.glb"); client->cars[2] = LoadModel("resources/drive/BlueCar.glb"); diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 7e9324692..539db8b81 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -4,15 +4,22 @@ import struct import os import pufferlib +from enum import IntEnum from pufferlib.ocean.drive import binding from multiprocessing import Pool, cpu_count from tqdm import tqdm +class RenderView(IntEnum): + FULL_SIM_STATE = 0 # Orthographic top-down, fully observable simulator state + BEV_AGENT_OBS = 1 # Orthographic top-down, only show what the selected agent can observe + AGENT_PERSP = 2 # Third-person perspective following selected agent + + class Drive(pufferlib.PufferEnv): def __init__( self, - render_mode=None, + render_mode=RenderView.FULL_SIM_STATE, report_interval=1, width=1280, height=1024, @@ -430,6 +437,7 @@ def __init__( spawn_length_min=self.spawn_length_min, spawn_length_max=self.spawn_length_max, spawn_height=self.spawn_height, + render_mode=render_mode, ) env_ids.append(env_id) @@ -593,6 +601,7 @@ def resample_maps(self): spawn_length_min=self.spawn_length_min, spawn_length_max=self.spawn_length_max, spawn_height=self.spawn_height, + render_mode=self.render_mode, ) env_ids.append(env_id) self.c_envs = binding.vectorize(*env_ids) @@ -736,12 +745,17 @@ def get_road_edge_polylines(self): return polylines - def render(self): - binding.vec_render(self.c_envs, 0) + def render(self, view_mode: RenderView = RenderView.FULL_SIM_STATE, draw_traces: bool = True, env_id: int = 0): + binding.vec_render(self.c_envs, 
int(view_mode), draw_traces, env_id) def close(self): binding.vec_close(self.c_envs) + @property + def scenario_ids(self) -> list[str]: + """Return scenario ID string for each env, stripping null padding.""" + return [s.rstrip("\x00") for s in binding.vec_get_scenario_ids(self.c_envs)] + def calculate_area(p1, p2, p3): # 3D triangle area = 0.5 * ||(p2-p1) x (p3-p1)|| diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 44eeacb40..0fdb48428 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -209,13 +209,34 @@ static PyObject *env_step(PyObject *self, PyObject *args) { Py_RETURN_NONE; } -// Python function to step the environment +// Python function to render the environment static PyObject *env_render(PyObject *self, PyObject *args) { + int num_args = PyTuple_Size(args); + if (num_args != 3) { + PyErr_SetString(PyExc_TypeError, "env_render requires 3 arguments (env_handle, view_mode, draw_traces)"); + return NULL; + } Env *env = unpack_env(args); if (!env) { return NULL; } - c_render(env); + + PyObject *view_mode_arg = PyTuple_GetItem(args, 1); + if (!PyObject_TypeCheck(view_mode_arg, &PyLong_Type)) { + PyErr_SetString(PyExc_TypeError, "view_mode must be an integer"); + return NULL; + } + int view_mode = PyLong_AsLong(view_mode_arg); + + PyObject *show_traces_arg = PyTuple_GetItem(args, 2); + if (!PyObject_TypeCheck(show_traces_arg, &PyBool_Type)) { + PyErr_SetString(PyExc_TypeError, "draw_traces must be a boolean"); + return NULL; + } + bool draw_traces = PyObject_IsTrue(show_traces_arg); + + c_render(env, view_mode, draw_traces); + Py_RETURN_NONE; } @@ -531,8 +552,8 @@ static PyObject *vec_step(PyObject *self, PyObject *arg) { static PyObject *vec_render(PyObject *self, PyObject *args) { int num_args = PyTuple_Size(args); - if (num_args != 2) { - PyErr_SetString(PyExc_TypeError, "vec_render requires 2 arguments"); + if (num_args != 4) { + PyErr_SetString(PyExc_TypeError, "vec_render requires 4 arguments"); 
return NULL; } @@ -542,14 +563,24 @@ static PyObject *vec_render(PyObject *self, PyObject *args) { return NULL; } - PyObject *env_id_arg = PyTuple_GetItem(args, 1); - if (!PyObject_TypeCheck(env_id_arg, &PyLong_Type)) { + if (!PyObject_TypeCheck(PyTuple_GetItem(args, 1), &PyLong_Type)) { + PyErr_SetString(PyExc_TypeError, "view_mode must be an integer"); + return NULL; + } + if (!PyObject_TypeCheck(PyTuple_GetItem(args, 2), &PyBool_Type)) { + PyErr_SetString(PyExc_TypeError, "draw_traces must be a boolean"); + return NULL; + } + if (!PyObject_TypeCheck(PyTuple_GetItem(args, 3), &PyLong_Type)) { PyErr_SetString(PyExc_TypeError, "env_id must be an integer"); return NULL; } - int env_id = PyLong_AsLong(env_id_arg); - c_render(vec->envs[env_id]); + int view_mode = PyLong_AsLong(PyTuple_GetItem(args, 1)); + bool draw_traces = PyObject_IsTrue(PyTuple_GetItem(args, 2)); + int env_id = PyLong_AsLong(PyTuple_GetItem(args, 3)); + + c_render(vec->envs[env_id], view_mode, draw_traces); Py_RETURN_NONE; } @@ -684,6 +715,20 @@ static PyObject *get_global_agent_state(PyObject *self, PyObject *args) { Py_RETURN_NONE; } + +static PyObject *vec_get_scenario_ids(PyObject *self, PyObject *args) { + VecEnv *vec = unpack_vecenv(args); + if (!vec) + return NULL; + + PyObject *list = PyList_New(vec->num_envs); + for (int i = 0; i < vec->num_envs; i++) { + // scenario_id is char[16], may not be null-terminated at byte 16 + PyList_SET_ITEM(list, i, PyUnicode_FromStringAndSize(vec->envs[i]->scenario_id, 16)); + } + return list; +} + static PyObject *vec_get_global_agent_state(PyObject *self, PyObject *args) { if (PyTuple_Size(args) != 8) { PyErr_SetString(PyExc_TypeError, "vec_get_global_agent_state requires 8 arguments"); @@ -990,6 +1035,7 @@ static PyMethodDef methods[] = { {"vec_log", vec_log, METH_VARARGS, "Log the vector of environments"}, {"vec_render", vec_render, METH_VARARGS, "Render the vector of environments"}, {"vec_close", vec_close, METH_VARARGS, "Close the vector of 
environments"}, + {"vec_get_scenario_ids", vec_get_scenario_ids, METH_VARARGS, "Get scenario IDs for all envs"}, {"shared", (PyCFunction)my_shared, METH_VARARGS | METH_KEYWORDS, "Shared state"}, {"get_global_agent_state", get_global_agent_state, METH_VARARGS, "Get global agent state"}, {"vec_get_global_agent_state", vec_get_global_agent_state, METH_VARARGS, "Get agent state from vectorized env"}, diff --git a/pufferlib/ocean/env_config.h b/pufferlib/ocean/env_config.h index 8105a40e9..0d83d9762 100644 --- a/pufferlib/ocean/env_config.h +++ b/pufferlib/ocean/env_config.h @@ -8,6 +8,7 @@ // Config struct for parsing INI files - contains all environment configuration typedef struct { + int render_mode; int action_type; int dynamics_model; float reward_vehicle_collision; From bf8e27e7a0c0e339db2160d489c21e1e1251795b Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 19 Mar 2026 23:41:53 -0400 Subject: [PATCH 02/23] Updating drive.h --- pufferlib/ocean/drive/drive.h | 433 +++++++++++++++++++++++++--------- 1 file changed, 321 insertions(+), 112 deletions(-) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index ccd78653d..61722f483 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -215,7 +215,7 @@ const Color PUFF_BACKGROUND2 = (Color){18, 72, 72, 255}; const Color LIGHTGREEN = (Color){152, 255, 152, 255}; const Color LIGHTYELLOW = (Color){255, 255, 152, 255}; const Color SOFT_YELLOW = (Color){245, 245, 220, 255}; - +const Color ROAD_COLOR = (Color){35, 35, 37, 255}; const Color LIGHTBLUE = (Color){167, 204, 255, 255}; const Color DEEPBLUE = (Color){45, 112, 226, 255}; const Color EXPERT_REPLAY = (Color){162, 220, 183, 255}; @@ -3591,26 +3591,43 @@ Client *make_client(Drive *env) { for (int i = 0; i < MAX_AGENTS; i++) { client->car_assignments[i] = (rand() % 4) + 1; } - // Get initial target position from first active agent - Vector3 target_pos = { - 0, - 0, // Y is up - 1 // Z is depth - }; - // Set up 
camera to look at target from above and behind - client->default_camera_position = (Vector3){ - 0, // Same X as target - 120.0f, // 20 units above target - 40.0f // 20 units behind target - }; - client->default_camera_target = target_pos; - client->camera.position = client->default_camera_position; - client->camera.target = client->default_camera_target; - client->camera.up = (Vector3){0.0f, -1.0f, 0.0f}; // Y is up - client->camera.fovy = 45.0f; - client->camera.projection = CAMERA_PERSPECTIVE; - client->camera_zoom = 1.0f; + // Set up ffmpeg process for recording + if (env->render_mode == RENDER_HEADLESS) { + if (pipe(client->recorder_pipefd) == -1) { + fprintf(stderr, "Failed to create pipe\n"); + free(client); + return NULL; + } + + char size_str[64]; + snprintf(size_str, sizeof(size_str), "%dx%d", (int)client->width, (int)client->height); + + char filename[256]; + snprintf(filename, sizeof(filename), "%s.mp4", env->scenario_id); + + client->recorder_pid = fork(); + if (client->recorder_pid == -1) { + fprintf(stderr, "Failed to fork\n"); + free(client); + return NULL; + } + + if (client->recorder_pid == 0) { // Child process + close(client->recorder_pipefd[1]); + dup2(client->recorder_pipefd[0], STDIN_FILENO); + close(client->recorder_pipefd[0]); + for (int fd = 3; fd < 256; fd++) + close(fd); + execlp("ffmpeg", "ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgba", "-s", size_str, "-r", "30", "-i", + "-", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-preset", "ultrafast", "-crf", "23", "-loglevel", + "error", filename, NULL); + fprintf(stderr, "execlp ffmpeg failed\n"); + _exit(1); + } + close(client->recorder_pipefd[0]); + } + return client; } @@ -3717,20 +3734,27 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las float goal_y = agent_obs[1] * 200; float goal_z = agent_obs[2] * 200; + int agent_type = env->agents[active_idx].type; + Color goal_color = LIGHTBLUE; + if (agent_type == PEDESTRIAN) + goal_color = LIGHT_ORANGE; + else 
if (agent_type == CYCLIST) + goal_color = LIGHT_PURPLE; + if (mode == 0) { - DrawSphere((Vector3){goal_x, goal_y, goal_z}, 0.5f, LIGHTGREEN); + DrawSphere((Vector3){goal_x, goal_y, goal_z}, 0.5f, goal_color); DrawCircle3D((Vector3){goal_x, goal_y, goal_z}, env->agents[active_idx].reward_coefs[REWARD_COEF_GOAL_RADIUS], - (Vector3){0, 0, 1}, 90.0f, Fade(LIGHTGREEN, 0.3f)); + (Vector3){0, 0, 1}, 90.0f, Fade(goal_color, 0.3f)); } if (mode == 1) { float goal_x_world = px + (goal_x * heading_self_x - goal_y * heading_self_y); float goal_y_world = py + (goal_x * heading_self_y + goal_y * heading_self_x); float goal_z_world = pz + goal_z; - DrawSphere((Vector3){goal_x_world, goal_y_world, goal_z_world}, 0.5f, LIGHTGREEN); + DrawSphere((Vector3){goal_x_world, goal_y_world, goal_z_world}, 0.5f, goal_color); DrawCircle3D((Vector3){goal_x_world, goal_y_world, goal_z_world}, env->agents[active_idx].reward_coefs[REWARD_COEF_GOAL_RADIUS], (Vector3){0, 0, 1}, 90.0f, - Fade(LIGHTGREEN, 0.3f)); + Fade(goal_color, 0.3f)); } // First draw other agent observations int obs_idx = ego_dim; // Start after ego obs @@ -4040,27 +4064,39 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, continue; } - // --- Draw the car --- - Color car_color = GRAY; // default for static - if (is_expert) - car_color = GOLD; // expert replay - if (is_active_agent) - car_color = BLUE; // policy-controlled - if (is_active_agent && agent->aabb_collision_state > 0) - car_color = LIGHTGREEN; - if (is_active_agent && agent->collision_state > 0) - car_color = RED; - rlSetLineWidth(3.0f); - for (int j = 0; j < 4; j++) { - DrawLine3D(corners[j], corners[(j + 1) % 4], car_color); + // Draw the agent bounding boxes + Color agent_color = GRAY; + if (is_expert) { + if (agent[i].type == PEDESTRIAN || agent[i].type == CYCLIST) + agent_color = EXPERT_REPLAY_SMALL; + else + agent_color = EXPERT_REPLAY; + } + if (is_active_agent) { + if (agent[i].type == PEDESTRIAN) + agent_color = LIGHT_ORANGE; 
+ else if (agent[i].type == CYCLIST) + agent_color = LIGHT_PURPLE; + else + agent_color = BLUE; } + if (is_active_agent && agent[i].collision_state > 0) + agent_color = RED; + + rlPushMatrix(); + rlTranslatef(position.x, position.y, position.z); + rlRotatef(heading * RAD2DEG, 0.0f, 0.0f, 1.0f); + DrawCube((Vector3){0.0f, 0.0f, 0.0f}, size.x, size.y, 1.0f, Fade(agent_color, 0.5f)); + DrawCubeWires((Vector3){0.0f, 0.0f, 0.0f}, size.x, size.y, 1.0f, agent_color); + rlPopMatrix(); + // --- Draw a heading arrow pointing forward --- Vector3 arrowStart = position; Vector3 arrowEnd = {position.x + cos_heading * half_len * 1.5f, // extend arrow beyond car position.y + sin_heading * half_len * 1.5f, position.z}; - DrawLine3D(arrowStart, arrowEnd, car_color); - DrawSphere(arrowEnd, 0.2f, car_color); // arrow tip + DrawLine3D(arrowStart, arrowEnd, agent_color); + DrawSphere(arrowEnd, 0.2f, agent_color); // arrow tip } else { // Agent view rlPushMatrix(); @@ -4126,7 +4162,7 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, }; Color wire_color = GRAY; // static if (!is_active_agent && agent->mark_as_expert == 1) - wire_color = GOLD; // expert replay + wire_color = EXPERT_REPLAY; // expert replay if (is_active_agent) wire_color = BLUE; // policy if (is_active_agent && agent->aabb_collision_state > 0) @@ -4162,13 +4198,20 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, continue; } if (!IsKeyDown(KEY_LEFT_CONTROL) && obs_only == 0) { + + Color goal_color = DEEPBLUE; + if (agent->type == PEDESTRIAN) + goal_color = LIGHT_ORANGE; + else if (agent->type == CYCLIST) + goal_color = LIGHT_PURPLE; + DrawSphere( (Vector3){ agent->goal_position_x, agent->goal_position_y, agent->goal_position_z, }, - 0.5f, DARKGREEN); + 0.5f, goal_color); DrawCircle3D( (Vector3){ @@ -4176,7 +4219,7 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, agent->goal_position_y, agent->goal_position_z, }, - 
agent->reward_coefs[REWARD_COEF_GOAL_RADIUS], (Vector3){0, 0, 1}, 90.0f, Fade(LIGHTGREEN, 0.3f)); + agent->reward_coefs[REWARD_COEF_GOAL_RADIUS], (Vector3){0, 0, 1}, 90.0f, Fade(goal_color, 0.3f)); } } } @@ -4193,7 +4236,7 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, else if (road->type == ROAD_LINE) lineColor = WHITE; else if (road->type == ROAD_EDGE) - lineColor = WHITE; + lineColor = Fade(WHITE, 0.7f); else if (road->type == DRIVEWAY) lineColor = RED; @@ -4240,96 +4283,262 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, } } -void c_render(Drive *env) { +void c_render(Drive *env, int view_mode, int draw_traces) { + + // Create client on first render call if (env->client == NULL) { env->client = make_client(env); } Client *client = env->client; - BeginDrawing(); - Color road = (Color){35, 35, 37, 255}; - ClearBackground(road); - BeginMode3D(client->camera); - handle_camera_controls(env->client); - draw_scene(env, client, 0, 0, 0, 0); - if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { - env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; - } + if (env->render_mode == RENDER_HEADLESS) { // Headless rendering via ffmpeg + float map_width = env->grid_map->bottom_right_x - env->grid_map->top_left_x; + float map_height = env->grid_map->top_left_y - env->grid_map->bottom_right_y; + + Camera3D camera = {0}; + + if (view_mode == VIEW_MODE_SIM_STATE) { + // Orthographic bird's-eye view over the entire map (fully observable) + camera.position = (Vector3){0.0, 0.0, 400.0f}; // Above the scene + camera.target = (Vector3){0.0, 0.0, 0.0}; // Look at origin + camera.up = (Vector3){0.0f, -1.0f, 0.0f}; + camera.projection = CAMERA_ORTHOGRAPHIC; + camera.fovy = map_height; + + BeginDrawing(); + ClearBackground(ROAD_COLOR); + BeginMode3D(camera); + + if (draw_traces) { // Show logged trajectories of active agents and expert static agents + for (int i = 0; i < 
env->active_agent_count; i++) { + int idx = env->active_agent_indices[i]; + for (int t = env->init_steps; t < env->episode_length; t++) { + Color agent_color = LIGHTBLUE; + if (env->agents[idx].type == PEDESTRIAN) { + agent_color = LIGHT_ORANGE; + } else if (env->agents[idx].type == CYCLIST) { + agent_color = LIGHT_PURPLE; + } + DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], + env->agents[idx].log_trajectory_z[t]}, + 0.15f, agent_color); + } + } + + for (int i = 0; i < env->expert_static_agent_count; i++) { + int idx = env->expert_static_agent_indices[i]; + for (int t = env->init_steps; t < env->episode_length; t++) { + DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], + env->agents[idx].log_trajectory_z[t]}, + 0.15f, EXPERT_REPLAY); + } + } + } - // Draw debug info - DrawText(TextFormat("Camera Position: (%.2f, %.2f, %.2f)", client->camera.position.x, client->camera.position.y, - client->camera.position.z), - 10, 10, 20, PUFF_WHITE); - DrawText(TextFormat("Camera Target: (%.2f, %.2f, %.2f)", client->camera.target.x, client->camera.target.y, - client->camera.target.z), - 10, 30, 20, PUFF_WHITE); - DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE); + draw_scene(env, client, 1, 0, 0, 0); - int human_idx = env->active_agent_indices[env->human_agent_idx]; - DrawText(TextFormat("Controlling Agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE); - DrawText(TextFormat("Agent Index: %d", human_idx), 10, 90, 20, PUFF_WHITE); + } else if (view_mode == VIEW_MODE_BEV_AGENT_OBS) { + // Orthographic bird's-eye view centered on the selected agent, + // showing only that agent's observations + int agent_idx = env->active_agent_indices[env->human_agent_idx]; + Agent *agent = &env->agents[agent_idx]; - // Display current action values - yellow when controlling, white otherwise - Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? 
YELLOW : PUFF_WHITE; + Camera3D camera = {0}; + camera.position = (Vector3){agent->sim_x, agent->sim_y, 400.0f}; + camera.target = (Vector3){agent->sim_x, agent->sim_y, 0.0f}; + camera.up = (Vector3){0.0f, -1.0f, 0.0f}; + camera.projection = CAMERA_ORTHOGRAPHIC; + camera.fovy = env->grid_map->vision_range * GRID_CELL_SIZE * 2.0f; - if (env->action_type == 0) { // discrete - int *action_array = (int *)env->actions; - int action_val = action_array[env->human_agent_idx]; + BeginDrawing(); + ClearBackground(ROAD_COLOR); + BeginMode3D(camera); + draw_scene(env, client, 1, 1, 0, 0); - if (env->dynamics_model == CLASSIC) { - int num_steer = 13; - int accel_idx = action_val / num_steer; - int steer_idx = action_val % num_steer; - float accel_value = ACCELERATION_VALUES[accel_idx]; - float steer_value = STEERING_VALUES[steer_idx]; - - DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color); - DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color); - } else if (env->dynamics_model == JERK) { - int num_lat = 3; - int jerk_long_idx = action_val / num_lat; - int jerk_lat_idx = action_val % num_lat; - float jerk_long_value = JERK_LONG[jerk_long_idx]; - float jerk_lat_value = JERK_LAT[jerk_lat_idx]; - - DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color); - DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color); + } else { // First-person perspective from a selected agent + int agent_idx = env->active_agent_indices[env->human_agent_idx]; + Agent *agent = &env->agents[agent_idx]; + + Camera3D camera = {0}; + // Position camera behind and above the agent + camera.position = (Vector3){agent->sim_x - (25.0f * cosf(agent->sim_heading)), + agent->sim_y - (25.0f * sinf(agent->sim_heading)), 15.0f}; + camera.target = (Vector3){agent->sim_x + 40.0f * cosf(agent->sim_heading), + agent->sim_y + 40.0f * sinf(agent->sim_heading), 1.0f}; + camera.up = 
(Vector3){0.0f, 0.0f, 1.0f}; + camera.fovy = 60.0f; + camera.projection = CAMERA_PERSPECTIVE; + + BeginDrawing(); + ClearBackground(ROAD_COLOR); + BeginMode3D(camera); + draw_scene(env, client, 0, 0, 0, 1); } - } else { // continuous - float (*action_array_f)[2] = (float (*)[2])env->actions; - DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, action_color); - DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, action_color); - } - // Show key press status - int status_y = 150; - if (IsKeyDown(KEY_LEFT_SHIFT)) { - DrawText("[shift pressed]", 10, status_y, 20, YELLOW); - status_y += 20; - } - if (IsKeyDown(KEY_SPACE)) { - DrawText("[space pressed]", 10, status_y, 20, YELLOW); - status_y += 20; - } - if (IsKeyDown(KEY_LEFT_CONTROL)) { - DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW); - status_y += 20; + EndDrawing(); + + unsigned char *screen_data = rlReadScreenPixels((int)client->width, (int)client->height); + if (screen_data) { + write(client->recorder_pipefd[1], screen_data, (int)client->width * (int)client->height * 4); + RL_FREE(screen_data); + } + } else { // Pop-up window + BeginDrawing(); + ClearBackground(ROAD_COLOR); + BeginMode3D(client->camera); + handle_camera_controls(env->client); + draw_scene(env, client, 0, 0, 0, 0); + + if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { + env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; + } + + DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE); + DrawText(TextFormat("Controlling agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE); + int human_idx = env->active_agent_indices[env->human_agent_idx]; + + Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? 
YELLOW : PUFF_WHITE; + + if (env->action_type == 0) { // discrete + int *action_array = (int *)env->actions; + int action_val = action_array[env->human_agent_idx]; + + if (env->dynamics_model == CLASSIC) { + int num_steer = 13; + int accel_idx = action_val / num_steer; + int steer_idx = action_val % num_steer; + float accel_value = ACCELERATION_VALUES[accel_idx]; + float steer_value = STEERING_VALUES[steer_idx]; + + DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color); + DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color); + } else if (env->dynamics_model == JERK) { + int num_lat = 3; + int jerk_long_idx = action_val / num_lat; + int jerk_lat_idx = action_val % num_lat; + float jerk_long_value = JERK_LONG[jerk_long_idx]; + float jerk_lat_value = JERK_LAT[jerk_lat_idx]; + + DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color); + DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color); + } + } else { // continuous + float (*action_array_f)[2] = (float (*)[2])env->actions; + DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, + action_color); + DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, action_color); + } + + int status_y = 150; + if (IsKeyDown(KEY_LEFT_SHIFT)) { + DrawText("[shift pressed]", 10, status_y, 20, YELLOW); + status_y += 20; + } + if (IsKeyDown(KEY_SPACE)) { + DrawText("[space pressed]", 10, status_y, 20, YELLOW); + status_y += 20; + } + if (IsKeyDown(KEY_LEFT_CONTROL)) { + DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW); + status_y += 20; + } + + DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10, + client->height - 30, 20, PUFF_WHITE); + DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE); + DrawText(TextFormat("Grid Cols: %d", 
env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); + EndDrawing(); } - // Controls help - DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10, - client->height - 30, 20, PUFF_WHITE); + // BeginDrawing(); + // Color road = (Color){35, 35, 37, 255}; + // ClearBackground(road); + // BeginMode3D(client->camera); + // handle_camera_controls(env->client); + // draw_scene(env, client, 0, 0, 0, 0); + + // if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { + // env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; + // } + + // // Draw debug info + // DrawText(TextFormat("Camera Position: (%.2f, %.2f, %.2f)", client->camera.position.x, client->camera.position.y, + // client->camera.position.z), + // 10, 10, 20, PUFF_WHITE); + // DrawText(TextFormat("Camera Target: (%.2f, %.2f, %.2f)", client->camera.target.x, client->camera.target.y, + // client->camera.target.z), + // 10, 30, 20, PUFF_WHITE); + // DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE); + + // int human_idx = env->active_agent_indices[env->human_agent_idx]; + // DrawText(TextFormat("Controlling Agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE); + // DrawText(TextFormat("Agent Index: %d", human_idx), 10, 90, 20, PUFF_WHITE); + + // // Display current action values - yellow when controlling, white otherwise + // Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? 
YELLOW : PUFF_WHITE; + + // if (env->action_type == 0) { // discrete + // int *action_array = (int *)env->actions; + // int action_val = action_array[env->human_agent_idx]; + + // if (env->dynamics_model == CLASSIC) { + // int num_steer = 13; + // int accel_idx = action_val / num_steer; + // int steer_idx = action_val % num_steer; + // float accel_value = ACCELERATION_VALUES[accel_idx]; + // float steer_value = STEERING_VALUES[steer_idx]; + + // DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color); + // DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color); + // } else if (env->dynamics_model == JERK) { + // int num_lat = 3; + // int jerk_long_idx = action_val / num_lat; + // int jerk_lat_idx = action_val % num_lat; + // float jerk_long_value = JERK_LONG[jerk_long_idx]; + // float jerk_lat_value = JERK_LAT[jerk_lat_idx]; + + // DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color); + // DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color); + // } + // } else { // continuous + // float (*action_array_f)[2] = (float (*)[2])env->actions; + // DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, + // action_color); DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, + // action_color); + // } + + // // Show key press status + // int status_y = 150; + // if (IsKeyDown(KEY_LEFT_SHIFT)) { + // DrawText("[shift pressed]", 10, status_y, 20, YELLOW); + // status_y += 20; + // } + // if (IsKeyDown(KEY_SPACE)) { + // DrawText("[space pressed]", 10, status_y, 20, YELLOW); + // status_y += 20; + // } + // if (IsKeyDown(KEY_LEFT_CONTROL)) { + // DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW); + // status_y += 20; + // } + + // // Controls help + // DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10, + // 
client->height - 30, 20, PUFF_WHITE); - DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE); - DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); - EndDrawing(); + // DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE); + // DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); + // EndDrawing(); } void close_client(Client *client) { + if (client->recorder_pid > 0) { + close(client->recorder_pipefd[1]); + waitpid(client->recorder_pid, NULL, 0); + } for (int i = 0; i < 6; i++) { UnloadModel(client->cars[i]); } - UnloadTexture(client->puffers); CloseWindow(); free(client); } From 4517e7ff4bc0e174eab2afbf0141950728b5e001 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Tue, 24 Mar 2026 16:06:56 -0400 Subject: [PATCH 03/23] Further render related updates --- pufferlib/config/ocean/drive.ini | 70 ++-- pufferlib/ocean/benchmark/evaluator.py | 141 +++++++ pufferlib/ocean/drive/drive.c | 134 +++---- pufferlib/ocean/drive/drive.h | 60 ++- pufferlib/ocean/drive/drive.py | 3 +- pufferlib/ocean/env_binding.h | 2 +- pufferlib/pufferl.py | 508 +++++++++---------------- pufferlib/utils.py | 242 ------------ 8 files changed, 475 insertions(+), 685 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index c5a68e0f9..bc78f4bcb 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -54,19 +54,19 @@ polyline_max_segment_length = 10.0 episode_length = 300 resample_frequency = 300 termination_mode = 1 # 0 - terminate at episode_length, 1 - terminate after all agents have been reset -map_dir = "resources/drive/binaries/carla_2D" +map_dir = "resources/drive/binaries/carla_3D" num_maps = 3 ; If True, allows training with fewer maps than requested (warns instead of erroring) allow_fewer_maps = True ; Determines which step of the trajectory 
to initialize the agents at upon reset init_steps = 0 ; Options: "control_vehicles", "control_agents", "control_wosac", "control_sdc_only" -control_mode = "control_vehicles" +control_mode = "control_agents" ; Options: "create_all_valid", "create_only_controlled", "init_variable_agent_number"(creates random number of controlled agents per env) init_mode = "init_variable_agent_number" ; Below options only valid for "init_variable_agent_number" init_mode min_agents_per_env = 1 -max_agents_per_env = 128 +max_agents_per_env = 32 ; Dimension Ranges for agents spawn_width_min = 1.5 spawn_width_max = 2.5 @@ -74,6 +74,11 @@ spawn_length_min = 2.0 spawn_length_max = 5.5 spawn_height = 1.5 +; Render mode options: +; 0:"window" = pop-up raylib window (original) +; 1:"headless" = off-screen; frames piped to ffmpeg (recommended for training) +render_mode = 1 + turn_off_normalization = 1 ; Options: 0 - Use normalized reward coefs as input to NN, 1 - Dont @@ -171,28 +176,22 @@ vf_clip_coef = 0.1999999999999999 vf_coef = 2 vtrace_c_clip = 1 vtrace_rho_clip = 1 -checkpoint_interval = 250 -; Rendering options -render = True -render_async = True # Render in background process to avoid blocking training -render_interval = 1000 -; If True, show exactly what the agent sees in agent observation -obs_only = True -; Show grid lines -show_grid = True -; Draws lines from ego agent observed ORUs and road elements to show detection range -show_lasers = False -; Display human xy logs in the background -show_human_logs = False -; If True, zoom in on a part of the map. 
Otherwise, show full map -zoom_in = True -; Options: List[str to path], str to path (e.g., "resources/drive/training/binaries/map_001.bin"), None -render_map = none +checkpoint_interval = 1000 [eval] eval_interval = 1000 ; Path to dataset used for evaluation map_dir = "resources/drive/binaries/training" + +num_eval_agents = 64 +; If True, enable self-play evaluation (pair policy-controlled agent with a copy of itself) +self_play_eval = True +; If True, enable human replay evaluation (pair policy-controlled agent with human replays) +human_replay_eval = False +; If True, render random scenarios. Note: Doing this frequency will slow down the training. +render_human_replay_eval = False +render_self_play_eval = True + ; Number of scenarios to process per batch wosac_batch_size = 32 ; Target number of unique scenarios perform evaluation in @@ -271,29 +270,30 @@ steer = 1.0 acc = 1.0 [render] -; Mode to render a bunch of maps with a given policy -; Path to dataset used for rendering -map_dir = "resources/drive/binaries/training" -; Directory to output rendered videos +; Render a batch of maps offline using the in-process c_render (ffmpeg) pipeline. 
+; Path to the .bin map directory +map_dir = "resources/drive/binaries/carla_2D" +; Directory to write output mp4 files output_dir = "resources/drive/render_videos" -; Evaluation will run on the first num_maps maps in the map_dir directory +; Number of maps to render (capped at files available in map_dir) num_maps = 100 -; "both", "topdown", "agent"; Other args are passed from train confs -view_mode = "both" +; View mode: sim_state (top-down, origin-centered), zoom_out (top-down, full map bbox), bev (agent BEV obs), persp (third-person follow-cam) +; NOTE: "persp" and "bev" require active_agent_count > 0 +view_mode = "sim_state" +; Whether to draw agent trajectory traces +draw_traces = True +; Maximum number of frames to render per map +max_frames = 91 +; Override [env] init/control modes for rendering logged-trajectory (carla_2D) maps. +; carla_2D maps carry recorded vehicle trajectories — use create_all_valid + control_vehicles. +init_mode = "init_variable_agent_number" +control_mode = "control_vehicles" ; Policy bin file used for rendering videos policy_path = "resources/drive/puffer_drive_weights_resampling_300.bin" ; Allows more than cpu cores workers for rendering overwork = True -; If True, show exactly what the agent sees in agent observation -obs_only = True -; Show grid lines -show_grid = True -; Draws lines from ego agent observed ORUs and road elements to show detection range -show_lasers = True ; Display human xy logs in the background show_human_logs = False -; If True, zoom in on a part of the map. 
Otherwise, show full map -zoom_in = True [sweep.train.learning_rate] distribution = log_normal diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 15b37208e..9f821cf11 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -1,5 +1,6 @@ """WOSAC evaluation class for PufferDrive.""" +import copy import torch import numpy as np import pandas as pd @@ -818,6 +819,146 @@ def rollout(self, args, puffer_env, policy): return results +class Evaluator: + """Evaluates policies in self_play or human_replay mode, with optional rendering. + + Initializes the eval envs needed based on eval config flags: + - human_replay_eval: creates sp_env + hr_env + - render_eval: creates sp_env (if not already created) + + + + """ + + def __init__(self, configs, logger=None): + self.configs = configs + self.logger = logger + self.sim_steps = 90 + self.self_play_stats = None + self.human_replay_stats = None + self.sp_env = None + self.hr_env = None + + self._unpack_eval_configs(configs) + + def _unpack_eval_configs(self, configs): + eval_config = copy.deepcopy(configs) + # Create separate evaluation environments based on specified configs + eval_config["env"]["termination_mode"] = 0 + backend = eval_config["eval"].get("backend", "PufferEnv") + eval_config["env"]["map_dir"] = eval_config["eval"]["map_dir"] + eval_config["env"]["num_agents"] = eval_config["eval"]["num_eval_agents"] + eval_config["env"]["episode_length"] = 91 # WOMD scenario length + eval_config["vec"] = dict(backend=backend, num_envs=1) + + self.render_sp_rollout = self.configs["eval"]["render_self_play_eval"] + self.render_hr_rollout = self.configs["eval"]["render_human_replay_eval"] + + self.hr_eval_config = copy.deepcopy(eval_config) + self.hr_eval_config["env"]["control_mode"] = "control_sdc_only" + self.hr_eval_config["env"]["render_mode"] = 1 if self.render_hr_rollout else 0 + + self.sp_eval_config = copy.deepcopy(eval_config) + 
self.sp_eval_config["env"]["control_mode"] = "control_agents" + self.sp_eval_config["env"]["render_mode"] = 1 if self.render_sp_rollout else 0 + + def rollout(self, policy, mode="self_play", render_rollout=False): + """Roll out the given policy in the specified eval env and collect statistics.""" + + env = self.hr_env if mode == "human_replay" else self.sp_env + render_eval = self.render_sp_rollout if mode == "self_play" else self.render_hr_rollout + driver = env.driver_env + num_agents = env.observation_space.shape[0] + device = self.configs["train"]["device"] + + # Reset environment + obs, info = env.reset() + + # Initialize RNN state if needed + state = {} + if self.configs["train"]["use_rnn"]: + state = dict( + lstm_h=torch.zeros(num_agents, policy.hidden_size, device=device), + lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), + ) + + # Rollout + for time_idx in range(self.sim_steps): + if render_rollout or render_eval: + driver.render() + + # Get action from policy + with torch.no_grad(): + ob_tensor = torch.as_tensor(obs).to(device) + logits, value = policy.forward_eval(ob_tensor, state) + action, logprob, _ = pufferlib.pytorch.sample_logits(logits) + action_np = action.cpu().numpy().reshape(env.action_space.shape) + + # Clip continuous actions to valid range + if isinstance(logits, torch.distributions.Normal): + action_np = np.clip(action_np, env.action_space.low, env.action_space.high) + + # Step environment + obs, rewards, dones, truncs, info_list = env.step(action_np) + + if truncs.all(): + break + + # Aggregate final statistics + final_info = info_list[0] if info_list else {} + + if mode == "self_play": + self.self_play_stats = final_info + elif mode == "human_replay": + self.human_replay_stats = final_info + + def log_videos(self, eval_mode, epoch): + """Log all mp4s in local path to wandb after env close has flushed ffmpeg pipes.""" + import os + import glob + + if not (self.logger and hasattr(self.logger, "wandb") and 
self.logger.wandb): + # Still clean up even if not logging + for p in glob.glob("*.mp4"): + os.remove(p) + return + + import wandb + + video_files = glob.glob("*.mp4") + if not video_files: + print("Warning: no render videos found in local path") + return + + for p in video_files: + scenario_id = os.path.splitext(os.path.basename(p))[0] + self.logger.wandb.log( + {f"render/{eval_mode}": wandb.Video(p, format="mp4", caption=f"scene_{scenario_id}_epoch_{epoch}")} + ) + + for p in video_files: + os.remove(p) + + def log_stats(self): + if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb): + return + + eval_stats = {} + + if self.human_replay_stats is not None: + eval_stats["eval/hr_collision_rate"] = self.human_replay_stats["collision_rate"] + eval_stats["eval/hr_score"] = self.human_replay_stats["score"] + if self.self_play_stats is not None: + eval_stats["eval/sp_collision_rate"] = self.self_play_stats["collision_rate"] + eval_stats["eval/sp_score"] = self.self_play_stats["score"] + eval_stats["eval/num_agents"] = self.self_play_stats["n"] + + if not eval_stats: + return + + self.logger.wandb.log(eval_stats) + + class SafeEvaluator: """Evaluates policies with fixed safe/law-abiding reward conditioning. 
diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index f3c1cd66b..aaae593bd 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -20,8 +20,7 @@ void test_drivenet() { // Weights* weights = load_weights("resources/drive/puffer_drive_weights.bin"); Weights *weights = load_weights("puffer_drive_weights.bin"); - int reward_conditioning = 0; - DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, reward_conditioning); + DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, 1); forward(net, observations, actions); for (int i = 0; i < num_agents * num_actions; i++) { @@ -35,7 +34,7 @@ void test_drivenet() { free(weights); } -void demo() { +void demo(const char *map_name_arg, const char *policy_name_arg, int view_mode, int draw_traces) { // Read configuration from INI file env_init_config conf = {0}; const char *ini_file = "pufferlib/config/ocean/drive.ini"; @@ -83,18 +82,22 @@ void demo() { }; Drive env = { - .human_agent_idx = 0, - .action_type = 0, // Demo doesn't support continuous action space + .action_type = conf.action_type, .dynamics_model = conf.dynamics_model, .reward_vehicle_collision = conf.reward_vehicle_collision, .reward_offroad_collision = conf.reward_offroad_collision, + .reward_lane_align = conf.reward_lane_align, + .reward_lane_center = conf.reward_lane_center, .reward_goal = conf.reward_goal, .reward_goal_post_respawn = conf.reward_goal_post_respawn, .goal_radius = conf.goal_radius, + .min_goal_speed = conf.min_goal_speed, .goal_behavior = conf.goal_behavior, + .reward_randomization = conf.reward_randomization, + .reward_conditioning = conf.reward_conditioning, + .turn_off_normalization = conf.turn_off_normalization, .min_goal_distance = conf.min_goal_distance, .max_goal_distance = conf.max_goal_distance, - .min_goal_speed = conf.min_goal_speed, .max_goal_speed = conf.max_goal_speed, .dt = conf.dt, .episode_length = conf.episode_length, @@ -108,8 +111,27 @@ void demo() { .init_mode = 
conf.init_mode, .control_mode = conf.control_mode, .spawn_settings = spawn_settings, - .map_name = "resources/drive/binaries/carla_2D/map_001.bin", - .reward_conditioning = conf.reward_conditioning, + .reward_bounds = + { + {conf.reward_bound_goal_radius_min, conf.reward_bound_goal_radius_max}, + {conf.reward_bound_collision_min, conf.reward_bound_collision_max}, + {conf.reward_bound_offroad_min, conf.reward_bound_offroad_max}, + {conf.reward_bound_comfort_min, conf.reward_bound_comfort_max}, + {conf.reward_bound_lane_align_min, conf.reward_bound_lane_align_max}, + {conf.reward_bound_lane_center_min, conf.reward_bound_lane_center_max}, + {conf.reward_bound_velocity_min, conf.reward_bound_velocity_max}, + {conf.reward_bound_traffic_light_min, conf.reward_bound_traffic_light_max}, + {conf.reward_bound_center_bias_min, conf.reward_bound_center_bias_max}, + {conf.reward_bound_vel_align_min, conf.reward_bound_vel_align_max}, + {conf.reward_bound_overspeed_min, conf.reward_bound_overspeed_max}, + {conf.reward_bound_timestep_min, conf.reward_bound_timestep_max}, + {conf.reward_bound_reverse_min, conf.reward_bound_reverse_max}, + {conf.reward_bound_throttle_min, conf.reward_bound_throttle_max}, + {conf.reward_bound_steer_min, conf.reward_bound_steer_max}, + {conf.reward_bound_acc_min, conf.reward_bound_acc_max}, + }, + .map_name = "resources/drive/binaries/carla/carla_3D/map_001.bin", + .render_mode = RENDER_WINDOW, .partner_obs_radius = conf.partner_obs_radius, }; @@ -123,10 +145,11 @@ void demo() { conf.init_mode); free_allocated(&env); return; + // return -1; } c_reset(&env); - c_render(&env); - Weights *weights = load_weights("resources/drive/puffer_drive_weights.bin"); + c_render(&env, view_mode, draw_traces); + Weights *weights = load_weights(policy_name_arg ? 
policy_name_arg : "resources/drive/puffer_drive_weights.bin"); DriveNet *net = init_drivenet(weights, env.active_agent_count, env.dynamics_model, env.reward_conditioning); int accel_delta = 1; @@ -192,13 +215,14 @@ void demo() { } c_step(&env); - c_render(&env); + c_render(&env, view_mode, draw_traces); } close_client(env.client); free_allocated(&env); free_drivenet(net); free(weights); + return; } void performance_test() { @@ -243,61 +267,19 @@ void performance_test() { } int main(int argc, char *argv[]) { - // Visualization-only parameters (not in [env] section) - int show_grid = 0; - int obs_only = 0; - int lasers = 0; - int show_human_logs = 0; - int frame_skip = 1; - int zoom_in = 0; - const char *view_mode = "both"; - - // File paths and num_maps (not in [env] section) const char *map_name = NULL; - const char *policy_name = "resources/drive/puffer_drive_weights.bin"; - const char *output_topdown = NULL; - const char *output_agent = NULL; - int num_maps = 1; + const char *policy_name = NULL; + int view_mode = VIEW_MODE_SIM_STATE; // Default: full sim-state bird's-eye view + int draw_traces = 1; // Default: show logged trajectories // Parse command line arguments for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "--show-grid") == 0) { - show_grid = 1; - } else if (strcmp(argv[i], "--obs-only") == 0) { - obs_only = 1; - } else if (strcmp(argv[i], "--lasers") == 0) { - lasers = 1; - } else if (strcmp(argv[i], "--log-trajectories") == 0) { - show_human_logs = 1; - } else if (strcmp(argv[i], "--frame-skip") == 0) { - if (i + 1 < argc) { - frame_skip = atoi(argv[i + 1]); - i++; - if (frame_skip <= 0) { - frame_skip = 1; - } - } - } else if (strcmp(argv[i], "--zoom-in") == 0) { - zoom_in = 1; - } else if (strcmp(argv[i], "--view") == 0) { - if (i + 1 < argc) { - view_mode = argv[i + 1]; - i++; - if (strcmp(view_mode, "both") != 0 && strcmp(view_mode, "topdown") != 0 && - strcmp(view_mode, "agent") != 0) { - fprintf(stderr, "Error: --view must be 'both', 
'topdown', or 'agent'\n"); - return 1; - } - } else { - fprintf(stderr, "Error: --view option requires a value (both/topdown/agent)\n"); - return 1; - } - } else if (strcmp(argv[i], "--map-name") == 0) { + if (strcmp(argv[i], "--map-name") == 0) { if (i + 1 < argc) { map_name = argv[i + 1]; i++; } else { - fprintf(stderr, "Error: --map-name option requires a map file path\n"); + fprintf(stderr, "Error: --map-name requires a map file path\n"); return 1; } } else if (strcmp(argv[i], "--policy-name") == 0) { @@ -305,30 +287,36 @@ int main(int argc, char *argv[]) { policy_name = argv[i + 1]; i++; } else { - fprintf(stderr, "Error: --policy-name option requires a policy file path\n"); + fprintf(stderr, "Error: --policy-name requires a policy file path\n"); return 1; } - } else if (strcmp(argv[i], "--output-topdown") == 0) { - if (i + 1 < argc) { - output_topdown = argv[i + 1]; - i++; - } - } else if (strcmp(argv[i], "--output-agent") == 0) { - if (i + 1 < argc) { - output_agent = argv[i + 1]; - i++; - } - } else if (strcmp(argv[i], "--num-maps") == 0) { + } else if (strcmp(argv[i], "--view") == 0) { if (i + 1 < argc) { - num_maps = atoi(argv[i + 1]); + const char *v = argv[i + 1]; i++; + if (strcmp(v, "sim_state") == 0 || strcmp(v, "topdown") == 0) { + view_mode = VIEW_MODE_SIM_STATE; + } else if (strcmp(v, "bev") == 0 || strcmp(v, "agent") == 0) { + view_mode = VIEW_MODE_BEV_AGENT_OBS; + } else if (strcmp(v, "persp") == 0) { + view_mode = VIEW_MODE_AGENT_PERSP; + } else if (strcmp(v, "zoom_out") == 0 || strcmp(v, "full_map") == 0) { + view_mode = VIEW_MODE_SIM_STATE_ZOOMED_OUT; + } else { + fprintf(stderr, "Error: --view must be 'sim_state', 'bev', 'persp', or 'zoom_out'\n"); + return 1; + } + } else { + fprintf(stderr, "Error: --view requires a value (sim_state/bev/persp/zoom_out)\n"); + return 1; } + } else if (strcmp(argv[i], "--no-traces") == 0) { + draw_traces = 0; } } // performance_test(); - demo(map_name, policy_name, show_grid, obs_only, lasers, 
show_human_logs, frame_skip, view_mode, output_topdown, - output_agent, num_maps, zoom_in); + demo(map_name, policy_name, view_mode, draw_traces); // test_drivenet(); return 0; } diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 61722f483..33b8719ca 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -21,6 +21,8 @@ #define VIEW_MODE_SIM_STATE 0 #define VIEW_MODE_BEV_AGENT_OBS 1 #define VIEW_MODE_AGENT_PERSP 2 +#define VIEW_MODE_SIM_STATE_ZOOMED_OUT \ + 3 // Full map, camera centered on map bbox (matches old visualize.c zoom_in=false) // // Order of entities in rendering (lower is rendered first) // #define Z_ROAD_SURFACE 0.0f @@ -1091,6 +1093,24 @@ void load_map_binary(const char *filename, Drive *env) { if (!file) return; + // Populate scenario_id from the filename stem (e.g. "/path/to/abc123def456.bin" → "abc123def456") + // This is the string used by make_client to name the output mp4. + { + const char *base = filename; + // Find last '/' or '\\' + for (const char *p = filename; *p; p++) { + if (*p == '/' || *p == '\\') + base = p + 1; + } + // Copy up to SCENARIO_ID_STR_LENGTH chars, stopping at '.' 
or end + int i = 0; + while (i < SCENARIO_ID_STR_LENGTH - 1 && base[i] && base[i] != '.') { + env->scenario_id[i] = base[i]; + i++; + } + env->scenario_id[i] = '\0'; + } + // Read sdc_track_index fread(&env->sdc_track_index, sizeof(int), 1, file); @@ -2082,7 +2102,6 @@ int spawn_active_agents(Drive *env, int num_agents_to_create) { void spawn_agents_with_counts(Drive *env) { // Currently only creates active agents int num_agents_to_create = env->num_agents; - int successfully_created = spawn_active_agents(env, num_agents_to_create); env->num_created_agents = successfully_created; @@ -4337,6 +4356,45 @@ void c_render(Drive *env, int view_mode, int draw_traces) { draw_scene(env, client, 1, 0, 0, 0); + } else if (view_mode == VIEW_MODE_SIM_STATE_ZOOMED_OUT) { + // Orthographic bird's-eye view showing the full map, camera centered on the + // map bounding box — mirrors the old visualize.c renderTopDownView zoom_in=false path. + camera.position = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 500.0f}; + camera.target = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 0.0f}; + camera.up = (Vector3){0.0f, -1.0f, 0.0f}; + camera.projection = CAMERA_ORTHOGRAPHIC; + camera.fovy = map_height * 2.0f; // 2× height shows the full map + + BeginDrawing(); + ClearBackground(ROAD_COLOR); + BeginMode3D(camera); + + if (draw_traces) { + for (int i = 0; i < env->active_agent_count; i++) { + int idx = env->active_agent_indices[i]; + for (int t = env->init_steps; t < env->episode_length; t++) { + Color agent_color = LIGHTBLUE; + if (env->agents[idx].type == PEDESTRIAN) + agent_color = LIGHT_ORANGE; + else if (env->agents[idx].type == CYCLIST) + agent_color = LIGHT_PURPLE; + DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], + env->agents[idx].log_trajectory_z[t]}, + 0.15f, agent_color); + } + } + for (int i = 0; i < env->expert_static_agent_count; i++) { + int idx = env->expert_static_agent_indices[i]; 
+ for (int t = env->init_steps; t < env->episode_length; t++) { + DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], + env->agents[idx].log_trajectory_z[t]}, + 0.15f, EXPERT_REPLAY); + } + } + } + + draw_scene(env, client, 1, 0, 0, 0); + } else if (view_mode == VIEW_MODE_BEV_AGENT_OBS) { // Orthographic bird's-eye view centered on the selected agent, // showing only that agent's observations diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 539db8b81..94d2b8b6b 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -11,9 +11,10 @@ class RenderView(IntEnum): - FULL_SIM_STATE = 0 # Orthographic top-down, fully observable simulator state + FULL_SIM_STATE = 0 # Orthographic top-down, fully observable simulator state (zoomed in, origin-centered) BEV_AGENT_OBS = 1 # Orthographic top-down, only show what the selected agent can observe AGENT_PERSP = 2 # Third-person perspective following selected agent + FULL_SIM_STATE_ZOOMED_OUT = 3 # Orthographic top-down, full map bbox view (mirrors old visualize.c zoom_in=false) class Drive(pufferlib.PufferEnv): diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 0fdb48428..a8db5d955 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -197,7 +197,7 @@ static PyObject *env_reset(PyObject *self, PyObject *args) { static PyObject *env_step(PyObject *self, PyObject *args) { int num_args = PyTuple_Size(args); if (num_args != 1) { - PyErr_SetString(PyExc_TypeError, "vec_render requires 1 argument"); + PyErr_SetString(PyExc_TypeError, "env_step requires 1 argument"); return NULL; } diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 424baf4f6..3d3d3bb52 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -15,6 +15,7 @@ import random import shutil import subprocess +import tempfile import argparse import importlib import configparser @@ -37,6 +38,9 @@ 
import pufferlib.pytorch import pufferlib.utils +from pufferlib.ocean.benchmark.evaluator import Evaluator + + try: from pufferlib import _C except ImportError: @@ -55,7 +59,6 @@ import signal # Aggressively exit on ctrl+c import multiprocessing -import queue signal.signal(signal.SIGINT, lambda sig, frame: os._exit(0)) @@ -64,7 +67,8 @@ class PuffeRL: - def __init__(self, config, vecenv, policy, logger=None): + def __init__(self, config, vecenv, policy, logger=None, full_args=None): + self.full_args = full_args # Backend perf optimization torch.set_float32_matmul_precision("high") torch.backends.cudnn.deterministic = config["torch_deterministic"] @@ -124,17 +128,6 @@ def __init__(self, config, vecenv, policy, logger=None): self.ep_lengths = torch.zeros(total_agents, device=device, dtype=torch.int32) self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32) self.free_idx = total_agents - self.render = config["render"] - self.render_async = config["render_async"] - self.render_interval = config["render_interval"] - - safe_eval_renders = config.get("safe_eval", {}).get("enabled", False) - if self.render or safe_eval_renders: - ensure_drive_binary() - - if self.render_async: - self.render_queue = multiprocessing.Queue() - self.render_processes = [] # LSTM if config["use_rnn"]: @@ -203,9 +196,6 @@ def __init__(self, config, vecenv, policy, logger=None): self.logger = logger if logger is None: self.logger = NoLogger(config) - if self.render_async and hasattr(self.logger, "wandb") and self.logger.wandb: - self.logger.wandb.define_metric("render_step", hidden=True) - self.logger.wandb.define_metric("render/*", step_metric="render_step") # Learning rate scheduler epochs = config["total_timesteps"] // config["batch_size"] @@ -516,124 +506,38 @@ def train(self): self.save_checkpoint() self.msg = f"Checkpoint saved at update {self.epoch}" - if self.render and self.epoch % self.render_interval == 0: - model_dir = os.path.join(self.config["data_dir"], 
f"{self.config['env']}_{self.logger.run_id}") - model_files = glob.glob(os.path.join(model_dir, "model_*.pt")) - - if model_files: - # Take the latest checkpoint - latest_cpt = max(model_files, key=os.path.getctime) - bin_path = f"{model_dir}.bin" - - # Export to .bin for rendering with raylib - try: - export_args = {"env_name": self.config["env"], "load_model_path": latest_cpt, **self.config} - - export( - args=export_args, - env_name=self.config["env"], - vecenv=self.vecenv, - policy=self.uncompiled_policy, - path=bin_path, - silent=True, - ) - - bin_path_epoch = f"{model_dir}_epoch_{self.epoch:06d}.bin" - shutil.copy2(bin_path, bin_path_epoch) - - driver_env = getattr(self.vecenv, "driver_env", None) - render_ini = pufferlib.utils.generate_env_ini( - self.config.get("env_config", {}), prefix="render_" - ) - self._render_videos( - bin_path=bin_path_epoch, - num_maps=getattr(driver_env, "num_maps", None), - map_dir=getattr(driver_env, "map_dir", None), - wandb_prefix="render", - config_path=render_ini, - cleanup_files=[bin_path_epoch, bin_path, render_ini], - ) - - except Exception as e: - print(f"Failed to export model weights: {e}") - - if self.config["eval"]["wosac_realism_eval"] and ( - (self.epoch - 1) % self.config["eval"]["eval_interval"] == 0 or done_training - ): - pufferlib.utils.run_wosac_eval_in_subprocess(self.config, self.logger, self.global_step) + if self.epoch % self.config["eval"]["eval_interval"] == 0 or done_training: + human_replay_eval = self.config["eval"]["human_replay_eval"] + self_play_eval = self.config["eval"]["self_play_eval"] + safe_eval = self.config["eval"]["safe_eval"] + + self.evaluator = Evaluator(self.full_args, self.logger) + if human_replay_eval: + self.evaluator.hr_env = load_env("puffer_drive", self.evaluator.hr_eval_config) + self.evaluator.rollout(self.uncompiled_policy, mode="human_replay") + self.evaluator.hr_env.close() + self.evaluator.log_videos(eval_mode="human_replay", epoch=self.epoch) + if self_play_eval: + 
self.evaluator.sp_env = load_env("puffer_drive", self.evaluator.sp_eval_config)
+                    self.evaluator.rollout(self.uncompiled_policy, mode="self_play")
+                    self.evaluator.sp_env.close()
+                    self.evaluator.log_videos(eval_mode="self_play", epoch=self.epoch)
+                if safe_eval:
+                    self.evaluator.safe_env = load_env("puffer_drive", self.evaluator.safe_eval_config)
+                    self.evaluator.rollout(self.uncompiled_policy, mode="safe_eval")
+                    self.evaluator.safe_env.close()
+                    self.evaluator.log_videos(eval_mode="safe_eval", epoch=self.epoch)
+                if human_replay_eval or self_play_eval or safe_eval:
+                    self.evaluator.log_stats()
+
+                if self.config["eval"]["wosac_realism_eval"]:
+                    pufferlib.utils.run_wosac_eval_in_subprocess(self.config, self.logger, self.global_step)
 
             if self.config["eval"]["human_replay_eval"] and (
                 (self.epoch - 1) % self.config["eval"]["eval_interval"] == 0 or done_training
             ):
                 pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step)
 
-            safe_eval_config = self.config.get("safe_eval", {})
-            safe_eval_enabled = safe_eval_config.get("enabled", False)
-            safe_eval_interval = int(safe_eval_config.get("interval", self.render_interval))
-            is_main = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0
-            if (
-                is_main
-                and safe_eval_enabled
-                and safe_eval_interval > 0
-                and (self.epoch % safe_eval_interval == 0 or done_training)
-            ):
-                self._run_safe_eval()
-
-    def _render_videos(
-        self,
-        bin_path,
-        num_maps=None,
-        map_dir=None,
-        wandb_prefix="render",
-        config_path=None,
-        cleanup_files=None,
-    ):
-        """Render videos, either async (background process) or sync (blocking)."""
-        wandb_log = hasattr(self.logger, "wandb") and self.logger.wandb is not None
-        render_kwargs = dict(
-            config=self.config,
-            run_id=self.logger.run_id,
-            num_maps=num_maps,
-            map_dir=map_dir,
-            wandb_log=wandb_log,
-            epoch=self.epoch,
-            global_step=self.global_step,
-            bin_path=bin_path,
-            config_path=config_path,
-            wandb_prefix=wandb_prefix,
-        )
-
-        if 
self.render_async: - self._reap_render_processes() - max_processes = 3 - if len(self.render_processes) >= max_processes: - print(f"Waiting for render processes to finish ({len(self.render_processes)}/{max_processes})...") - while len(self.render_processes) >= max_processes: - time.sleep(1) - self._reap_render_processes() - - render_proc = multiprocessing.Process( - target=pufferlib.utils.render_videos_and_cleanup, - kwargs={ - **render_kwargs, - "cleanup_files": cleanup_files or [], - "render_async": True, - "render_queue": self.render_queue, - }, - daemon=True, - ) - render_proc.start() - self.render_processes.append(render_proc) - else: - pufferlib.utils.render_videos( - **render_kwargs, - render_async=False, - wandb_run=self.logger.wandb if wandb_log else None, - ) - for f in cleanup_files or []: - if os.path.exists(f): - os.remove(f) - def _run_safe_eval(self): """Run safe eval in-process using SafeEvaluator, then render videos.""" import copy @@ -709,56 +613,7 @@ def _run_safe_eval(self): except OSError: pass - def _reap_render_processes(self): - """Remove finished render processes from the tracking list.""" - if not self.render_async: - return - alive = [] - for p in self.render_processes: - if p.is_alive(): - alive.append(p) - else: - p.join(timeout=1) - self.render_processes = alive - - def check_render_queue(self): - """Check if any async render jobs finished and log them.""" - if not self.render_async: - return - self._reap_render_processes() - - try: - while True: - try: - result = self.render_queue.get_nowait() - except queue.Empty: - break - - step = result["step"] - videos = result["videos"] - prefix = result.get("prefix", "render") - - if hasattr(self.logger, "wandb") and self.logger.wandb: - import wandb - - payload = {} - if videos["output_topdown"]: - payload[f"{prefix}/world_state"] = [ - wandb.Video(p, format="mp4") for p in videos["output_topdown"] - ] - if videos["output_agent"]: - payload[f"{prefix}/agent_view"] = [wandb.Video(p, 
format="mp4") for p in videos["output_agent"]] - - if payload: - payload["train_step"] = step - self.logger.wandb.log(payload) - except Exception as e: - print(f"Error reading render queue: {e}") - def mean_and_log(self): - # Check render queue for finished async jobs - self.check_render_queue() - config = self.config for k in list(self.stats.keys()): v = self.stats[k] @@ -795,22 +650,6 @@ def close(self): self.vecenv.close() self.utilization.stop() - if self.render_async: - # Drain any remaining async render results before closing - self.check_render_queue() - for p in self.render_processes: - try: - if p.is_alive(): - p.terminate() - p.join(timeout=5) - if p.is_alive(): - p.kill() - except Exception: - print(f"Failed to terminate render process {p.pid}") - self.render_processes = [] - self.render_queue.close() - self.render_queue.join_thread() - model_path = self.save_checkpoint() run_id = self.logger.run_id project_name = "puffer_drive" @@ -1274,7 +1113,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): ) if "vec" in args and "num_workers" in args["vec"]: train_config["num_workers"] = args["vec"]["num_workers"] - pufferl = PuffeRL(train_config, vecenv, policy, logger) + pufferl = PuffeRL(train_config, vecenv, policy, logger, full_args=args) all_logs = [] while pufferl.global_step < train_config["total_timesteps"]: @@ -1365,38 +1204,6 @@ def eval(env_name, args=None, vecenv=None, policy=None): vecenv.close() return results_dict - elif human_replay_enabled: - args["env"]["map_dir"] = args["eval"]["map_dir"] - dataset_name = args["env"]["map_dir"].split("/")[-1] - print(f"Running human replay evaluation with {dataset_name} dataset.\n") - from pufferlib.ocean.benchmark.evaluator import HumanReplayEvaluator - - backend = args["eval"].get("backend", "PufferEnv") - args["env"]["map_dir"] = args["eval"]["map_dir"] - args["env"]["num_agents"] = args["eval"]["human_replay_num_agents"] - - args["vec"] = dict(backend=backend, num_envs=1) - 
args["env"]["control_mode"] = args["eval"]["human_replay_control_mode"] - args["env"]["episode_length"] = 91 # WOMD scenario length - - vecenv = vecenv or load_env(env_name, args) - policy = policy or load_policy(args, vecenv, env_name) - - print(f"Effective number of scenarios used: {len(vecenv.driver_env.agent_offsets) - 1}") - - evaluator = HumanReplayEvaluator(args) - - # Run rollouts with human replays - results = evaluator.rollout(args, vecenv, policy) - - import json - - print("HUMAN_REPLAY_METRICS_START") - print(json.dumps(results)) - print("HUMAN_REPLAY_METRICS_END") - - return results - else: # Standard evaluation: Render backend = args["vec"]["backend"] if backend != "PufferEnv": @@ -1418,23 +1225,12 @@ def eval(env_name, args=None, vecenv=None, policy=None): lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), ) - frames = [] + if driver.render_mode == 1: + max_frames = 91 + frame_count = 0 + while True: - render = driver.render() - if len(frames) < args["save_frames"]: - frames.append(render) - - # Screenshot Ocean envs with F12, gifs with control + F12 - if driver.render_mode == "ansi": - print("\033[0;0H" + render + "\n") - time.sleep(1 / args["fps"]) - elif driver.render_mode == "rgb_array": - pass - # import cv2 - # render = cv2.cvtColor(render, cv2.COLOR_RGB2BGR) - # cv2.imshow('frame', render) - # cv2.waitKey(1) - # time.sleep(1/args['fps']) + driver.render() with torch.no_grad(): ob = torch.as_tensor(ob).to(device) @@ -1445,13 +1241,14 @@ def eval(env_name, args=None, vecenv=None, policy=None): if isinstance(logits, torch.distributions.Normal): action = np.clip(action, vecenv.action_space.low, vecenv.action_space.high) - ob = vecenv.step(action)[0] + ob, reward, done, truncated, info = vecenv.step(action) - if len(frames) > 0 and len(frames) == args["save_frames"]: - import imageio + if driver.render_mode == 1: + frame_count += 1 + if frame_count >= max_frames or done.all() or truncated.all(): + break - 
imageio.mimsave(args["gif_path"], frames, fps=args["fps"], loop=0) - frames.append("Done") + vecenv.close() def sweep(args=None, env_name=None): @@ -1641,27 +1438,6 @@ def export(args=None, env_name=None, vecenv=None, policy=None, path=None, silent print(f"Saved {len(weights)} weights to {path}") -def ensure_drive_binary(): - """Delete existing visualize binary and rebuild it. This ensures the - binary is always up-to-date with the latest code changes. - """ - if os.path.exists("./visualize"): - os.remove("./visualize") - - try: - result = subprocess.run( - ["bash", "scripts/build_ocean.sh", "visualize", "fast"], capture_output=True, text=True, timeout=300 - ) - - if result.returncode != 0: - print(f"Build failed: {result.stderr}") - raise RuntimeError("Failed to build visualize binary for rendering") - except subprocess.TimeoutExpired: - raise RuntimeError("Build timed out") - except Exception as e: - raise RuntimeError(f"Build error: {e}") - - def autotune(args=None, env_name=None, vecenv=None, policy=None): package = args["package"] module_name = "pufferlib.ocean" if package == "ocean" else f"pufferlib.environments.{package}" @@ -1731,6 +1507,7 @@ def load_config(env_name, config_dir=None): add_help=False, ) parser.add_argument("--load-model-path", type=str, default=None, help="Path to a pretrained checkpoint") + parser.add_argument("--export-path", type=str, default=None, help="Output path for puffer export (.bin file)") parser.add_argument( "--load-id", type=str, default=None, help="Kickstart/eval from from a finished Wandb/Neptune run" ) @@ -1806,85 +1583,151 @@ def puffer_type(value): def render(env_name, args=None): + """Render rollouts for a batch of maps using the in-process c_render pipeline. + + Each map is loaded as a separate environment with render_mode=1 (headless + ffmpeg). A policy rollout is run for max_frames steps, producing one + {scenario_id}.mp4 per env in the current working directory. The files are + then moved into output_dir. 
+ """ + from pufferlib.ocean.drive.drive import RenderView + args = args or load_config(env_name) render_configs = args.get("render", {}) - # Renders first num_maps from map_dir using visualize binary try: map_dir = render_configs["map_dir"] num_maps = render_configs.get("num_maps", 1) - view_mode = render_configs["view_mode"] - render_policy_path = render_configs["policy_path"] - overwork = render_configs.get("overwork", False) - num_workers = args["vec"]["num_workers"] + view_mode_str = str(render_configs.get("view_mode", "sim_state")).lower().strip('"').strip("'") + draw_traces = render_configs.get("draw_traces", True) + max_frames = render_configs.get("max_frames", 91) output_dir = render_configs["output_dir"] + # Allow [render] to override [env] init/control modes — important for logged-trajectory + # maps (e.g. carla_2D) which need create_all_valid + control_vehicles, not the training + # init_variable_agent_number mode that spawns agents on road lanes. + render_init_mode = ( + str(render_configs["init_mode"]).strip('"').strip("'") if "init_mode" in render_configs else None + ) + render_control_mode = ( + str(render_configs["control_mode"]).strip('"').strip("'") if "control_mode" in render_configs else None + ) except KeyError as e: raise pufferlib.APIUsageError(f"Missing render config: {e}") - cpu_cores = psutil.cpu_count(logical=False) - if num_workers > cpu_cores and not overwork: - raise pufferlib.APIUsageError( - " ".join( - [ - f"num_workers ({num_workers}) > hardware cores ({cpu_cores}) is disallowed by default.", - "PufferLib multiprocessing is heavily optimized for 1 process per hardware core.", - "If you really want to do this, set overwork=True (--vec-overwork in our demo.py).", - ] - ) - ) + # Map config string → RenderView enum + _VIEW_MODE_MAP = { + "sim_state": RenderView.FULL_SIM_STATE, + "topdown": RenderView.FULL_SIM_STATE, # backward compat + "bev": RenderView.BEV_AGENT_OBS, + "agent": RenderView.BEV_AGENT_OBS, # backward compat + "persp": 
RenderView.AGENT_PERSP,
+        "zoom_out": RenderView.FULL_SIM_STATE_ZOOMED_OUT,
+        "full_map": RenderView.FULL_SIM_STATE_ZOOMED_OUT,  # backward compat alias
+    }
+    view_mode = _VIEW_MODE_MAP.get(view_mode_str)
+    if view_mode is None:
+        raise pufferlib.APIUsageError(f"Unknown view_mode '{view_mode_str}'. Choose from: sim_state, bev, persp")
+
+    bin_files = sorted(f for f in os.listdir(map_dir) if f.endswith(".bin"))
+    if num_maps > len(bin_files):
+        num_maps = len(bin_files)
+    render_maps = [os.path.join(map_dir, f) for f in bin_files[:num_maps]]
 
-    if num_maps > len(os.listdir(map_dir)):
-        num_maps = len(os.listdir(map_dir))
-
-    render_maps = [os.path.join(map_dir, f) for f in sorted(os.listdir(map_dir)) if f.endswith(".bin")][:num_maps]
     os.makedirs(output_dir, exist_ok=True)
 
-    # Rebuild visualize binary
-    ensure_drive_binary()
+    # Fall back to CPU if the configured device is unavailable (e.g. no CUDA on this machine)
+    configured_device = args["train"]["device"]
+    if configured_device == "cuda" and not torch.cuda.is_available():
+        print("Warning: CUDA not available, falling back to CPU for render.")
+        configured_device = "cpu"
+    args["train"]["device"] = configured_device
+    device = configured_device
+
+    def render_one_map(map_path):
+        """Render a single map file and move the resulting mp4(s) to output_dir.
+
+        The map binary is copied into a private temp directory so the C
+        loader sees exactly one .bin file (num_maps=1). After env.close()
+        finalizes the ffmpeg pipe, we glob for new *.mp4 files in cwd and
+        move them — no fragile scenario_id filename prediction needed.
+ """ + map_name = os.path.splitext(os.path.basename(map_path))[0] + + with tempfile.TemporaryDirectory() as tmp_map_dir: + # Hard-link (or copy) the single .bin into an isolated dir + tmp_bin = os.path.join(tmp_map_dir, os.path.basename(map_path)) + shutil.copy2(map_path, tmp_bin) + + env_overrides = { + **args["env"], + "num_maps": 1, + "map_dir": tmp_map_dir, + "render_mode": 1, # headless ffmpeg → writes {scenario_id}.mp4 in cwd + } + # Override init/control modes if [render] section specifies them. + # carla_2D logged-trajectory maps need create_all_valid + control_vehicles; + # the training default (init_variable_agent_number) spawns no agents on these maps. + if render_init_mode is not None: + env_overrides["init_mode"] = render_init_mode + if render_control_mode is not None: + env_overrides["control_mode"] = render_control_mode + + map_args = { + **args, + "env": env_overrides, + "vec": {"backend": "Serial", "num_envs": 1}, + } + + env = load_env(env_name, map_args) + policy = load_policy(map_args, env, env_name) + policy.eval() + + ob, _ = env.reset() + driver = env.driver_env + + state = {} + if map_args["train"]["use_rnn"]: + n = env.agents_per_batch if hasattr(env, "agents_per_batch") else ob.shape[0] + state = dict( + lstm_h=torch.zeros(n, policy.hidden_size, device=device), + lstm_c=torch.zeros(n, policy.hidden_size, device=device), + ) + + # Snapshot which mp4s exist before this rollout + before = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) - def render_task(map_path): - base_cmd = ( - ["./visualize"] - if sys.platform == "darwin" - else ["xvfb-run", "-a", "-s", "-screen 0 1280x720x24", "./visualize"] - ) - cmd = base_cmd.copy() - cmd.extend(["--map-name", map_path]) - if render_configs.get("show_grid", False): - cmd.append("--show-grid") - if render_configs.get("obs_only", False): - cmd.append("--obs-only") - if render_configs.get("show_lasers", False): - cmd.append("--lasers") - if render_configs.get("show_human_logs", False): - 
cmd.append("--show-human-logs") - if render_configs.get("zoom_in", False): - cmd.append("--zoom-in") - cmd.extend(["--view", view_mode]) - if render_policy_path is not None: - cmd.extend(["--policy-name", render_policy_path]) - - map_name = os.path.basename(map_path).replace(".bin", "") - - if view_mode == "topdown" or view_mode == "both": - cmd.extend(["--output-topdown", os.path.join(output_dir, f"topdown_{map_name}.mp4")]) - if view_mode == "agent" or view_mode == "both": - cmd.extend(["--output-agent", os.path.join(output_dir, f"agent_{map_name}.mp4")]) - - env_vars = os.environ.copy() - env_vars["ASAN_OPTIONS"] = "exitcode=0" - try: - result = subprocess.run(cmd, cwd=os.getcwd(), capture_output=True, text=True, timeout=600, env=env_vars) - if result.returncode != 0: - print(f"Error rendering {map_name}: {result.stderr}") - except subprocess.TimeoutExpired: - print(f"Timeout rendering {map_name}: exceeded 600 seconds") + for _ in range(max_frames): + driver.render(view_mode=view_mode, draw_traces=draw_traces) + + with torch.no_grad(): + ob_t = torch.as_tensor(ob).to(device) + logits, _ = policy.forward_eval(ob_t, state) + action, _, _ = pufferlib.pytorch.sample_logits(logits) + action = action.cpu().numpy().reshape(env.action_space.shape) + + ob, _, done, truncated, _ = env.step(action) + if done.all() or truncated.all(): + break + + # env.close() finalizes the ffmpeg pipe and flushes the mp4 to disk + env.close() + + # Move any new mp4s that appeared since before the rollout + after = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) + new_mp4s = after - before + if new_mp4s: + for src in sorted(new_mp4s): + dst = os.path.join(output_dir, os.path.basename(src)) + shutil.move(src, dst) + print(f" Saved {dst}") + else: + print(f" Warning: no mp4 produced for map {map_name}") if render_maps: - print(f"Rendering {len(render_maps)} from {map_dir} with {num_workers} workers...") - with ThreadPool(num_workers) as pool: - pool.map(render_task, render_maps) - 
print(f"Finished rendering videos to {output_dir}") + print(f"Rendering {len(render_maps)} map(s) from {map_dir} → {output_dir} ...") + for map_path in render_maps: + render_one_map(map_path) + print(f"Done. Videos written to {output_dir}") def main(): @@ -1907,7 +1750,8 @@ def main(): elif mode == "profile": profile(env_name=env_name) elif mode == "export": - export(env_name=env_name) + args = load_config(env_name) + export(env_name=env_name, args=args, path=args.get("export_path")) elif mode == "sanity": sanity(env_name=env_name) elif mode == "render": diff --git a/pufferlib/utils.py b/pufferlib/utils.py index 9eda5827e..9c6c5185c 100644 --- a/pufferlib/utils.py +++ b/pufferlib/utils.py @@ -62,73 +62,6 @@ def generate_safe_eval_ini(safe_eval_config, base_ini_path="pufferlib/config/oce return tmp_path -def run_human_replay_eval_in_subprocess(config, logger, global_step): - """ - Run human replay evaluation in a subprocess and log metrics to wandb. - - """ - try: - run_id = logger.run_id - model_dir = os.path.join(config["data_dir"], f"{config['env']}_{run_id}") - model_files = glob.glob(os.path.join(model_dir, "model_*.pt")) - - if not model_files: - print("No model files found for human replay evaluation") - return - - latest_cpt = max(model_files, key=os.path.getctime) - - # Prepare evaluation command - eval_config = config["eval"] - cmd = [ - sys.executable, - "-m", - "pufferlib.pufferl", - "eval", - config["env"], - "--load-model-path", - latest_cpt, - "--eval.wosac-realism-eval", - "False", - "--eval.human-replay-eval", - "True", - "--eval.human-replay-num-agents", - str(eval_config["human_replay_num_agents"]), - "--eval.human-replay-control-mode", - str(eval_config["human_replay_control_mode"]), - ] - - # Run human replay evaluation in subprocess - result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, cwd=os.getcwd()) - - if result.returncode == 0: - # Extract JSON from stdout between markers - stdout = result.stdout - if 
"HUMAN_REPLAY_METRICS_START" in stdout and "HUMAN_REPLAY_METRICS_END" in stdout: - start = stdout.find("HUMAN_REPLAY_METRICS_START") + len("HUMAN_REPLAY_METRICS_START") - end = stdout.find("HUMAN_REPLAY_METRICS_END") - json_str = stdout[start:end].strip() - human_replay_metrics = json.loads(json_str) - - # Log to wandb if available - if hasattr(logger, "wandb") and logger.wandb: - logger.wandb.log( - { - "eval/human_replay_collision_rate": human_replay_metrics["collision_rate"], - "eval/human_replay_offroad_rate": human_replay_metrics["offroad_rate"], - "eval/human_replay_completion_rate": human_replay_metrics["completion_rate"], - }, - step=global_step, - ) - else: - print(f"Human replay evaluation failed with exit code {result.returncode}: {result.stderr}") - - except subprocess.TimeoutExpired: - print("Human replay evaluation timed out") - except Exception as e: - print(f"Failed to run human replay evaluation: {e}") - - def run_wosac_eval_in_subprocess(config, logger, global_step): """ Run WOSAC evaluation in a subprocess and log metrics to wandb. @@ -225,178 +158,3 @@ def run_wosac_eval_in_subprocess(config, logger, global_step): print(f"WOSAC evaluation ran out of memory. 
Skipping this evaluation: {e}") except Exception as e: print(f"Failed to run WOSAC evaluation: {type(e).__name__}: {e}") - - -def render_videos( - config, - run_id, - wandb_log, - epoch, - global_step, - bin_path, - render_async, - render_queue=None, - wandb_run=None, - config_path=None, - wandb_prefix="render", - num_maps=None, - map_dir=None, -): - """Generate and log training videos using C-based rendering.""" - if not os.path.exists(bin_path): - print(f"Binary weights file does not exist: {bin_path}") - return - - model_dir = os.path.join(config["data_dir"], f"{config['env']}_{run_id}") - - # Now call the C rendering function - try: - # Create output directory for videos - video_output_dir = os.path.join(model_dir, "videos") - os.makedirs(video_output_dir, exist_ok=True) - - # TODO: Fix memory leaks so that this is not needed - # Suppress AddressSanitizer exit code (temp) - env_vars = os.environ.copy() - env_vars["ASAN_OPTIONS"] = "exitcode=0" - - # Base command with only visualization flags (env config comes from INI) - base_cmd = ["xvfb-run", "-a", "-s", "-screen 0 1280x720x24", "./visualize"] - - if config_path: - base_cmd.extend(["--config", config_path]) - - # Visualization config flags only - if config.get("show_grid", False): - base_cmd.append("--show-grid") - if config.get("obs_only", False): - base_cmd.append("--obs-only") - if config.get("show_lasers", False): - base_cmd.append("--lasers") - if config.get("show_human_logs", False): - base_cmd.append("--log-trajectories") - if config.get("zoom_in", False): - base_cmd.append("--zoom-in") - - # Frame skip for rendering performance - frame_skip = config.get("frame_skip", 1) - if frame_skip > 1: - base_cmd.extend(["--frame-skip", str(frame_skip)]) - - # View mode - view_mode = config.get("view_mode", "both") - base_cmd.extend(["--view", view_mode]) - - if num_maps: - base_cmd.extend(["--num-maps", str(num_maps)]) - - base_cmd.extend(["--policy-name", bin_path]) - - # Handle single or multiple map rendering 
- render_maps = config.get("render_map", None) - if render_maps is None or render_maps == "none": - pass # use map_dir passed as parameter - if map_dir and os.path.isdir(map_dir): - bin_files = [f for f in os.listdir(map_dir) if f.endswith(".bin")] - if bin_files: - render_maps = [os.path.join(map_dir, random.choice(bin_files))] - else: - print(f"Warning: No .bin files found in {map_dir}, skipping render") - return - else: - print(f"Warning: map_dir not found or invalid ({map_dir}), skipping render") - return - elif isinstance(render_maps, (str, os.PathLike)): - render_maps = [render_maps] - else: - render_maps = list(render_maps) - - generated_videos = {"output_topdown": [], "output_agent": []} - output_topdown = f"resources/drive/{wandb_prefix}_output_topdown_{epoch}" - output_agent = f"resources/drive/{wandb_prefix}_output_agent_{epoch}" - - for i, map_path in enumerate(render_maps): - cmd = list(base_cmd) # copy - if os.path.exists(map_path): - cmd.extend(["--map-name", str(map_path)]) - - output_topdown_map = output_topdown + (f"_map{i:02d}.mp4" if len(render_maps) > 1 else ".mp4") - output_agent_map = output_agent + (f"_map{i:02d}.mp4" if len(render_maps) > 1 else ".mp4") - - cmd.extend(["--output-topdown", output_topdown_map]) - cmd.extend(["--output-agent", output_agent_map]) - - print(f"Running render: {' '.join(cmd[:6])}...") - result = subprocess.run(cmd, cwd=os.getcwd(), capture_output=True, text=True, timeout=1200, env=env_vars) - - vids_exist = os.path.exists(output_topdown_map) and os.path.exists(output_agent_map) - print(f"Render exit code: {result.returncode}, vids_exist: {vids_exist}") - if result.returncode != 0 and result.stderr: - print(f"Render stderr: {result.stderr[-500:]}") - - if result.returncode == 0 or (result.returncode == 1 and vids_exist): - videos = [ - ("output_topdown", output_topdown_map, f"epoch_{epoch:06d}_map{i:02d}_topdown.mp4"), - ("output_agent", output_agent_map, f"epoch_{epoch:06d}_map{i:02d}_agent.mp4"), - ] - - for 
vid_type, source_vid, target_filename in videos: - if os.path.exists(source_vid): - target_path = os.path.join(video_output_dir, target_filename) - shutil.move(source_vid, target_path) - generated_videos[vid_type].append(target_path) - else: - print(f"Video generation completed but {source_vid} not found") - if result.stdout: - print(f"StdOUT: {result.stdout}") - if result.stderr: - print(f"StdERR: {result.stderr}") - else: - print(f"C rendering failed (map index {i}) with exit code {result.returncode}: {result.stderr}") - - if render_async: - render_queue.put( - { - "videos": generated_videos, - "step": global_step, - "prefix": wandb_prefix, - } - ) - elif wandb_log and wandb_run: - import wandb - - payload = {} - if generated_videos["output_topdown"]: - payload[f"{wandb_prefix}/world_state"] = [ - wandb.Video(p, format="mp4") for p in generated_videos["output_topdown"] - ] - if generated_videos["output_agent"]: - payload[f"{wandb_prefix}/agent_view"] = [ - wandb.Video(p, format="mp4") for p in generated_videos["output_agent"] - ] - if payload: - print(f"Logging {len(payload)} video keys to wandb: {list(payload.keys())}") - payload["train_step"] = global_step - wandb_run.log(payload) - - except subprocess.TimeoutExpired: - print("C rendering timed out") - except Exception as e: - print(f"Failed to render videos: {e}") - - -def render_videos_and_cleanup(cleanup_files=None, **render_kwargs): - """Wrapper that runs render_videos then cleans up temp files. - - Intended as the target for multiprocessing.Process so that temp files - (bin weights, generated INI) are cleaned up inside the spawned process. 
- """ - try: - render_videos(**render_kwargs) - finally: - for f in cleanup_files or []: - try: - if os.path.exists(f): - os.remove(f) - except OSError: - pass From 33b4c93287aec0cd0d7931efc6764c244cee82e0 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Wed, 25 Mar 2026 16:32:39 -0400 Subject: [PATCH 04/23] Fixing bug with top-down render --- pufferlib/config/ocean/drive.ini | 8 ++--- pufferlib/ocean/drive/drive.c | 2 -- pufferlib/ocean/drive/drive.h | 50 +++----------------------------- pufferlib/ocean/drive/drive.py | 1 - pufferlib/pufferl.py | 8 +++-- 5 files changed, 14 insertions(+), 55 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index bc78f4bcb..11f9cd793 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -65,7 +65,7 @@ control_mode = "control_agents" ; Options: "create_all_valid", "create_only_controlled", "init_variable_agent_number"(creates random number of controlled agents per env) init_mode = "init_variable_agent_number" ; Below options only valid for "init_variable_agent_number" init_mode -min_agents_per_env = 1 +min_agents_per_env = 31 max_agents_per_env = 32 ; Dimension Ranges for agents spawn_width_min = 1.5 @@ -272,14 +272,14 @@ acc = 1.0 [render] ; Render a batch of maps offline using the in-process c_render (ffmpeg) pipeline. 
; Path to the .bin map directory -map_dir = "resources/drive/binaries/carla_2D" +map_dir = "resources/drive/binaries/carla/carla_3D" ; Directory to write output mp4 files output_dir = "resources/drive/render_videos" ; Number of maps to render (capped at files available in map_dir) -num_maps = 100 +num_maps = 3 ; View mode: sim_state (top-down, origin-centered), zoom_out (top-down, full map bbox), bev (agent BEV obs), persp (third-person follow-cam) ; NOTE: "persp" and "bev" require active_agent_count > 0 -view_mode = "sim_state" +view_mode = "bev" ; Whether to draw agent trajectory traces draw_traces = True ; Maximum number of frames to render per map diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index aaae593bd..d1a408b5a 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -300,8 +300,6 @@ int main(int argc, char *argv[]) { view_mode = VIEW_MODE_BEV_AGENT_OBS; } else if (strcmp(v, "persp") == 0) { view_mode = VIEW_MODE_AGENT_PERSP; - } else if (strcmp(v, "zoom_out") == 0 || strcmp(v, "full_map") == 0) { - view_mode = VIEW_MODE_SIM_STATE_ZOOMED_OUT; } else { fprintf(stderr, "Error: --view must be 'sim_state', 'bev', 'persp', or 'zoom_out'\n"); return 1; diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 33b8719ca..8181e60e9 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -21,9 +21,6 @@ #define VIEW_MODE_SIM_STATE 0 #define VIEW_MODE_BEV_AGENT_OBS 1 #define VIEW_MODE_AGENT_PERSP 2 -#define VIEW_MODE_SIM_STATE_ZOOMED_OUT \ - 3 // Full map, camera centered on map bbox (matches old visualize.c zoom_in=false) - // // Order of entities in rendering (lower is rendered first) // #define Z_ROAD_SURFACE 0.0f // #define Z_ROAD_MARKINGS 0.05f // Lane lines, road lines, traces @@ -4086,20 +4083,20 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, // Draw the agent bounding boxes Color agent_color = GRAY; if (is_expert) { - 
if (agent[i].type == PEDESTRIAN || agent[i].type == CYCLIST) + if (agent->type == PEDESTRIAN || agent->type == CYCLIST) agent_color = EXPERT_REPLAY_SMALL; else agent_color = EXPERT_REPLAY; } if (is_active_agent) { - if (agent[i].type == PEDESTRIAN) + if (agent->type == PEDESTRIAN) agent_color = LIGHT_ORANGE; - else if (agent[i].type == CYCLIST) + else if (agent->type == CYCLIST) agent_color = LIGHT_PURPLE; else agent_color = BLUE; } - if (is_active_agent && agent[i].collision_state > 0) + if (is_active_agent && agent->collision_state > 0) agent_color = RED; rlPushMatrix(); @@ -4356,45 +4353,6 @@ void c_render(Drive *env, int view_mode, int draw_traces) { draw_scene(env, client, 1, 0, 0, 0); - } else if (view_mode == VIEW_MODE_SIM_STATE_ZOOMED_OUT) { - // Orthographic bird's-eye view showing the full map, camera centered on the - // map bounding box — mirrors the old visualize.c renderTopDownView zoom_in=false path. - camera.position = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 500.0f}; - camera.target = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 0.0f}; - camera.up = (Vector3){0.0f, -1.0f, 0.0f}; - camera.projection = CAMERA_ORTHOGRAPHIC; - camera.fovy = map_height * 2.0f; // 2× height shows the full map - - BeginDrawing(); - ClearBackground(ROAD_COLOR); - BeginMode3D(camera); - - if (draw_traces) { - for (int i = 0; i < env->active_agent_count; i++) { - int idx = env->active_agent_indices[i]; - for (int t = env->init_steps; t < env->episode_length; t++) { - Color agent_color = LIGHTBLUE; - if (env->agents[idx].type == PEDESTRIAN) - agent_color = LIGHT_ORANGE; - else if (env->agents[idx].type == CYCLIST) - agent_color = LIGHT_PURPLE; - DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], - env->agents[idx].log_trajectory_z[t]}, - 0.15f, agent_color); - } - } - for (int i = 0; i < env->expert_static_agent_count; i++) { - int idx = env->expert_static_agent_indices[i]; - for 
(int t = env->init_steps; t < env->episode_length; t++) { - DrawSphere((Vector3){env->agents[idx].log_trajectory_x[t], env->agents[idx].log_trajectory_y[t], - env->agents[idx].log_trajectory_z[t]}, - 0.15f, EXPERT_REPLAY); - } - } - } - - draw_scene(env, client, 1, 0, 0, 0); - } else if (view_mode == VIEW_MODE_BEV_AGENT_OBS) { // Orthographic bird's-eye view centered on the selected agent, // showing only that agent's observations diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 94d2b8b6b..e650b4e38 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -14,7 +14,6 @@ class RenderView(IntEnum): FULL_SIM_STATE = 0 # Orthographic top-down, fully observable simulator state (zoomed in, origin-centered) BEV_AGENT_OBS = 1 # Orthographic top-down, only show what the selected agent can observe AGENT_PERSP = 2 # Third-person perspective following selected agent - FULL_SIM_STATE_ZOOMED_OUT = 3 # Orthographic top-down, full map bbox view (mirrors old visualize.c zoom_in=false) class Drive(pufferlib.PufferEnv): diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 3d3d3bb52..cce8bf5ad 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1621,8 +1621,6 @@ def render(env_name, args=None): "bev": RenderView.BEV_AGENT_OBS, "agent": RenderView.BEV_AGENT_OBS, # backward compat "persp": RenderView.AGENT_PERSP, - "zoom_out": RenderView.FULL_SIM_STATE_ZOOMED_OUT, - "full_map": RenderView.FULL_SIM_STATE_ZOOMED_OUT, # backward compat alias } view_mode = _VIEW_MODE_MAP.get(view_mode_str) if view_mode is None: @@ -1693,6 +1691,12 @@ def render_one_map(map_path): lstm_c=torch.zeros(n, policy.hidden_size, device=device), ) + # Remove any stale mp4 with this map's name from a previous run so that + # the set-difference detection below doesn't miss the newly written file. 
+ stale = os.path.join(os.getcwd(), f"{map_name}.mp4") + if os.path.exists(stale): + os.remove(stale) + # Snapshot which mp4s exist before this rollout before = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) From 07e64b8a763a8aff2e0b4ccd43bafb250ccb7907 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Wed, 25 Mar 2026 16:56:16 -0400 Subject: [PATCH 05/23] Updating over latest 2.0 first --- pufferlib/config/ocean/drive.ini | 2 + pufferlib/ocean/benchmark/evaluator.py | 82 +++++++++++++++++++++----- pufferlib/ocean/drive/binding.c | 2 + pufferlib/ocean/drive/drive.h | 54 ++++++++++++++--- pufferlib/ocean/drive/drive.py | 36 +++++++---- pufferlib/ocean/env_binding.h | 37 ++++++++++++ pufferlib/pufferl.py | 2 + 7 files changed, 183 insertions(+), 32 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 11f9cd793..7e7113f10 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -188,6 +188,8 @@ num_eval_agents = 64 self_play_eval = True ; If True, enable human replay evaluation (pair policy-controlled agent with human replays) human_replay_eval = False +; Which env to render during eval. Options: "first" (by index), "worst_collision", "random" +render_select_mode = "first" ; If True, render random scenarios. Note: Doing this frequency will slow down the training. 
render_human_replay_eval = False render_self_play_eval = True diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 9f821cf11..29c4bfe6b 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -830,6 +830,11 @@ class Evaluator: """ + RENDER_FIRST = "first" + RENDER_RANDOM = "random" + RENDER_WORST_SCORE = "worst_score" + RENDER_WORST_COLLISION = "worst_collision" + def __init__(self, configs, logger=None): self.configs = configs self.logger = logger @@ -861,13 +866,65 @@ def _unpack_eval_configs(self, configs): self.sp_eval_config = copy.deepcopy(eval_config) self.sp_eval_config["env"]["control_mode"] = "control_agents" self.sp_eval_config["env"]["render_mode"] = 1 if self.render_sp_rollout else 0 + self.render_select_mode = self.configs["eval"]["render_select_mode"] + + def select_render_env(self, env_logs): + """Select which environment to render based on per-env rollout statistics. + Args: + env_logs: List of dicts, one per environment. Each dict contains + aggregated agent statistics (score, collision_rate, offroad_rate, etc.) + with 'n' being the number of controlled agents in that env. + Empty dicts indicate no data was collected for that env. + + Returns: + int: Index of the environment to render. 
+ """ + mode = self.render_select_mode + if mode == self.RENDER_FIRST: + return 0 + if mode == self.RENDER_RANDOM: + return np.random.randint(len(env_logs)) + + populated = [(i, log) for i, log in enumerate(env_logs) if log] - def rollout(self, policy, mode="self_play", render_rollout=False): + if not populated: + return 0 + + if mode == self.RENDER_WORST_SCORE: + return min(populated, key=lambda x: x[1].get("score", 1.0))[0] + elif mode == self.RENDER_WORST_COLLISION: + return max(populated, key=lambda x: x[1].get("collision_rate", 0.0))[0] + # Add other modes based on desiderata here + return 0 + + def rollout(self, policy, mode="self_play"): """Roll out the given policy in the specified eval env and collect statistics.""" env = self.hr_env if mode == "human_replay" else self.sp_env render_eval = self.render_sp_rollout if mode == "self_play" else self.render_hr_rollout driver = env.driver_env + + needs_stats_first = render_eval and self.render_select_mode not in (self.RENDER_FIRST, self.RENDER_RANDOM) + + if needs_stats_first: + env_logs = self._run_rollout(policy, env, per_env_logs=True) + render_env_idx = self.select_render_env(env_logs) + else: + render_env_idx = self.select_render_env([{}] * driver.num_envs) + + info_list = self._run_rollout(policy, env, render_env_idx if render_eval else None) + + final_info = info_list[0] if info_list else {} + if mode == "self_play": + self.self_play_stats = final_info + self.self_play_stats["render_env_idx"] = render_env_idx + elif mode == "human_replay": + self.human_replay_stats = final_info + self.human_replay_stats["render_env_idx"] = render_env_idx + + def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False): + """Run a single rollout. 
If render_env_idx is not None, render that env.""" + driver = env.driver_env num_agents = env.observation_space.shape[0] device = self.configs["train"]["device"] @@ -882,10 +939,10 @@ def rollout(self, policy, mode="self_play", render_rollout=False): lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), ) - # Rollout + info_list = [] for time_idx in range(self.sim_steps): - if render_rollout or render_eval: - driver.render() + if render_env_idx is not None: + driver.render(env_id=render_env_idx) # Get action from policy with torch.no_grad(): @@ -899,18 +956,12 @@ def rollout(self, policy, mode="self_play", render_rollout=False): action_np = np.clip(action_np, env.action_space.low, env.action_space.high) # Step environment - obs, rewards, dones, truncs, info_list = env.step(action_np) + obs, rewards, dones, truncs, info_list = env.step(action_np, per_env_logs=per_env_logs) if truncs.all(): break - # Aggregate final statistics - final_info = info_list[0] if info_list else {} - - if mode == "self_play": - self.self_play_stats = final_info - elif mode == "human_replay": - self.human_replay_stats = final_info + return info_list def log_videos(self, eval_mode, epoch): """Log all mp4s in local path to wandb after env close has flushed ffmpeg pipes.""" @@ -930,12 +981,13 @@ def log_videos(self, eval_mode, epoch): print("Warning: no render videos found in local path") return + render_mode = self.render_select_mode for p in video_files: scenario_id = os.path.splitext(os.path.basename(p))[0] - self.logger.wandb.log( - {f"render/{eval_mode}": wandb.Video(p, format="mp4", caption=f"scene_{scenario_id}_epoch_{epoch}")} - ) + caption = f"scene_{scenario_id}_epoch_{epoch}_select_{render_mode}" + self.logger.wandb.log({f"render/{eval_mode}": wandb.Video(p, format="mp4", caption=caption)}) + # Clean up for p in video_files: os.remove(p) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 8de367de4..1db5ee0e9 100644 --- 
a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -264,6 +264,7 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { free(env->active_agent_indices); free(env->static_agent_indices); free(env->expert_static_agent_indices); + free(env->tracks_to_predict_indices); free(env); Py_DECREF(agent_offsets); Py_DECREF(map_ids); @@ -285,6 +286,7 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { free(env->active_agent_indices); free(env->static_agent_indices); free(env->expert_static_agent_indices); + free(env->tracks_to_predict_indices); free(env); continue; } diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 8181e60e9..c6ecb8510 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -1,3 +1,4 @@ +#include #include #include #include @@ -2341,6 +2342,8 @@ void c_close(Drive *env) { free(env->expert_static_agent_indices); free(env->tracks_to_predict_indices); free(env->ini_file); + free(env->tracks_to_predict_indices); + env->tracks_to_predict_indices = NULL; } void allocate(Drive *env) { @@ -3539,20 +3542,49 @@ struct Client { Vector3 default_camera_target; int recorder_pipefd[2]; pid_t recorder_pid; + pid_t xvfb_pid; + int xvfb_display_num; }; Client *make_client(Drive *env) { Client *client = (Client *)calloc(1, sizeof(Client)); if (env->render_mode == RENDER_HEADLESS && getenv("DISPLAY") == NULL) { - // Start Xvfb and set DISPLAY - pid_t xvfb_pid = fork(); - if (xvfb_pid == 0) { - execlp("Xvfb", "Xvfb", ":99", "-screen", "0", "1x1x24", NULL); + + // Kill any existing Xvfb first + system("pkill -9 Xvfb"); + usleep(200000); + unlink("/tmp/.X99-lock"); + unlink("/tmp/.X11-unix/X99"); + + // Hardcode to single display because we only run this in one process at once + client->xvfb_display_num = 99; + + // Clean up stale lock if process is dead + FILE *f = fopen("/tmp/.X99-lock", "r"); + if (f) { + pid_t pid = -1; + fscanf(f, "%d", &pid); 
+ fclose(f); + if (pid > 0 && kill(pid, 0) != 0) + unlink("/tmp/.X99-lock"); + } + + client->xvfb_pid = fork(); + if (client->xvfb_pid == 0) { + close(STDOUT_FILENO); + close(STDERR_FILENO); + execlp("Xvfb", "Xvfb", ":99", "-screen", "0", "1280x720x24", "+extension", "GLX", "-ac", "-noreset", NULL); _exit(1); } + setenv("DISPLAY", ":99", 1); - usleep(500000); // Give Xvfb 500ms to start + // Xvfb starts asynchronously after fork(), so we poll until it creates its + // lock file (max 2s) then wait an extra 200ms for GLX to finish initializing. + // Without this, raylib's InitWindow() would try to connect before Xvfb is ready. + for (int i = 0; i < 20 && access("/tmp/.X99-lock", F_OK) != 0; i++) + usleep(100000); + usleep(200000); } if (env->render_mode == RENDER_WINDOW) { @@ -4552,10 +4584,18 @@ void close_client(Client *client) { close(client->recorder_pipefd[1]); waitpid(client->recorder_pid, NULL, 0); } - for (int i = 0; i < 6; i++) { + for (int i = 0; i < 6; i++) UnloadModel(client->cars[i]); - } + UnloadModel(client->cyclist); + UnloadModel(client->pedestrian); CloseWindow(); + if (client->xvfb_pid > 0) { + kill(client->xvfb_pid, SIGTERM); + waitpid(client->xvfb_pid, NULL, 0); + unlink("/tmp/.X99-lock"); + unsetenv("DISPLAY"); + } + free(client); } diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index e650b4e38..c464fddb6 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -352,7 +352,7 @@ def __init__( self.map_ids = map_ids self.num_envs = num_envs super().__init__(buf=buf) - env_ids = [] + self.env_ids = [] for i in range(num_envs): cur = agent_offsets[i] nxt = agent_offsets[i + 1] @@ -439,9 +439,9 @@ def __init__( spawn_height=self.spawn_height, render_mode=render_mode, ) - env_ids.append(env_id) + self.env_ids.append(env_id) - self.c_envs = binding.vectorize(*env_ids) + self.c_envs = binding.vectorize(*self.env_ids) def reset(self, seed=0): binding.vec_reset(self.c_envs, seed) @@ -516,7 +516,7 @@ 
def resample_maps(self): self.agent_offsets = agent_offsets self.map_ids = map_ids self.num_envs = num_envs - env_ids = [] + self.env_ids = [] seed = np.random.randint(0, 2**32 - 1) for i in range(num_envs): cur = agent_offsets[i] @@ -603,22 +603,29 @@ def resample_maps(self): spawn_height=self.spawn_height, render_mode=self.render_mode, ) - env_ids.append(env_id) - self.c_envs = binding.vectorize(*env_ids) + self.env_ids.append(env_id) + self.c_envs = binding.vectorize(*self.env_ids) + + self.c_envs = binding.vectorize(*self.env_ids) binding.vec_reset(self.c_envs, seed) self.truncations[:] = 1 - def step(self, actions): + def step(self, actions, per_env_logs=False): self.terminals[:] = 0 self.actions[:] = actions binding.vec_step(self.c_envs) self.tick += 1 info = [] if self.tick % self.report_interval == 0: - log = binding.vec_log(self.c_envs, self.num_agents) - if log: - info.append(log) + if per_env_logs: # Get the stats for every separate env + logs = self.get_env_logs() + if any(logs): + info = logs + else: # Default: Aggregate across vectorized envs + log = binding.vec_log(self.c_envs, self.num_agents) + if log: + info.append(log) if self.tick > 0 and self.resample_frequency > 0 and self.tick % self.resample_frequency == 0: self.resample_maps() @@ -751,6 +758,15 @@ def render(self, view_mode: RenderView = RenderView.FULL_SIM_STATE, draw_traces: def close(self): binding.vec_close(self.c_envs) + def env_log(self, env_idx): + """Get log statistics for a single environment.""" + num_agents = self.agent_offsets[env_idx + 1] - self.agent_offsets[env_idx] + return binding.env_log(self.env_ids[env_idx], num_agents) + + def get_env_logs(self): + """Get log statistics for all environments (unaggregated).""" + return [self.env_log(i) for i in range(self.num_envs)] + @property def scenario_ids(self) -> list[str]: """Return scenario ID string for each env, stripping null padding.""" diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 
a8db5d955..e6bc576c2 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -660,6 +660,43 @@ static PyObject *vec_log(PyObject *self, PyObject *args) { return dict; } +static PyObject *env_log(PyObject *self, PyObject *args) { + int num_args = PyTuple_Size(args); + if (num_args != 2) { + PyErr_SetString(PyExc_TypeError, "env_log requires 2 arguments"); + return NULL; + } + + Env *env = unpack_env(args); + if (!env) { + return NULL; + } + + // Aggregate this env's per-agent logs (same as vec_log but for one env) + // Note: breaks horribly if you don't use floats + Log aggregate = {0}; + int num_keys = sizeof(Log) / sizeof(float); + for (int j = 0; j < num_keys; j++) { + ((float *)&aggregate)[j] += ((float *)&env->log)[j]; + } + + PyObject *dict = PyDict_New(); + if (aggregate.n == 0.0f) { + return dict; + } + + // Average across agents in env + float n = aggregate.n; + for (int i = 0; i < num_keys; i++) { + ((float *)&aggregate)[i] /= n; + } + aggregate.n = (float)env->active_agent_count; + + my_log(dict, &aggregate); + + return dict; +} + static PyObject *vec_close(PyObject *self, PyObject *args) { VecEnv *vec = unpack_vecenv(args); if (!vec) { diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index cce8bf5ad..f9fa08e83 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -530,6 +530,8 @@ def train(self): if human_replay_eval or self_play_eval or safe_eval: self.evaluator.log_stats() + del self.evaluator + if self.config["eval"]["wosac_realism_eval"]: pufferlib.utils.run_wosac_eval_in_subprocess(self.config, self.logger, self.global_step) From 7be0267456b569ed3826dbb37cb714617094cf3d Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 26 Mar 2026 13:49:55 -0400 Subject: [PATCH 06/23] Rebased over latest 3.0 with some minor additional changes --- pufferlib/config/ocean/drive.ini | 5 +- pufferlib/ocean/benchmark/evaluator.py | 205 ++++++------------------- pufferlib/pufferl.py | 79 +--------- 3 files changed, 54 
insertions(+), 235 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 7e7113f10..b85f953ba 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -188,11 +188,14 @@ num_eval_agents = 64 self_play_eval = True ; If True, enable human replay evaluation (pair policy-controlled agent with human replays) human_replay_eval = False +; If True, enable safe eval (policy runs with fixed safe reward conditioning) +safe_eval = False ; Which env to render during eval. Options: "first" (by index), "worst_collision", "random" render_select_mode = "first" ; If True, render random scenarios. Note: Doing this frequency will slow down the training. render_human_replay_eval = False render_self_play_eval = True +render_safe_eval = False ; Number of scenarios to process per batch wosac_batch_size = 32 @@ -223,8 +226,6 @@ wosac_sanity_check = False wosac_aggregate_results = True ; Evaluation mode: "policy", "ground_truth" wosac_eval_mode = "policy" -; If True, enable human replay evaluation (pair policy-controlled agent with human replays) -human_replay_eval = False ; Control only the self-driving car human_replay_control_mode = "control_sdc_only" ; Number of scenarios for human replay evaluation equals the number of agents diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 29c4bfe6b..2971d95b6 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -820,14 +820,12 @@ def rollout(self, args, puffer_env, policy): class Evaluator: - """Evaluates policies in self_play or human_replay mode, with optional rendering. + """Evaluates policies in self_play, human_replay, or safe_eval mode, with optional rendering. 
Initializes the eval envs needed based on eval config flags: - - human_replay_eval: creates sp_env + hr_env - - render_eval: creates sp_env (if not already created) - - - + - human_replay_eval: creates hr_env (control_sdc_only) + - self_play_eval: creates sp_env (control_agents) + - safe_eval: creates safe_env (control_agents + fixed reward conditioning) """ RENDER_FIRST = "first" @@ -841,8 +839,10 @@ def __init__(self, configs, logger=None): self.sim_steps = 90 self.self_play_stats = None self.human_replay_stats = None + self.safe_eval_stats = None self.sp_env = None self.hr_env = None + self.safe_env = None self._unpack_eval_configs(configs) @@ -858,6 +858,7 @@ def _unpack_eval_configs(self, configs): self.render_sp_rollout = self.configs["eval"]["render_self_play_eval"] self.render_hr_rollout = self.configs["eval"]["render_human_replay_eval"] + self.render_safe_rollout = self.configs["eval"].get("render_safe_eval", False) self.hr_eval_config = copy.deepcopy(eval_config) self.hr_eval_config["env"]["control_mode"] = "control_sdc_only" @@ -866,6 +867,33 @@ def _unpack_eval_configs(self, configs): self.sp_eval_config = copy.deepcopy(eval_config) self.sp_eval_config["env"]["control_mode"] = "control_agents" self.sp_eval_config["env"]["render_mode"] = 1 if self.render_sp_rollout else 0 + + # safe_eval: control_agents like self_play, but with fixed reward conditioning + # from [safe_eval] config section (map_dir, num_agents, reward bounds, etc.) 
+ safe_cfg = configs.get("safe_eval", {}) + self.safe_eval_config = copy.deepcopy(eval_config) + self.safe_eval_config["env"]["control_mode"] = "control_agents" + self.safe_eval_config["env"]["render_mode"] = 1 if self.render_safe_rollout else 0 + self.safe_eval_config["env"]["reward_randomization"] = 1 + self.safe_eval_config["env"]["reward_conditioning"] = 1 + self.safe_eval_config["env"]["resample_frequency"] = 0 + # Override map/episode settings from [safe_eval] section + for key in ("map_dir", "num_maps", "min_goal_distance", "max_goal_distance", "num_agents", "episode_length"): + if key in safe_cfg: + self.safe_eval_config["env"][key] = safe_cfg[key] + # Pin reward bounds: set min=max for each conditioning value + import re + + valid_bounds = { + re.match(r"reward_bound_(.+)_min$", k).group(1) + for k in eval_config["env"] + if re.match(r"reward_bound_(.+)_min$", k) + } + for key, val in safe_cfg.items(): + if key in valid_bounds: + self.safe_eval_config["env"][f"reward_bound_{key}_min"] = float(val) + self.safe_eval_config["env"][f"reward_bound_{key}_max"] = float(val) + self.render_select_mode = self.configs["eval"]["render_select_mode"] def select_render_env(self, env_logs): @@ -900,8 +928,15 @@ def select_render_env(self, env_logs): def rollout(self, policy, mode="self_play"): """Roll out the given policy in the specified eval env and collect statistics.""" - env = self.hr_env if mode == "human_replay" else self.sp_env - render_eval = self.render_sp_rollout if mode == "self_play" else self.render_hr_rollout + if mode == "human_replay": + env = self.hr_env + render_eval = self.render_hr_rollout + elif mode == "safe_eval": + env = self.safe_env + render_eval = self.render_safe_rollout + else: # self_play + env = self.sp_env + render_eval = self.render_sp_rollout driver = env.driver_env needs_stats_first = render_eval and self.render_select_mode not in (self.RENDER_FIRST, self.RENDER_RANDOM) @@ -921,6 +956,9 @@ def rollout(self, policy, mode="self_play"): 
elif mode == "human_replay": self.human_replay_stats = final_info self.human_replay_stats["render_env_idx"] = render_env_idx + elif mode == "safe_eval": + self.safe_eval_stats = final_info + self.safe_eval_stats["render_env_idx"] = render_env_idx def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False): """Run a single rollout. If render_env_idx is not None, render that env.""" @@ -1004,156 +1042,11 @@ def log_stats(self): eval_stats["eval/sp_collision_rate"] = self.self_play_stats["collision_rate"] eval_stats["eval/sp_score"] = self.self_play_stats["score"] eval_stats["eval/num_agents"] = self.self_play_stats["n"] + if self.safe_eval_stats is not None: + eval_stats["eval/safe_collision_rate"] = self.safe_eval_stats["collision_rate"] + eval_stats["eval/safe_score"] = self.safe_eval_stats["score"] if not eval_stats: return self.logger.wandb.log(eval_stats) - - -class SafeEvaluator: - """Evaluates policies with fixed safe/law-abiding reward conditioning. - - Runs the policy with deterministic reward bounds (reward_randomization=1, - min=max for each bound) so the conditioning observation vector is fixed to - safe driving values. Collects metrics over multiple episodes. - - Re-parses the INI config internally so it doesn't need the full training - args dict — only the env_name, safe_eval config, and device. - """ - - def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger=None): - self.env_name = env_name - self.logger = logger - self.safe_eval_config = safe_eval_config - self.num_episodes = safe_eval_config.get("num_episodes", 100) - self.num_agents = safe_eval_config.get("num_agents", 64) - self.episode_length = safe_eval_config.get("episode_length", 1000) - if isinstance(device, int): - device = f"cuda:{device}" - self.device = device - self.stats = None - - def _build_eval_env_config(self): - """Build env config with safe reward conditioning values applied. 
- - Re-parses the INI file to get a fresh full config, then applies - safe_eval overrides for env, vec, and reward bounds. - """ - import re - from pufferlib.pufferl import load_config - - # Re-parse INI to get full config (env, vec, policy, rnn, etc.) - import sys - - original_argv = sys.argv - sys.argv = ["pufferl"] - try: - eval_config = load_config(self.env_name) - finally: - sys.argv = original_argv - - eval_config["vec"] = dict(backend="PufferEnv", num_envs=1) - eval_config["train"]["device"] = self.device - eval_config["env"]["num_agents"] = self.num_agents - eval_config["env"]["episode_length"] = self.episode_length - eval_config["env"]["resample_frequency"] = 0 - eval_config["env"]["reward_randomization"] = 1 - eval_config["env"]["reward_conditioning"] = 1 - - # Apply safe_eval overrides for map_dir, goal distances, etc. - for override_key in ("map_dir", "num_maps", "min_goal_distance", "max_goal_distance"): - if override_key in self.safe_eval_config: - eval_config["env"][override_key] = self.safe_eval_config[override_key] - - # Discover valid reward bound names from env config - valid_bounds = set() - for key in eval_config["env"]: - m = re.match(r"reward_bound_(.+)_min$", key) - if m: - valid_bounds.add(m.group(1)) - - # Set min=max for each reward bound to fix the conditioning values - for key, val in self.safe_eval_config.items(): - if key not in valid_bounds: - continue - eval_config["env"][f"reward_bound_{key}_min"] = float(val) - eval_config["env"][f"reward_bound_{key}_max"] = float(val) - - return eval_config - - def evaluate(self, vecenv, policy): - """Run evaluation with safe reward conditioning and collect metrics. 
- - Args: - vecenv: Vectorized environment (created with safe eval config) - policy: Trained policy to evaluate - - Returns: - dict: Averaged metrics over collected episodes - """ - from collections import defaultdict - - policy.eval() - num_agents = vecenv.observation_space.shape[0] - use_rnn = hasattr(policy, "hidden_size") - - ob, _ = vecenv.reset() - state = {} - dones = torch.zeros(num_agents, device=self.device) - prev_rewards = torch.zeros(num_agents, device=self.device) - if use_rnn: - state = dict( - lstm_h=torch.zeros(num_agents, policy.hidden_size, device=self.device), - lstm_c=torch.zeros(num_agents, policy.hidden_size, device=self.device), - ) - - all_stats = defaultdict(list) - episodes_collected = 0 - max_steps = (self.num_episodes // max(num_agents, 1) + 2) * self.episode_length - - for step in range(max_steps): - if episodes_collected >= self.num_episodes: - break - - with torch.no_grad(): - ob_t = torch.as_tensor(ob).to(self.device) - if use_rnn: - state["reward"] = prev_rewards - state["done"] = dones - logits, value = policy.forward_eval(ob_t, state) - action, logprob, _ = pufferlib.pytorch.sample_logits(logits) - action = action.cpu().numpy().reshape(vecenv.action_space.shape) - - if isinstance(logits, torch.distributions.Normal): - action = np.clip(action, vecenv.action_space.low, vecenv.action_space.high) - - ob, rewards, terminals, truncations, infos = vecenv.step(action) - prev_rewards = torch.as_tensor(rewards).float().to(self.device) - dones = torch.as_tensor(np.maximum(terminals, truncations)).float().to(self.device) - - for entry in infos: - if isinstance(entry, dict): - episodes_collected += int(entry.get("n", 1)) - for k, v in entry.items(): - try: - float(v) - all_stats[k].append(v) - except (TypeError, ValueError): - pass - - self.stats = {k: float(np.mean(v)) for k, v in all_stats.items() if len(v) > 0} - return self.stats - - def log_stats(self, global_step=None): - """Log collected metrics to wandb.""" - if self.stats is None: - 
return - - if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb): - return - - payload = {f"eval/safe_{k}": v for k, v in self.stats.items()} - if global_step is not None: - payload["train_step"] = global_step - self.logger.wandb.log(payload) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index f9fa08e83..1c76a221d 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -509,7 +509,7 @@ def train(self): if self.epoch % self.config["eval"]["eval_interval"] == 0 or done_training: human_replay_eval = self.config["eval"]["human_replay_eval"] self_play_eval = self.config["eval"]["self_play_eval"] - safe_eval = self.config["eval"]["safe_eval"] + safe_eval = self.config.get("safe_eval", {}).get("enabled", False) self.evaluator = Evaluator(self.full_args, self.logger) if human_replay_eval: @@ -525,7 +525,7 @@ def train(self): if safe_eval: self.evaluator.safe_env = load_env("puffer_drive", self.evaluator.safe_eval_config) self.evaluator.rollout(self.uncompiled_policy, mode="safe_eval") - self.evaluator.sp_env.close() + self.evaluator.safe_env.close() self.evaluator.log_videos(eval_mode="safe_eval", epoch=self.epoch) if human_replay_eval or self_play_eval or safe_eval: self.evaluator.log_stats() @@ -540,81 +540,6 @@ def train(self): ): pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step) - def _run_safe_eval(self): - """Run safe eval in-process using SafeEvaluator, then render videos.""" - import copy - import traceback - - vecenv = None - bin_path = None - safe_ini_path = None - render_handed_off = False - try: - from pufferlib.ocean.benchmark.evaluator import SafeEvaluator - - self.msg = "Running safe eval..." 
- env_name = self.config["env"] - safe_eval_config = self.config.get("safe_eval", {}) - evaluator = SafeEvaluator(env_name, safe_eval_config, device=self.config["device"], logger=self.logger) - eval_config = evaluator._build_eval_env_config() - - vecenv = load_env(env_name, eval_config) - policy = load_policy(eval_config, vecenv, env_name) - - # Copy weights from in-memory policy (no checkpoint dependency) - state_dict = copy.deepcopy(self.uncompiled_policy.state_dict()) - # Strip DDP "module." prefix if present - state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} - policy.load_state_dict(state_dict) - - metrics = evaluator.evaluate(vecenv, policy) - evaluator.log_stats(global_step=self.global_step) - - self.msg = f"Safe eval: {len(metrics)} metrics logged" - - self.msg = "Spawning safe eval render..." - model_dir = os.path.join(self.config["data_dir"], f"{self.config['env']}_{self.logger.run_id}") - bin_path = f"{model_dir}_safe_eval_epoch_{self.epoch:06d}.bin" - - export( - args={"env_name": env_name, "load_model_path": "unused", **self.config}, - env_name=env_name, - vecenv=self.vecenv, - policy=self.uncompiled_policy, - path=bin_path, - silent=True, - ) - - safe_ini_path = pufferlib.utils.generate_safe_eval_ini(safe_eval_config) - - self._render_videos( - bin_path=bin_path, - num_maps=safe_eval_config.get("num_maps"), - map_dir=safe_eval_config.get("map_dir"), - wandb_prefix="eval", - config_path=safe_ini_path, - cleanup_files=[bin_path, safe_ini_path], - ) - render_handed_off = True - self.msg = f"Safe eval complete: {len(metrics)} metrics logged" - - except Exception as e: - self.msg = f"Safe eval failed: {e}" - traceback.print_exc(file=sys.stderr) - finally: - if vecenv is not None: - try: - vecenv.close() - except Exception: - pass - if not render_handed_off: - for f in [bin_path, safe_ini_path]: - if f and os.path.exists(f): - try: - os.remove(f) - except OSError: - pass - def mean_and_log(self): config = self.config for k in 
list(self.stats.keys()): From 615fcde4f11937c3e48b715d76ce237007cb2f7c Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 26 Mar 2026 14:14:49 -0400 Subject: [PATCH 07/23] Adding some render options --- pufferlib/config/ocean/drive.ini | 1 + pufferlib/ocean/benchmark/evaluator.py | 25 ++-- pufferlib/ocean/drive/drive.h | 3 +- pufferlib/pufferl.py | 185 +++++++++++++------------ 4 files changed, 115 insertions(+), 99 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index b85f953ba..09456de2d 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -281,6 +281,7 @@ output_dir = "resources/drive/render_videos" ; Number of maps to render (capped at files available in map_dir) num_maps = 3 ; View mode: sim_state (top-down, origin-centered), zoom_out (top-down, full map bbox), bev (agent BEV obs), persp (third-person follow-cam) +; Multi-view: "both" = sim_state + persp, "all" = sim_state + persp + bev (runs a separate rollout per view) ; NOTE: "persp" and "bev" require active_agent_count > 0 view_mode = "bev" ; Whether to draw agent trajectory traces diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 2971d95b6..60414959e 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -877,8 +877,10 @@ def _unpack_eval_configs(self, configs): self.safe_eval_config["env"]["reward_randomization"] = 1 self.safe_eval_config["env"]["reward_conditioning"] = 1 self.safe_eval_config["env"]["resample_frequency"] = 0 + self.safe_eval_config["env"]["report_interval"] = 1 # ensure logs fire within 90-step rollout # Override map/episode settings from [safe_eval] section - for key in ("map_dir", "num_maps", "min_goal_distance", "max_goal_distance", "num_agents", "episode_length"): + # Note: episode_length is intentionally NOT overridden here — eval rollout is always 91 steps + for key in ("map_dir", "num_maps", 
"min_goal_distance", "max_goal_distance", "num_agents"): if key in safe_cfg: self.safe_eval_config["env"][key] = safe_cfg[key] # Pin reward bounds: set min=max for each conditioning value @@ -1036,15 +1038,22 @@ def log_stats(self): eval_stats = {} if self.human_replay_stats is not None: - eval_stats["eval/hr_collision_rate"] = self.human_replay_stats["collision_rate"] - eval_stats["eval/hr_score"] = self.human_replay_stats["score"] + if "collision_rate" in self.human_replay_stats: + eval_stats["eval/hr_collision_rate"] = self.human_replay_stats["collision_rate"] + if "score" in self.human_replay_stats: + eval_stats["eval/hr_score"] = self.human_replay_stats["score"] if self.self_play_stats is not None: - eval_stats["eval/sp_collision_rate"] = self.self_play_stats["collision_rate"] - eval_stats["eval/sp_score"] = self.self_play_stats["score"] - eval_stats["eval/num_agents"] = self.self_play_stats["n"] + if "collision_rate" in self.self_play_stats: + eval_stats["eval/sp_collision_rate"] = self.self_play_stats["collision_rate"] + if "score" in self.self_play_stats: + eval_stats["eval/sp_score"] = self.self_play_stats["score"] + if "n" in self.self_play_stats: + eval_stats["eval/num_agents"] = self.self_play_stats["n"] if self.safe_eval_stats is not None: - eval_stats["eval/safe_collision_rate"] = self.safe_eval_stats["collision_rate"] - eval_stats["eval/safe_score"] = self.safe_eval_stats["score"] + if "collision_rate" in self.safe_eval_stats: + eval_stats["eval/safe_collision_rate"] = self.safe_eval_stats["collision_rate"] + if "score" in self.safe_eval_stats: + eval_stats["eval/safe_score"] = self.safe_eval_stats["score"] if not eval_stats: return diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index c6ecb8510..5c9127727 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2341,9 +2341,8 @@ void c_close(Drive *env) { free(env->static_agent_indices); free(env->expert_static_agent_indices); 
free(env->tracks_to_predict_indices); - free(env->ini_file); - free(env->tracks_to_predict_indices); env->tracks_to_predict_indices = NULL; + free(env->ini_file); } void allocate(Drive *env) { diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 1c76a221d..6709ea066 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1541,17 +1541,22 @@ def render(env_name, args=None): except KeyError as e: raise pufferlib.APIUsageError(f"Missing render config: {e}") - # Map config string → RenderView enum + # Map config string → list of RenderView enums to render + # "both" = sim_state + persp, "all" = sim_state + persp + bev _VIEW_MODE_MAP = { - "sim_state": RenderView.FULL_SIM_STATE, - "topdown": RenderView.FULL_SIM_STATE, # backward compat - "bev": RenderView.BEV_AGENT_OBS, - "agent": RenderView.BEV_AGENT_OBS, # backward compat - "persp": RenderView.AGENT_PERSP, + "sim_state": [RenderView.FULL_SIM_STATE], + "topdown": [RenderView.FULL_SIM_STATE], # backward compat + "bev": [RenderView.BEV_AGENT_OBS], + "agent": [RenderView.BEV_AGENT_OBS], # backward compat + "persp": [RenderView.AGENT_PERSP], + "both": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP], + "all": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP, RenderView.BEV_AGENT_OBS], } - view_mode = _VIEW_MODE_MAP.get(view_mode_str) - if view_mode is None: - raise pufferlib.APIUsageError(f"Unknown view_mode '{view_mode_str}'. Choose from: sim_state, bev, persp") + view_modes = _VIEW_MODE_MAP.get(view_mode_str) + if view_modes is None: + raise pufferlib.APIUsageError( + f"Unknown view_mode '{view_mode_str}'. Choose from: sim_state, bev, persp, both, all" + ) bin_files = sorted(f for f in os.listdir(map_dir) if f.endswith(".bin")) if num_maps > len(bin_files): @@ -1569,90 +1574,92 @@ def render(env_name, args=None): device = configured_device def render_one_map(map_path): - """Render a single map file and move the resulting mp4(s) to output_dir. 
+ """Render a single map file in one or more view modes, moving resulting mp4(s) to output_dir. - The map binary is symlinked into a private temp directory so the C - loader sees exactly one .bin file (num_maps=1). After env.close() - finalizes the ffmpeg pipe, we glob for new *.mp4 files in cwd and - move them — no fragile scenario_id filename prediction needed. + Each view mode runs a separate rollout so that each gets its own ffmpeg + pipe and output file. Output files are named {scenario_id}_{view}.mp4 + when multiple views are requested, or {scenario_id}.mp4 for a single view. """ map_name = os.path.splitext(os.path.basename(map_path))[0] - with tempfile.TemporaryDirectory() as tmp_map_dir: - # Hard-link (or copy) the single .bin into an isolated dir - tmp_bin = os.path.join(tmp_map_dir, os.path.basename(map_path)) - shutil.copy2(map_path, tmp_bin) - - env_overrides = { - **args["env"], - "num_maps": 1, - "map_dir": tmp_map_dir, - "render_mode": 1, # headless ffmpeg → writes {scenario_id}.mp4 in cwd - } - # Override init/control modes if [render] section specifies them. - # carla_2D logged-trajectory maps need create_all_valid + control_vehicles; - # the training default (init_variable_agent_number) spawns no agents on these maps. 
- if render_init_mode is not None: - env_overrides["init_mode"] = render_init_mode - if render_control_mode is not None: - env_overrides["control_mode"] = render_control_mode - - map_args = { - **args, - "env": env_overrides, - "vec": {"backend": "Serial", "num_envs": 1}, - } - - env = load_env(env_name, map_args) - policy = load_policy(map_args, env, env_name) - policy.eval() - - ob, _ = env.reset() - driver = env.driver_env - - state = {} - if map_args["train"]["use_rnn"]: - n = env.agents_per_batch if hasattr(env, "agents_per_batch") else ob.shape[0] - state = dict( - lstm_h=torch.zeros(n, policy.hidden_size, device=device), - lstm_c=torch.zeros(n, policy.hidden_size, device=device), - ) - - # Remove any stale mp4 with this map's name from a previous run so that - # the set-difference detection below doesn't miss the newly written file. - stale = os.path.join(os.getcwd(), f"{map_name}.mp4") - if os.path.exists(stale): - os.remove(stale) - - # Snapshot which mp4s exist before this rollout - before = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) - - for _ in range(max_frames): - driver.render(view_mode=view_mode, draw_traces=draw_traces) - - with torch.no_grad(): - ob_t = torch.as_tensor(ob).to(device) - logits, _ = policy.forward_eval(ob_t, state) - action, _, _ = pufferlib.pytorch.sample_logits(logits) - action = action.cpu().numpy().reshape(env.action_space.shape) - - ob, _, done, truncated, _ = env.step(action) - if done.all() or truncated.all(): - break - - # env.close() finalizes the ffmpeg pipe and flushes the mp4 to disk - env.close() - - # Move any new mp4s that appeared since before the rollout - after = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) - new_mp4s = after - before - if new_mp4s: - for src in sorted(new_mp4s): - dst = os.path.join(output_dir, os.path.basename(src)) - shutil.move(src, dst) - print(f" Saved {dst}") - else: - print(f" Warning: no mp4 produced for map {map_name}") + # View-mode suffix labels used in output filenames + 
_VIEW_SUFFIX = { + RenderView.FULL_SIM_STATE: "sim_state", + RenderView.AGENT_PERSP: "persp", + RenderView.BEV_AGENT_OBS: "bev", + } + multi = len(view_modes) > 1 + + for view_mode in view_modes: + with tempfile.TemporaryDirectory() as tmp_map_dir: + tmp_bin = os.path.join(tmp_map_dir, os.path.basename(map_path)) + shutil.copy2(map_path, tmp_bin) + + env_overrides = { + **args["env"], + "num_maps": 1, + "map_dir": tmp_map_dir, + "render_mode": 1, # headless ffmpeg → writes {scenario_id}.mp4 in cwd + } + if render_init_mode is not None: + env_overrides["init_mode"] = render_init_mode + if render_control_mode is not None: + env_overrides["control_mode"] = render_control_mode + + map_args = { + **args, + "env": env_overrides, + "vec": {"backend": "Serial", "num_envs": 1}, + } + + env = load_env(env_name, map_args) + policy = load_policy(map_args, env, env_name) + policy.eval() + + ob, _ = env.reset() + driver = env.driver_env + + state = {} + if map_args["train"]["use_rnn"]: + n = env.agents_per_batch if hasattr(env, "agents_per_batch") else ob.shape[0] + state = dict( + lstm_h=torch.zeros(n, policy.hidden_size, device=device), + lstm_c=torch.zeros(n, policy.hidden_size, device=device), + ) + + # Remove any stale mp4 from a previous run + stale = os.path.join(os.getcwd(), f"{map_name}.mp4") + if os.path.exists(stale): + os.remove(stale) + + before = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) + + for _ in range(max_frames): + driver.render(view_mode=view_mode, draw_traces=draw_traces) + + with torch.no_grad(): + ob_t = torch.as_tensor(ob).to(device) + logits, _ = policy.forward_eval(ob_t, state) + action, _, _ = pufferlib.pytorch.sample_logits(logits) + action = action.cpu().numpy().reshape(env.action_space.shape) + + ob, _, done, truncated, _ = env.step(action) + if done.all() or truncated.all(): + break + + env.close() + + after = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) + new_mp4s = after - before + if new_mp4s: + for src in sorted(new_mp4s): + 
base = os.path.splitext(os.path.basename(src))[0] + suffix = f"_{_VIEW_SUFFIX[view_mode]}" if multi else "" + dst = os.path.join(output_dir, f"{base}{suffix}.mp4") + shutil.move(src, dst) + print(f" Saved {dst}") + else: + print(f" Warning: no mp4 produced for map {map_name} view {_VIEW_SUFFIX[view_mode]}") if render_maps: print(f"Rendering {len(render_maps)} map(s) from {map_dir} → {output_dir} ...") From 54f2792910c44cb53cc238e17be7979237798d87 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 26 Mar 2026 14:25:08 -0400 Subject: [PATCH 08/23] Adding some render options --- pufferlib/config/ocean/drive.ini | 3 ++ pufferlib/ocean/benchmark/evaluator.py | 75 +++++++++++++++++++++++--- 2 files changed, 70 insertions(+), 8 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 09456de2d..0d0f11064 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -192,6 +192,9 @@ human_replay_eval = False safe_eval = False ; Which env to render during eval. Options: "first" (by index), "worst_collision", "random" render_select_mode = "first" +; View mode(s) for eval rendering. Options: sim_state, bev, persp, both (sim_state+persp), all (sim_state+persp+bev) +; Multi-view options run a separate rollout per view, each producing its own wandb video under render/{mode}/{view} +render_view_mode = "sim_state" ; If True, render random scenarios. Note: Doing this frequency will slow down the training. 
render_human_replay_eval = False render_self_play_eval = True diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 60414959e..8ddff8a39 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -847,6 +847,23 @@ def __init__(self, configs, logger=None): self._unpack_eval_configs(configs) def _unpack_eval_configs(self, configs): + from pufferlib.ocean.drive.drive import RenderView + + _VIEW_MODE_MAP = { + "sim_state": [RenderView.FULL_SIM_STATE], + "topdown": [RenderView.FULL_SIM_STATE], + "bev": [RenderView.BEV_AGENT_OBS], + "agent": [RenderView.BEV_AGENT_OBS], + "persp": [RenderView.AGENT_PERSP], + "both": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP], + "all": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP, RenderView.BEV_AGENT_OBS], + } + _VIEW_SUFFIX = { + RenderView.FULL_SIM_STATE: "sim_state", + RenderView.AGENT_PERSP: "persp", + RenderView.BEV_AGENT_OBS: "bev", + } + eval_config = copy.deepcopy(configs) # Create separate evaluation environments based on specified configs eval_config["env"]["termination_mode"] = 0 @@ -860,6 +877,10 @@ def _unpack_eval_configs(self, configs): self.render_hr_rollout = self.configs["eval"]["render_human_replay_eval"] self.render_safe_rollout = self.configs["eval"].get("render_safe_eval", False) + view_mode_str = str(self.configs["eval"].get("render_view_mode", "sim_state")).lower().strip('"').strip("'") + self.render_view_modes = _VIEW_MODE_MAP.get(view_mode_str, [RenderView.FULL_SIM_STATE]) + self.render_view_suffix = _VIEW_SUFFIX + self.hr_eval_config = copy.deepcopy(eval_config) self.hr_eval_config["env"]["control_mode"] = "control_sdc_only" self.hr_eval_config["env"]["render_mode"] = 1 if self.render_hr_rollout else 0 @@ -928,7 +949,12 @@ def select_render_env(self, env_logs): return 0 def rollout(self, policy, mode="self_play"): - """Roll out the given policy in the specified eval env and collect statistics.""" + """Roll out 
the given policy in the specified eval env and collect statistics. + + If rendering is enabled and multiple view modes are configured, a separate + rollout is run for each view mode. The first pass also collects stats; + subsequent passes are render-only (stats already captured). + """ if mode == "human_replay": env = self.hr_env @@ -949,9 +975,21 @@ def rollout(self, policy, mode="self_play"): else: render_env_idx = self.select_render_env([{}] * driver.num_envs) - info_list = self._run_rollout(policy, env, render_env_idx if render_eval else None) + view_modes = self.render_view_modes if render_eval else [None] + multi_view = len(view_modes) > 1 + + final_info = {} + for i, view_mode in enumerate(view_modes): + info_list = self._run_rollout( + policy, + env, + render_env_idx=render_env_idx if render_eval else None, + view_mode=view_mode, + view_suffix=self.render_view_suffix.get(view_mode, "") if multi_view else "", + ) + if i == 0: # capture stats from the first pass only + final_info = info_list[0] if info_list else {} - final_info = info_list[0] if info_list else {} if mode == "self_play": self.self_play_stats = final_info self.self_play_stats["render_env_idx"] = render_env_idx @@ -962,11 +1000,19 @@ def rollout(self, policy, mode="self_play"): self.safe_eval_stats = final_info self.safe_eval_stats["render_env_idx"] = render_env_idx - def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False): - """Run a single rollout. If render_env_idx is not None, render that env.""" + def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False, view_mode=None, view_suffix=""): + """Run a single rollout. If render_env_idx is not None, render that env. + + view_mode: RenderView enum to use when rendering (defaults to FULL_SIM_STATE). + view_suffix: appended to the mp4 filename when logging multiple views, e.g. "_bev". 
+ """ + from pufferlib.ocean.drive.drive import RenderView + driver = env.driver_env num_agents = env.observation_space.shape[0] device = self.configs["train"]["device"] + if view_mode is None: + view_mode = RenderView.FULL_SIM_STATE # Reset environment obs, info = env.reset() @@ -982,7 +1028,7 @@ def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False): info_list = [] for time_idx in range(self.sim_steps): if render_env_idx is not None: - driver.render(env_id=render_env_idx) + driver.render(view_mode=view_mode, env_id=render_env_idx) # Get action from policy with torch.no_grad(): @@ -1022,10 +1068,23 @@ def log_videos(self, eval_mode, epoch): return render_mode = self.render_select_mode + multi_view = len(self.render_view_modes) > 1 + _known_suffixes = {"_sim_state", "_persp", "_bev"} + for p in video_files: - scenario_id = os.path.splitext(os.path.basename(p))[0] + stem = os.path.splitext(os.path.basename(p))[0] + # Extract view suffix from filename if present (e.g. 
"abc123_bev" → view="bev") + view_tag = "" + if multi_view: + for s in _known_suffixes: + if stem.endswith(s): + view_tag = s[1:] # strip leading "_" + stem = stem[: -len(s)] + break + scenario_id = stem caption = f"scene_{scenario_id}_epoch_{epoch}_select_{render_mode}" - self.logger.wandb.log({f"render/{eval_mode}": wandb.Video(p, format="mp4", caption=caption)}) + wandb_key = f"render/{eval_mode}/{view_tag}" if view_tag else f"render/{eval_mode}" + self.logger.wandb.log({wandb_key: wandb.Video(p, format="mp4", caption=caption)}) # Clean up for p in video_files: From e01df36bf9d9abca82efb5f26a0b1ffff71a2342 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Fri, 27 Mar 2026 21:51:28 -0400 Subject: [PATCH 09/23] Adding full safe_eval functionality --- pufferlib/config/ocean/drive.ini | 9 +- pufferlib/ocean/benchmark/evaluator.py | 256 ++++++++++++++++++------- pufferlib/ocean/drive/drive.h | 8 +- pufferlib/ocean/drive/drive.py | 8 + pufferlib/ocean/env_binding.h | 31 +++ pufferlib/pufferl.py | 59 +++++- 6 files changed, 290 insertions(+), 81 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 0d0f11064..494ee9a4d 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -188,17 +188,15 @@ num_eval_agents = 64 self_play_eval = True ; If True, enable human replay evaluation (pair policy-controlled agent with human replays) human_replay_eval = False -; If True, enable safe eval (policy runs with fixed safe reward conditioning) -safe_eval = False ; Which env to render during eval. Options: "first" (by index), "worst_collision", "random" render_select_mode = "first" ; View mode(s) for eval rendering. Options: sim_state, bev, persp, both (sim_state+persp), all (sim_state+persp+bev) ; Multi-view options run a separate rollout per view, each producing its own wandb video under render/{mode}/{view} -render_view_mode = "sim_state" +render_view_mode = "all" ; If True, render random scenarios. 
Note: Doing this frequency will slow down the training. render_human_replay_eval = False render_self_play_eval = True -render_safe_eval = False + ; Number of scenarios to process per batch wosac_batch_size = 32 @@ -239,6 +237,7 @@ human_replay_num_agents = 16 enabled = True ; How often to run safe eval (in training epochs). Defaults to render_interval. interval = 250 +render_safe_eval = False ; Number of agents to run in the eval environment num_agents = 50 ; Number of episodes to collect metrics over @@ -286,7 +285,7 @@ num_maps = 3 ; View mode: sim_state (top-down, origin-centered), zoom_out (top-down, full map bbox), bev (agent BEV obs), persp (third-person follow-cam) ; Multi-view: "both" = sim_state + persp, "all" = sim_state + persp + bev (runs a separate rollout per view) ; NOTE: "persp" and "bev" require active_agent_count > 0 -view_mode = "bev" +view_mode = "persp" ; Whether to draw agent trajectory traces draw_traces = True ; Maximum number of frames to render per map diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 8ddff8a39..7b158368d 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -820,12 +820,11 @@ def rollout(self, args, puffer_env, policy): class Evaluator: - """Evaluates policies in self_play, human_replay, or safe_eval mode, with optional rendering. + """Evaluates policies in self_play, human_replay, with optional rendering. 
Initializes the eval envs needed based on eval config flags: - human_replay_eval: creates hr_env (control_sdc_only) - self_play_eval: creates sp_env (control_agents) - - safe_eval: creates safe_env (control_agents + fixed reward conditioning) """ RENDER_FIRST = "first" @@ -839,10 +838,8 @@ def __init__(self, configs, logger=None): self.sim_steps = 90 self.self_play_stats = None self.human_replay_stats = None - self.safe_eval_stats = None self.sp_env = None self.hr_env = None - self.safe_env = None self._unpack_eval_configs(configs) @@ -870,12 +867,12 @@ def _unpack_eval_configs(self, configs): backend = eval_config["eval"].get("backend", "PufferEnv") eval_config["env"]["map_dir"] = eval_config["eval"]["map_dir"] eval_config["env"]["num_agents"] = eval_config["eval"]["num_eval_agents"] - eval_config["env"]["episode_length"] = 91 # WOMD scenario length + if self.configs["eval"]["human_replay_eval"]: + eval_config["env"]["episode_length"] = 91 # WOMD scenario length eval_config["vec"] = dict(backend=backend, num_envs=1) self.render_sp_rollout = self.configs["eval"]["render_self_play_eval"] self.render_hr_rollout = self.configs["eval"]["render_human_replay_eval"] - self.render_safe_rollout = self.configs["eval"].get("render_safe_eval", False) view_mode_str = str(self.configs["eval"].get("render_view_mode", "sim_state")).lower().strip('"').strip("'") self.render_view_modes = _VIEW_MODE_MAP.get(view_mode_str, [RenderView.FULL_SIM_STATE]) @@ -883,39 +880,15 @@ def _unpack_eval_configs(self, configs): self.hr_eval_config = copy.deepcopy(eval_config) self.hr_eval_config["env"]["control_mode"] = "control_sdc_only" - self.hr_eval_config["env"]["render_mode"] = 1 if self.render_hr_rollout else 0 + self.hr_eval_config["env"]["render_mode"] = ( + 0 # primary env: stats only; render envs created per-view in rollout() + ) self.sp_eval_config = copy.deepcopy(eval_config) self.sp_eval_config["env"]["control_mode"] = "control_agents" - self.sp_eval_config["env"]["render_mode"] = 1 if
self.render_sp_rollout else 0 - - # safe_eval: control_agents like self_play, but with fixed reward conditioning - # from [safe_eval] config section (map_dir, num_agents, reward bounds, etc.) - safe_cfg = configs.get("safe_eval", {}) - self.safe_eval_config = copy.deepcopy(eval_config) - self.safe_eval_config["env"]["control_mode"] = "control_agents" - self.safe_eval_config["env"]["render_mode"] = 1 if self.render_safe_rollout else 0 - self.safe_eval_config["env"]["reward_randomization"] = 1 - self.safe_eval_config["env"]["reward_conditioning"] = 1 - self.safe_eval_config["env"]["resample_frequency"] = 0 - self.safe_eval_config["env"]["report_interval"] = 1 # ensure logs fire within 90-step rollout - # Override map/episode settings from [safe_eval] section - # Note: episode_length is intentionally NOT overridden here — eval rollout is always 91 steps - for key in ("map_dir", "num_maps", "min_goal_distance", "max_goal_distance", "num_agents"): - if key in safe_cfg: - self.safe_eval_config["env"][key] = safe_cfg[key] - # Pin reward bounds: set min=max for each conditioning value - import re - - valid_bounds = { - re.match(r"reward_bound_(.+)_min$", k).group(1) - for k in eval_config["env"] - if re.match(r"reward_bound_(.+)_min$", k) - } - for key, val in safe_cfg.items(): - if key in valid_bounds: - self.safe_eval_config["env"][f"reward_bound_{key}_min"] = float(val) - self.safe_eval_config["env"][f"reward_bound_{key}_max"] = float(val) + self.sp_eval_config["env"]["render_mode"] = ( + 0 # primary env: stats only; render envs created per-view in rollout() + ) self.render_select_mode = self.configs["eval"]["render_select_mode"] @@ -951,19 +924,19 @@ def select_render_env(self, env_logs): def rollout(self, policy, mode="self_play"): """Roll out the given policy in the specified eval env and collect statistics. - If rendering is enabled and multiple view modes are configured, a separate - rollout is run for each view mode. 
The first pass also collects stats; - subsequent passes are render-only (stats already captured). + Stats are collected using the primary env (already loaded by pufferl.py). + If rendering is enabled, each view mode gets its own temporary env with + render_mode=1, so each view has its own ffmpeg pipe and uniquely named mp4. """ + from pufferlib.pufferl import load_env if mode == "human_replay": env = self.hr_env + eval_config = self.hr_eval_config render_eval = self.render_hr_rollout - elif mode == "safe_eval": - env = self.safe_env - render_eval = self.render_safe_rollout else: # self_play env = self.sp_env + eval_config = self.sp_eval_config render_eval = self.render_sp_rollout driver = env.driver_env @@ -975,20 +948,30 @@ def rollout(self, policy, mode="self_play"): else: render_env_idx = self.select_render_env([{}] * driver.num_envs) - view_modes = self.render_view_modes if render_eval else [None] - multi_view = len(view_modes) > 1 - - final_info = {} - for i, view_mode in enumerate(view_modes): - info_list = self._run_rollout( - policy, - env, - render_env_idx=render_env_idx if render_eval else None, - view_mode=view_mode, - view_suffix=self.render_view_suffix.get(view_mode, "") if multi_view else "", - ) - if i == 0: # capture stats from the first pass only - final_info = info_list[0] if info_list else {} + # Collect stats from the primary env (no rendering) + info_list = self._run_rollout(policy, env) + final_info = info_list[0] if info_list else {} + + # Render each view in its own temporary env so each gets its own ffmpeg pipe + # and uniquely named mp4 (e.g. {scenario_id}_bev.mp4). 
+ if render_eval: + view_modes = self.render_view_modes + multi_view = len(view_modes) > 1 + for view_mode in view_modes: + suffix = f"_{self.render_view_suffix[view_mode]}" if multi_view else "" + render_cfg = copy.deepcopy(eval_config) + render_cfg["env"]["render_mode"] = 1 + render_env = load_env("puffer_drive", render_cfg) + try: + self._run_rollout( + policy, + render_env, + render_env_idx=render_env_idx, + view_mode=view_mode, + view_suffix=suffix, + ) + finally: + render_env.close() if mode == "self_play": self.self_play_stats = final_info @@ -996,9 +979,6 @@ def rollout(self, policy, mode="self_play"): elif mode == "human_replay": self.human_replay_stats = final_info self.human_replay_stats["render_env_idx"] = render_env_idx - elif mode == "safe_eval": - self.safe_eval_stats = final_info - self.safe_eval_stats["render_env_idx"] = render_env_idx def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False, view_mode=None, view_suffix=""): """Run a single rollout. If render_env_idx is not None, render that env. @@ -1014,6 +994,11 @@ def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False, vie if view_mode is None: view_mode = RenderView.FULL_SIM_STATE + # Set video filename suffix in C before the first render call of this rollout. + # This ensures each view produces a uniquely named mp4 (e.g. {scenario_id}_bev.mp4). 
+ if render_env_idx is not None and view_suffix: + driver.set_video_suffix(view_suffix, env_id=render_env_idx) + # Reset environment obs, info = env.reset() @@ -1108,13 +1093,156 @@ def log_stats(self): eval_stats["eval/sp_score"] = self.self_play_stats["score"] if "n" in self.self_play_stats: eval_stats["eval/num_agents"] = self.self_play_stats["n"] - if self.safe_eval_stats is not None: - if "collision_rate" in self.safe_eval_stats: - eval_stats["eval/safe_collision_rate"] = self.safe_eval_stats["collision_rate"] - if "score" in self.safe_eval_stats: - eval_stats["eval/safe_score"] = self.safe_eval_stats["score"] if not eval_stats: return self.logger.wandb.log(eval_stats) + + +class SafeEvaluator: + """Evaluates policies with fixed safe/law-abiding reward conditioning. + + Runs the policy with deterministic reward bounds (reward_randomization=1, + min=max for each bound) so the conditioning observation vector is fixed to + safe driving values. Collects metrics over multiple episodes. + + Re-parses the INI config internally so it doesn't need the full training + args dict — only the env_name, safe_eval config, and device. + """ + + def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger=None): + self.env_name = env_name + self.logger = logger + self.safe_eval_config = safe_eval_config + self.num_episodes = safe_eval_config.get("num_episodes", 100) + self.num_agents = safe_eval_config.get("num_agents", 64) + self.episode_length = safe_eval_config.get("episode_length", 1000) + if isinstance(device, int): + device = f"cuda:{device}" + self.device = device + self.stats = None + + def _build_eval_env_config(self): + """Build env config with safe reward conditioning values applied. + + Re-parses the INI file to get a fresh full config, then applies + safe_eval overrides for env, vec, and reward bounds. + """ + import re + from pufferlib.pufferl import load_config + + # Re-parse INI to get full config (env, vec, policy, rnn, etc.) 
+ import sys + + original_argv = sys.argv + sys.argv = ["pufferl"] + try: + eval_config = load_config(self.env_name) + finally: + sys.argv = original_argv + + eval_config["vec"] = dict(backend="PufferEnv", num_envs=1) + eval_config["train"]["device"] = self.device + eval_config["env"]["num_agents"] = self.num_agents + eval_config["env"]["episode_length"] = self.episode_length + eval_config["env"]["resample_frequency"] = 0 + eval_config["env"]["reward_randomization"] = 1 + eval_config["env"]["reward_conditioning"] = 1 + + # Apply safe_eval overrides for map_dir, goal distances, etc. + for override_key in ("map_dir", "num_maps", "min_goal_distance", "max_goal_distance"): + if override_key in self.safe_eval_config: + eval_config["env"][override_key] = self.safe_eval_config[override_key] + + # Discover valid reward bound names from env config + valid_bounds = set() + for key in eval_config["env"]: + m = re.match(r"reward_bound_(.+)_min$", key) + if m: + valid_bounds.add(m.group(1)) + + # Set min=max for each reward bound to fix the conditioning values + for key, val in self.safe_eval_config.items(): + if key not in valid_bounds: + continue + eval_config["env"][f"reward_bound_{key}_min"] = float(val) + eval_config["env"][f"reward_bound_{key}_max"] = float(val) + + return eval_config + + def evaluate(self, vecenv, policy): + """Run evaluation with safe reward conditioning and collect metrics. 
+ + Args: + vecenv: Vectorized environment (created with safe eval config) + policy: Trained policy to evaluate + + Returns: + dict: Averaged metrics over collected episodes + """ + from collections import defaultdict + + policy.eval() + num_agents = vecenv.observation_space.shape[0] + use_rnn = hasattr(policy, "hidden_size") + + ob, _ = vecenv.reset() + state = {} + dones = torch.zeros(num_agents, device=self.device) + prev_rewards = torch.zeros(num_agents, device=self.device) + if use_rnn: + state = dict( + lstm_h=torch.zeros(num_agents, policy.hidden_size, device=self.device), + lstm_c=torch.zeros(num_agents, policy.hidden_size, device=self.device), + ) + + all_stats = defaultdict(list) + episodes_collected = 0 + max_steps = (self.num_episodes // max(num_agents, 1) + 2) * self.episode_length + + for step in range(max_steps): + if episodes_collected >= self.num_episodes: + break + + with torch.no_grad(): + ob_t = torch.as_tensor(ob).to(self.device) + if use_rnn: + state["reward"] = prev_rewards + state["done"] = dones + logits, value = policy.forward_eval(ob_t, state) + action, logprob, _ = pufferlib.pytorch.sample_logits(logits) + action = action.cpu().numpy().reshape(vecenv.action_space.shape) + + if isinstance(logits, torch.distributions.Normal): + action = np.clip(action, vecenv.action_space.low, vecenv.action_space.high) + + ob, rewards, terminals, truncations, infos = vecenv.step(action) + prev_rewards = torch.as_tensor(rewards).float().to(self.device) + dones = torch.as_tensor(np.maximum(terminals, truncations)).float().to(self.device) + + for entry in infos: + if isinstance(entry, dict): + episodes_collected += int(entry.get("n", 1)) + for k, v in entry.items(): + try: + float(v) + all_stats[k].append(v) + except (TypeError, ValueError): + pass + + self.stats = {k: float(np.mean(v)) for k, v in all_stats.items() if len(v) > 0} + return self.stats + + def log_stats(self, global_step=None): + """Log collected metrics to wandb.""" + if self.stats is None: + 
return + + if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb): + return + + payload = {f"eval/safe_{k}": v for k, v in self.stats.items()} + if global_step is not None: + payload["train_step"] = global_step + self.logger.wandb.log(payload) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 5c9127727..4e4e06142 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -375,6 +375,7 @@ struct Drive { float max_goal_distance; char *ini_file; char scenario_id[SCENARIO_ID_STR_LENGTH]; + char video_suffix[64]; // Optional suffix appended to mp4 filename (e.g. "_bev") int collision_behavior; int offroad_behavior; float observation_window_size; @@ -3650,8 +3651,11 @@ Client *make_client(Drive *env) { char size_str[64]; snprintf(size_str, sizeof(size_str), "%dx%d", (int)client->width, (int)client->height); - char filename[256]; - snprintf(filename, sizeof(filename), "%s.mp4", env->scenario_id); + char filename[320]; + if (env->video_suffix[0] != '\0') + snprintf(filename, sizeof(filename), "%s%s.mp4", env->scenario_id, env->video_suffix); + else + snprintf(filename, sizeof(filename), "%s.mp4", env->scenario_id); client->recorder_pid = fork(); if (client->recorder_pid == -1) { diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index c464fddb6..6ca81b885 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -755,6 +755,14 @@ def get_road_edge_polylines(self): def render(self, view_mode: RenderView = RenderView.FULL_SIM_STATE, draw_traces: bool = True, env_id: int = 0): binding.vec_render(self.c_envs, int(view_mode), draw_traces, env_id) + def set_video_suffix(self, suffix: str, env_id: int = 0): + """Set the suffix appended to the mp4 filename for headless rendering. + + Must be called before the first render() call of a rollout. + E.g. 
set_video_suffix("_bev", env_id=0) → {scenario_id}_bev.mp4 + """ + binding.vec_set_video_suffix(self.c_envs, env_id, suffix) + def close(self): binding.vec_close(self.c_envs) diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index e6bc576c2..b6c89712a 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -550,6 +550,36 @@ static PyObject *vec_step(PyObject *self, PyObject *arg) { Py_RETURN_NONE; } +static PyObject *vec_set_video_suffix(PyObject *self, PyObject *args) { + // Set a suffix appended to the mp4 filename for headless rendering. + // Call this before the first vec_render of each rollout when using multi-view. + // Args: (vec_env_ptr, env_id, suffix_str) + int num_args = PyTuple_Size(args); + if (num_args != 3) { + PyErr_SetString(PyExc_TypeError, "vec_set_video_suffix requires 3 arguments: (vec, env_id, suffix)"); + return NULL; + } + VecEnv *vec = (VecEnv *)PyLong_AsVoidPtr(PyTuple_GetItem(args, 0)); + if (!vec) { + PyErr_SetString(PyExc_ValueError, "Invalid vec_env handle"); + return NULL; + } + int env_id = (int)PyLong_AsLong(PyTuple_GetItem(args, 1)); + const char *suffix = PyUnicode_AsUTF8(PyTuple_GetItem(args, 2)); + if (!suffix) { + PyErr_SetString(PyExc_TypeError, "suffix must be a string"); + return NULL; + } + if (env_id < 0 || env_id >= vec->num_envs) { + PyErr_SetString(PyExc_IndexError, "env_id out of range"); + return NULL; + } + Drive *env = (Drive *)vec->envs[env_id]; + strncpy(env->video_suffix, suffix, sizeof(env->video_suffix) - 1); + env->video_suffix[sizeof(env->video_suffix) - 1] = '\0'; + Py_RETURN_NONE; +} + static PyObject *vec_render(PyObject *self, PyObject *args) { int num_args = PyTuple_Size(args); if (num_args != 4) { @@ -1071,6 +1101,7 @@ static PyMethodDef methods[] = { {"vec_step", vec_step, METH_VARARGS, "Step the vector of environments"}, {"vec_log", vec_log, METH_VARARGS, "Log the vector of environments"}, {"vec_render", vec_render, METH_VARARGS, "Render the vector of 
environments"}, + {"vec_set_video_suffix", vec_set_video_suffix, METH_VARARGS, "Set mp4 filename suffix for the given env"}, {"vec_close", vec_close, METH_VARARGS, "Close the vector of environments"}, {"vec_get_scenario_ids", vec_get_scenario_ids, METH_VARARGS, "Get scenario IDs for all envs"}, {"shared", (PyCFunction)my_shared, METH_VARARGS | METH_KEYWORDS, "Shared state"}, diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 6709ea066..f44ead043 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -509,7 +509,6 @@ def train(self): if self.epoch % self.config["eval"]["eval_interval"] == 0 or done_training: human_replay_eval = self.config["eval"]["human_replay_eval"] self_play_eval = self.config["eval"]["self_play_eval"] - safe_eval = self.config.get("safe_eval", {}).get("enabled", False) self.evaluator = Evaluator(self.full_args, self.logger) if human_replay_eval: @@ -522,12 +521,7 @@ def train(self): self.evaluator.rollout(self.uncompiled_policy, mode="self_play") self.evaluator.sp_env.close() self.evaluator.log_videos(eval_mode="self_play", epoch=self.epoch) - if safe_eval: - self.evaluator.safe_env = load_env("puffer_drive", self.evaluator.safe_eval_config) - self.evaluator.rollout(self.uncompiled_policy, mode="safe_eval") - self.evaluator.safe_env.close() - self.evaluator.log_videos(eval_mode="safe_eval", epoch=self.epoch) - if human_replay_eval or self_play_eval or safe_eval: + if human_replay_eval or self_play_eval: self.evaluator.log_stats() del self.evaluator @@ -535,10 +529,55 @@ def train(self): if self.config["eval"]["wosac_realism_eval"]: pufferlib.utils.run_wosac_eval_in_subprocess(self.config, self.logger, self.global_step) - if self.config["eval"]["human_replay_eval"] and ( - (self.epoch - 1) % self.config["eval"]["eval_interval"] == 0 or done_training + safe_eval_config = self.config.get("safe_eval", {}) + safe_eval_enabled = safe_eval_config.get("enabled", False) + safe_eval_interval = int(safe_eval_config.get("interval", 
self.render_interval)) + is_main = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 + if ( + is_main + and safe_eval_enabled + and safe_eval_interval > 0 + and (self.epoch % safe_eval_interval == 0 or done_training) ): - pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step) + self._run_safe_eval() + + def _run_safe_eval(self): + """Run safe eval in-process using SafeEvaluator.""" + import copy + import traceback + + vecenv = None + try: + from pufferlib.ocean.benchmark.evaluator import SafeEvaluator + + self.msg = "Running safe eval..." + env_name = self.config["env"] + safe_eval_config = self.config.get("safe_eval", {}) + evaluator = SafeEvaluator(env_name, safe_eval_config, device=self.config["device"], logger=self.logger) + eval_config = evaluator._build_eval_env_config() + + vecenv = load_env(env_name, eval_config) + policy = load_policy(eval_config, vecenv, env_name) + + # Copy weights from in-memory policy (no checkpoint dependency) + state_dict = copy.deepcopy(self.uncompiled_policy.state_dict()) + # Strip DDP "module." 
prefix if present + state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} + policy.load_state_dict(state_dict) + + metrics = evaluator.evaluate(vecenv, policy) + evaluator.log_stats(global_step=self.global_step) + + self.msg = f"Safe eval: {len(metrics)} metrics logged" + except Exception as e: + self.msg = f"Safe eval failed: {e}" + traceback.print_exc(file=sys.stderr) + finally: + if vecenv is not None: + try: + vecenv.close() + except Exception: + pass def mean_and_log(self): config = self.config From ee80a1df2843ef35518ae4357999e28aa9cd85ea Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Sat, 28 Mar 2026 00:10:41 -0400 Subject: [PATCH 10/23] Further bug fixes --- pufferlib/ocean/benchmark/evaluator.py | 2 +- pufferlib/pufferl.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 7b158368d..49ac86f97 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -867,7 +867,7 @@ def _unpack_eval_configs(self, configs): backend = eval_config["eval"].get("backend", "PufferEnv") eval_config["env"]["map_dir"] = eval_config["eval"]["map_dir"] eval_config["env"]["num_agents"] = eval_config["eval"]["num_eval_agents"] - if self.config["eval"]["human_replay_eval"]: + if self.configs["eval"]["human_replay_eval"]: eval_config["env"]["episode_length"] = 91 # WOMD scenario length eval_config["vec"] = dict(backend=backend, num_envs=1) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index f44ead043..5fd632808 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -128,6 +128,7 @@ def __init__(self, config, vecenv, policy, logger=None, full_args=None): self.ep_lengths = torch.zeros(total_agents, device=device, dtype=torch.int32) self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32) self.free_idx = total_agents + self.render_interval = config["eval"]["eval_interval"] # LSTM if 
config["use_rnn"]: From 2099537f746b084187ba97b500e37d4d44e0a86c Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Sat, 28 Mar 2026 10:29:45 -0400 Subject: [PATCH 11/23] Cleaning up some commented dead code --- pufferlib/config/ocean/drive.ini | 10 ++-- pufferlib/ocean/drive/drive.c | 1 - pufferlib/ocean/drive/drive.h | 85 -------------------------------- 3 files changed, 5 insertions(+), 91 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 494ee9a4d..a94c974b4 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -54,19 +54,19 @@ polyline_max_segment_length = 10.0 episode_length = 300 resample_frequency = 300 termination_mode = 1 # 0 - terminate at episode_length, 1 - terminate after all agents have been reset -map_dir = "resources/drive/binaries/carla_3D" +map_dir = "resources/drive/binaries/carla_2D" num_maps = 3 ; If True, allows training with fewer maps than requested (warns instead of erroring) allow_fewer_maps = True ; Determines which step of the trajectory to initialize the agents at upon reset init_steps = 0 ; Options: "control_vehicles", "control_agents", "control_wosac", "control_sdc_only" -control_mode = "control_agents" +control_mode = "control_vehicles" ; Options: "create_all_valid", "create_only_controlled", "init_variable_agent_number"(creates random number of controlled agents per env) init_mode = "init_variable_agent_number" ; Below options only valid for "init_variable_agent_number" init_mode -min_agents_per_env = 31 -max_agents_per_env = 32 +min_agents_per_env = 1 +max_agents_per_env = 128 ; Dimension Ranges for agents spawn_width_min = 1.5 spawn_width_max = 2.5 @@ -295,7 +295,7 @@ max_frames = 91 init_mode = "init_variable_agent_number" control_mode = "control_vehicles" ; Policy bin file used for rendering videos -policy_path = "resources/drive/puffer_drive_weights_resampling_300.bin" +policy_path = "resources/drive/puffer_drive_weights.bin" ; Allows more than 
cpu cores workers for rendering overwork = True ; Display human xy logs in the background diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index d1a408b5a..57f38623e 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -145,7 +145,6 @@ void demo(const char *map_name_arg, const char *policy_name_arg, int view_mode, conf.init_mode); free_allocated(&env); return; - // return -1; } c_reset(&env); c_render(&env, view_mode, draw_traces); diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 4e4e06142..5f48e5bd6 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -22,11 +22,6 @@ #define VIEW_MODE_SIM_STATE 0 #define VIEW_MODE_BEV_AGENT_OBS 1 #define VIEW_MODE_AGENT_PERSP 2 -// // Order of entities in rendering (lower is rendered first) -// #define Z_ROAD_SURFACE 0.0f -// #define Z_ROAD_MARKINGS 0.05f // Lane lines, road lines, traces -// #define Z_AGENT_DETAILS 0.4f // Arrow, goal markers, obs overlays -// #define Z_AGENTS 0.6f // Vehicles, cyclists, pedestrians // constants for strings, data etc. 
#define SCENARIO_ID_STR_LENGTH 16 @@ -4500,86 +4495,6 @@ void c_render(Drive *env, int view_mode, int draw_traces) { DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); EndDrawing(); } - - // BeginDrawing(); - // Color road = (Color){35, 35, 37, 255}; - // ClearBackground(road); - // BeginMode3D(client->camera); - // handle_camera_controls(env->client); - // draw_scene(env, client, 0, 0, 0, 0); - - // if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { - // env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; - // } - - // // Draw debug info - // DrawText(TextFormat("Camera Position: (%.2f, %.2f, %.2f)", client->camera.position.x, client->camera.position.y, - // client->camera.position.z), - // 10, 10, 20, PUFF_WHITE); - // DrawText(TextFormat("Camera Target: (%.2f, %.2f, %.2f)", client->camera.target.x, client->camera.target.y, - // client->camera.target.z), - // 10, 30, 20, PUFF_WHITE); - // DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE); - - // int human_idx = env->active_agent_indices[env->human_agent_idx]; - // DrawText(TextFormat("Controlling Agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE); - // DrawText(TextFormat("Agent Index: %d", human_idx), 10, 90, 20, PUFF_WHITE); - - // // Display current action values - yellow when controlling, white otherwise - // Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? 
YELLOW : PUFF_WHITE; - - // if (env->action_type == 0) { // discrete - // int *action_array = (int *)env->actions; - // int action_val = action_array[env->human_agent_idx]; - - // if (env->dynamics_model == CLASSIC) { - // int num_steer = 13; - // int accel_idx = action_val / num_steer; - // int steer_idx = action_val % num_steer; - // float accel_value = ACCELERATION_VALUES[accel_idx]; - // float steer_value = STEERING_VALUES[steer_idx]; - - // DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color); - // DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color); - // } else if (env->dynamics_model == JERK) { - // int num_lat = 3; - // int jerk_long_idx = action_val / num_lat; - // int jerk_lat_idx = action_val % num_lat; - // float jerk_long_value = JERK_LONG[jerk_long_idx]; - // float jerk_lat_value = JERK_LAT[jerk_lat_idx]; - - // DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color); - // DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color); - // } - // } else { // continuous - // float (*action_array_f)[2] = (float (*)[2])env->actions; - // DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, - // action_color); DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, - // action_color); - // } - - // // Show key press status - // int status_y = 150; - // if (IsKeyDown(KEY_LEFT_SHIFT)) { - // DrawText("[shift pressed]", 10, status_y, 20, YELLOW); - // status_y += 20; - // } - // if (IsKeyDown(KEY_SPACE)) { - // DrawText("[space pressed]", 10, status_y, 20, YELLOW); - // status_y += 20; - // } - // if (IsKeyDown(KEY_LEFT_CONTROL)) { - // DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW); - // status_y += 20; - // } - - // // Controls help - // DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10, - // 
client->height - 30, 20, PUFF_WHITE); - - // DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE); - // DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); - // EndDrawing(); } void close_client(Client *client) { From ab8adf0390f2e210b748271897935550506ec431 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Sun, 29 Mar 2026 20:01:50 -0400 Subject: [PATCH 12/23] Fixing bugs based on PR comments --- pufferlib/config/ocean/drive.ini | 8 +++--- pufferlib/ocean/drive/drive.h | 44 ++++++++++++++++++-------------- pufferlib/ocean/drive/drive.py | 2 -- pufferlib/ocean/env_binding.h | 1 + pufferlib/pufferl.py | 6 ++--- 5 files changed, 32 insertions(+), 29 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index a94c974b4..2a3eab604 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -193,7 +193,7 @@ render_select_mode = "first" ; View mode(s) for eval rendering. Options: sim_state, bev, persp, both (sim_state+persp), all (sim_state+persp+bev) ; Multi-view options run a separate rollout per view, each producing its own wandb video under render/{mode}/{view} render_view_mode = "all" -; If True, render random scenarios. Note: Doing this frequency will slow down the training. +; If True, render random scenarios. Note: Doing this frequently will slow down the training. render_human_replay_eval = False render_self_play_eval = True @@ -276,8 +276,8 @@ acc = 1.0 [render] ; Render a batch of maps offline using the in-process c_render (ffmpeg) pipeline. 
-; Path to the .bin map directory
-map_dir = "resources/drive/binaries/carla/carla_3D"
+; Path to the .bin map directory
+map_dir = "resources/drive/binaries/carla_2D"
 ; Directory to write output mp4 files
 output_dir = "resources/drive/render_videos"
 ; Number of maps to render (capped at files available in map_dir)
@@ -294,8 +294,6 @@ max_frames = 91
 ; carla_2D maps carry recorded vehicle trajectories — use create_all_valid + control_vehicles.
 init_mode = "init_variable_agent_number"
 control_mode = "control_vehicles"
-; Policy bin file used for rendering videos
-policy_path = "resources/drive/puffer_drive_weights.bin"
 ; Allows more than cpu cores workers for rendering
 overwork = True
 ; Display human xy logs in the background
diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index 5f48e5bd6..79f06e940 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -2636,10 +2636,9 @@ void compute_agent_metrics(Drive *env, int agent_idx) {
 
     // Check for vehicle collisions (skip for pedestrians)
     int car_collided_with_index = -1;
-    if (agent->type != PEDESTRIAN) {
-        car_collided_with_index = collision_check(env, agent_idx);
-        if (car_collided_with_index != -1)
-            collided = VEHICLE_COLLISION;
+    car_collided_with_index = collision_check(env, agent_idx);
+    if (car_collided_with_index != -1) {
+        collided = VEHICLE_COLLISION;
     }
 
     agent->collision_state = collided;
@@ -3546,38 +3545,41 @@ Client *make_client(Drive *env) {
 
     if (env->render_mode == RENDER_HEADLESS && getenv("DISPLAY") == NULL) {
-        // Kill any existing Xvfb first
-        system("pkill -9 Xvfb");
-        usleep(200000);
-        unlink("/tmp/.X99-lock");
-        unlink("/tmp/.X11-unix/X99");
+        // Use a per-process display number so multiple rendering jobs on the same
+        // node don't collide. Range :100-:999 avoids the system default :0.
+ client->xvfb_display_num = 100 + (getpid() % 900); - // Hardcode to single display because we only run this in one process at once - client->xvfb_display_num = 99; + char lock_file[32], socket_file[32], display_str[16]; + snprintf(display_str, sizeof(display_str), ":%d", client->xvfb_display_num); + snprintf(lock_file, sizeof(lock_file), "/tmp/.X%d-lock", client->xvfb_display_num); + snprintf(socket_file, sizeof(socket_file), "/tmp/.X11-unix/X%d", client->xvfb_display_num); - // Clean up stale lock if process is dead - FILE *f = fopen("/tmp/.X99-lock", "r"); + // Clean up a stale lock only if the owning process is already dead + FILE *f = fopen(lock_file, "r"); if (f) { pid_t pid = -1; fscanf(f, "%d", &pid); fclose(f); - if (pid > 0 && kill(pid, 0) != 0) - unlink("/tmp/.X99-lock"); + if (pid > 0 && kill(pid, 0) != 0) { + unlink(lock_file); + unlink(socket_file); + } } client->xvfb_pid = fork(); if (client->xvfb_pid == 0) { close(STDOUT_FILENO); close(STDERR_FILENO); - execlp("Xvfb", "Xvfb", ":99", "-screen", "0", "1280x720x24", "+extension", "GLX", "-ac", "-noreset", NULL); + execlp("Xvfb", "Xvfb", display_str, "-screen", "0", "1280x720x24", "+extension", "GLX", "-ac", "-noreset", + NULL); _exit(1); } - setenv("DISPLAY", ":99", 1); + setenv("DISPLAY", display_str, 1); // Xvfb starts asynchronously after fork(), so we poll until it creates its // lock file (max 2s) then wait an extra 200ms for GLX to finish initializing. // Without this, raylib's InitWindow() would try to connect before Xvfb is ready. 
- for (int i = 0; i < 20 && access("/tmp/.X99-lock", F_OK) != 0; i++) + for (int i = 0; i < 20 && access(lock_file, F_OK) != 0; i++) usleep(100000); usleep(200000); } @@ -4510,7 +4512,11 @@ void close_client(Client *client) { if (client->xvfb_pid > 0) { kill(client->xvfb_pid, SIGTERM); waitpid(client->xvfb_pid, NULL, 0); - unlink("/tmp/.X99-lock"); + char lock_file[32], socket_file[32]; + snprintf(lock_file, sizeof(lock_file), "/tmp/.X%d-lock", client->xvfb_display_num); + snprintf(socket_file, sizeof(socket_file), "/tmp/.X11-unix/X%d", client->xvfb_display_num); + unlink(lock_file); + unlink(socket_file); unsetenv("DISPLAY"); } diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 6ca81b885..46726a942 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -606,8 +606,6 @@ def resample_maps(self): self.env_ids.append(env_id) self.c_envs = binding.vectorize(*self.env_ids) - self.c_envs = binding.vectorize(*self.env_ids) - binding.vec_reset(self.c_envs, seed) self.truncations[:] = 1 diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index b6c89712a..86be97971 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -1114,6 +1114,7 @@ static PyMethodDef methods[] = { "Get road edge polyline counts from vectorized env"}, {"vec_get_road_edge_polylines", vec_get_road_edge_polylines, METH_VARARGS, "Get road edge polylines from vectorized env"}, + {"env_log", env_log, METH_VARARGS, "Log a single environment"}, MY_METHODS, {NULL, NULL, 0, NULL}}; diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 5fd632808..31dbda7d8 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -128,7 +128,7 @@ def __init__(self, config, vecenv, policy, logger=None, full_args=None): self.ep_lengths = torch.zeros(total_agents, device=device, dtype=torch.int32) self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32) self.free_idx = total_agents - 
self.render_interval = config["eval"]["eval_interval"] + self.eval_interval = config["eval"]["eval_interval"] # LSTM if config["use_rnn"]: @@ -532,7 +532,7 @@ def train(self): safe_eval_config = self.config.get("safe_eval", {}) safe_eval_enabled = safe_eval_config.get("enabled", False) - safe_eval_interval = int(safe_eval_config.get("interval", self.render_interval)) + safe_eval_interval = int(safe_eval_config.get("interval", self.eval_interval)) is_main = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 if ( is_main @@ -1215,7 +1215,7 @@ def eval(env_name, args=None, vecenv=None, policy=None): if frame_count >= max_frames or done.all() or truncated.all(): break - vecenv.close() + vecenv.close() def sweep(args=None, env_name=None): From e61ccecda440b514f8715f84adf0a8312574e3b5 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 29 Mar 2026 20:13:24 -0400 Subject: [PATCH 13/23] Update pufferlib/pufferl.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pufferlib/pufferl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 31dbda7d8..d573ef37c 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -128,7 +128,8 @@ def __init__(self, config, vecenv, policy, logger=None, full_args=None): self.ep_lengths = torch.zeros(total_agents, device=device, dtype=torch.int32) self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32) self.free_idx = total_agents - self.eval_interval = config["eval"]["eval_interval"] + # Use a safe default when eval settings are absent + self.eval_interval = config.get("eval", {}).get("eval_interval", 0) # LSTM if config["use_rnn"]: From da25579e0871d53e52378d03dc568fc884fa6371 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Sun, 5 Apr 2026 10:03:16 -0400 Subject: [PATCH 14/23] Some bug fixes --- pufferlib/ocean/env_config.h | 2 ++ pufferlib/pufferl.py | 8 +++++--- 2 files changed, 7 
insertions(+), 3 deletions(-) diff --git a/pufferlib/ocean/env_config.h b/pufferlib/ocean/env_config.h index 0d83d9762..8aebe15ea 100644 --- a/pufferlib/ocean/env_config.h +++ b/pufferlib/ocean/env_config.h @@ -111,6 +111,8 @@ static int handler(void *config, const char *section, const char *name, const ch printf("Warning: Unknown dynamics_model value '%s', defaulting to JERK\n", value); env_config->dynamics_model = 1; // Default to JERK } + } else if (MATCH("env", "render_mode")) { + env_config->render_mode = atoi(value); } else if (MATCH("env", "goal_behavior")) { env_config->goal_behavior = atoi(value); } else if (MATCH("env", "reward_randomization")) { diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index d573ef37c..23a5025ac 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -514,12 +514,12 @@ def train(self): self.evaluator = Evaluator(self.full_args, self.logger) if human_replay_eval: - self.evaluator.hr_env = load_env("puffer_drive", self.evaluator.hr_eval_config) + self.evaluator.hr_env = load_env(self.config["env"], self.evaluator.hr_eval_config) self.evaluator.rollout(self.uncompiled_policy, mode="human_replay") self.evaluator.hr_env.close() self.evaluator.log_videos(eval_mode="human_replay", epoch=self.epoch) if self_play_eval: - self.evaluator.sp_env = load_env("puffer_drive", self.evaluator.sp_eval_config) + self.evaluator.sp_env = load_env(self.config["env"], self.evaluator.sp_eval_config) self.evaluator.rollout(self.uncompiled_policy, mode="self_play") self.evaluator.sp_env.close() self.evaluator.log_videos(eval_mode="self_play", epoch=self.epoch) @@ -528,7 +528,9 @@ def train(self): del self.evaluator - if self.config["eval"]["wosac_realism_eval"]: + if self.config["eval"]["wosac_realism_eval"] and ( + (self.epoch) % self.config["eval"]["eval_interval"] == 0 or done_training + ): pufferlib.utils.run_wosac_eval_in_subprocess(self.config, self.logger, self.global_step) safe_eval_config = self.config.get("safe_eval", {}) From 
04f9304c7e30f31401f1025679061ad618e9f95f Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Sun, 5 Apr 2026 10:26:57 -0400 Subject: [PATCH 15/23] Updating over latest 3.0 and some minor bug fixes --- pufferlib/ocean/drive/drive.h | 13 +++---------- pufferlib/ocean/env_binding.h | 21 +++++++++++++++++++-- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 79f06e940..4336b0161 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -128,8 +128,8 @@ #define MAX_CHECKED_LANES 32 // Ego features depend on dynamics model -#define EGO_FEATURES_CLASSIC 14 -#define EGO_FEATURES_JERK 17 +#define EGO_FEATURES_CLASSIC 13 +#define EGO_FEATURES_JERK 16 // Observation normalization constants #define MAX_SPEED 100.0f @@ -1505,11 +1505,6 @@ int collision_check(Drive *env, int agent_idx) { if (agent->sim_x == INVALID_POSITION) return -1; - // Skip collision checking for pedestrians because they are often too - // close to other entities at initialization. - if (agent->type == PEDESTRIAN) - return -1; - int car_collided_with_index = -1; if (agent->respawn_timestep != -1) @@ -1604,7 +1599,7 @@ static bool check_offroad(Drive *env, Agent *agent) { entity = &env->road_elements[entity_list[i].entity_idx]; // Check for offroad collision with road edges (only for vehicles and cyclists) - if (entity->type == ROAD_EDGE && agent->type != PEDESTRIAN) { + if (entity->type == ROAD_EDGE) { int geometry_idx = entity_list[i].geometry_idx; if (entity->z[geometry_idx] > agent->sim_z + agent->sim_height / 2.0f || entity->z[geometry_idx] < agent->sim_z - agent->sim_height / 2.0f) @@ -2805,7 +2800,6 @@ void compute_observations(Drive *env) { obs[13] = fminf(SPEED_LIMIT / MAX_SPEED, 1.0f); obs[14] = lane_center_dist; obs[15] = ego_entity->metrics_array[LANE_ANGLE_IDX]; - obs[16] = ego_entity->type / 3.0f; } else { obs[7] = (ego_entity->respawn_timestep != -1) ? 
1 : 0; obs[8] = normalized_goal_speed_min; @@ -2813,7 +2807,6 @@ void compute_observations(Drive *env) { obs[10] = fminf(SPEED_LIMIT / MAX_SPEED, 1.0f); obs[11] = lane_center_dist; obs[12] = ego_entity->metrics_array[LANE_ANGLE_IDX]; - obs[13] = ego_entity->type / 3.0f; } int obs_idx = (env->reward_conditioning == 1) ? ego_dim - NUM_REWARD_COEFS : ego_dim; // Placeholder for reward conditioning encoder - diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 86be97971..d4090ea46 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -702,6 +702,9 @@ static PyObject *env_log(PyObject *self, PyObject *args) { return NULL; } + PyObject *num_agents_arg = PyTuple_GetItem(args, 1); + float num_agents = (float)PyLong_AsLong(num_agents_arg); + // Aggregate this env's per-agent logs (same as vec_log but for one env) // Note: breaks horribly if you don't use floats Log aggregate = {0}; @@ -711,18 +714,32 @@ static PyObject *env_log(PyObject *self, PyObject *args) { } PyObject *dict = PyDict_New(); - if (aggregate.n == 0.0f) { + + // Mirror vec_log: only report when enough data has accumulated + if (aggregate.n < num_agents) { return dict; } + // Reset log now that we've consumed it, mirroring vec_log + for (int j = 0; j < num_keys; j++) { + ((float *)&env->log)[j] = 0.0f; + } + // Average across agents in env float n = aggregate.n; for (int i = 0; i < num_keys; i++) { ((float *)&aggregate)[i] /= n; } - aggregate.n = (float)env->active_agent_count; + + // Compute completion_rate from raw counts (mirrors vec_log) + if (aggregate.goals_attempted_this_episode > 0.0f) { + aggregate.completion_rate = aggregate.goals_reached_this_episode / aggregate.goals_attempted_this_episode; + } else { + aggregate.completion_rate = 0.0f; + } my_log(dict, &aggregate); + assign_to_dict(dict, "n", n); return dict; } From 293f9d2b1ffcef7f29888da11e40ec3adc17c5b1 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Tue, 7 Apr 2026 11:20:40 -0400 
Subject: [PATCH 16/23] Adding safe eval renders --- pufferlib/ocean/benchmark/evaluator.py | 105 ++++++++++++++++++++++++- pufferlib/pufferl.py | 9 ++- 2 files changed, 112 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 49ac86f97..8a416d2e3 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -1111,7 +1111,7 @@ class SafeEvaluator: args dict — only the env_name, safe_eval config, and device. """ - def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger=None): + def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger=None, full_config=None): self.env_name = env_name self.logger = logger self.safe_eval_config = safe_eval_config @@ -1122,6 +1122,30 @@ def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger= device = f"cuda:{device}" self.device = device self.stats = None + self.render_safe_eval = safe_eval_config.get("render_safe_eval", False) + # Resolve view modes from [eval].render_view_mode + from pufferlib.ocean.drive.drive import RenderView + + _VIEW_MODE_MAP = { + "sim_state": [RenderView.FULL_SIM_STATE], + "topdown": [RenderView.FULL_SIM_STATE], + "bev": [RenderView.BEV_AGENT_OBS], + "agent": [RenderView.BEV_AGENT_OBS], + "persp": [RenderView.AGENT_PERSP], + "both": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP], + "all": [RenderView.FULL_SIM_STATE, RenderView.AGENT_PERSP, RenderView.BEV_AGENT_OBS], + } + self._view_suffix = { + RenderView.FULL_SIM_STATE: "sim_state", + RenderView.AGENT_PERSP: "persp", + RenderView.BEV_AGENT_OBS: "bev", + } + view_mode_str = "sim_state" + if full_config is not None: + view_mode_str = ( + str(full_config.get("eval", {}).get("render_view_mode", "sim_state")).lower().strip('"').strip("'") + ) + self.render_view_modes = _VIEW_MODE_MAP.get(view_mode_str, [RenderView.FULL_SIM_STATE]) def _build_eval_env_config(self): """Build env 
config with safe reward conditioning values applied. @@ -1234,6 +1258,85 @@ def evaluate(self, vecenv, policy): self.stats = {k: float(np.mean(v)) for k, v in all_stats.items() if len(v) > 0} return self.stats + def render(self, eval_config, policy): + """Run a single rollout with rendering enabled, one env per view mode. + + Always renders env index 0 (first env). Uses [eval].render_view_mode. + Produces .mp4 files on disk (flushed when each render_env is closed). + """ + from pufferlib.ocean.drive.drive import RenderView + from pufferlib.pufferl import load_env + import copy + + multi_view = len(self.render_view_modes) > 1 + for view_mode in self.render_view_modes: + suffix = f"_{self._view_suffix[view_mode]}" if multi_view else "" + render_cfg = copy.deepcopy(eval_config) + render_cfg["env"]["render_mode"] = 1 + render_env = load_env(self.env_name, render_cfg) + driver = render_env.driver_env + if suffix: + driver.set_video_suffix(suffix, env_id=0) + try: + num_agents = render_env.observation_space.shape[0] + use_rnn = hasattr(policy, "hidden_size") + ob, _ = render_env.reset() + state = {} + if use_rnn: + state = dict( + lstm_h=torch.zeros(num_agents, policy.hidden_size, device=self.device), + lstm_c=torch.zeros(num_agents, policy.hidden_size, device=self.device), + ) + for _ in range(self.episode_length): + driver.render(view_mode=view_mode, env_id=0) + with torch.no_grad(): + ob_t = torch.as_tensor(ob).to(self.device) + logits, value = policy.forward_eval(ob_t, state) + action, _, _ = pufferlib.pytorch.sample_logits(logits) + action_np = action.cpu().numpy().reshape(render_env.action_space.shape) + if isinstance(logits, torch.distributions.Normal): + action_np = np.clip(action_np, render_env.action_space.low, render_env.action_space.high) + ob, _, _, truncs, _ = render_env.step(action_np) + if truncs.all(): + break + finally: + render_env.close() + + def log_videos(self, epoch): + """Glob .mp4 files produced by render() and log them to wandb under 
render/safe_eval.""" + import os + import glob + + if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb): + for p in glob.glob("*.mp4"): + os.remove(p) + return + + import wandb + + video_files = glob.glob("*.mp4") + if not video_files: + print("Warning: safe_eval render produced no mp4 files") + return + + multi_view = len(self.render_view_modes) > 1 + _known_suffixes = {"_sim_state", "_persp", "_bev"} + for p in video_files: + stem = os.path.splitext(os.path.basename(p))[0] + view_tag = "" + if multi_view: + for s in _known_suffixes: + if stem.endswith(s): + view_tag = s[1:] + stem = stem[: -len(s)] + break + caption = f"scene_{stem}_epoch_{epoch}_safe_eval" + wandb_key = f"render/safe_eval/{view_tag}" if view_tag else "render/safe_eval" + self.logger.wandb.log({wandb_key: wandb.Video(p, format="mp4", caption=caption)}) + + for p in video_files: + os.remove(p) + def log_stats(self, global_step=None): """Log collected metrics to wandb.""" if self.stats is None: diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 23a5025ac..6aba814b7 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -557,7 +557,9 @@ def _run_safe_eval(self): self.msg = "Running safe eval..." env_name = self.config["env"] safe_eval_config = self.config.get("safe_eval", {}) - evaluator = SafeEvaluator(env_name, safe_eval_config, device=self.config["device"], logger=self.logger) + evaluator = SafeEvaluator( + env_name, safe_eval_config, device=self.config["device"], logger=self.logger, full_config=self.config + ) eval_config = evaluator._build_eval_env_config() vecenv = load_env(env_name, eval_config) @@ -572,6 +574,11 @@ def _run_safe_eval(self): metrics = evaluator.evaluate(vecenv, policy) evaluator.log_stats(global_step=self.global_step) + if evaluator.render_safe_eval: + self.msg = "Rendering safe eval..." 
+ evaluator.render(eval_config, policy) + evaluator.log_videos(epoch=self.epoch) + self.msg = f"Safe eval: {len(metrics)} metrics logged" except Exception as e: self.msg = f"Safe eval failed: {e}" From 90361f54ada6cc17358667a4621c7962a663ec95 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Tue, 7 Apr 2026 12:07:34 -0400 Subject: [PATCH 17/23] Fixing some episoden length related bugs --- pufferlib/ocean/benchmark/evaluator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 8a416d2e3..c54ce2406 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -835,7 +835,6 @@ class Evaluator: def __init__(self, configs, logger=None): self.configs = configs self.logger = logger - self.sim_steps = 90 self.self_play_stats = None self.human_replay_stats = None self.sp_env = None @@ -867,8 +866,6 @@ def _unpack_eval_configs(self, configs): backend = eval_config["eval"].get("backend", "PufferEnv") eval_config["env"]["map_dir"] = eval_config["eval"]["map_dir"] eval_config["env"]["num_agents"] = eval_config["eval"]["num_eval_agents"] - if self.configs["eval"]["human_replay_eval"]: - eval_config["env"]["episode_length"] = 91 # WOMD scenario length eval_config["vec"] = dict(backend=backend, num_envs=1) self.render_sp_rollout = self.configs["eval"]["render_self_play_eval"] @@ -883,6 +880,8 @@ def _unpack_eval_configs(self, configs): self.hr_eval_config["env"]["render_mode"] = ( 0 # primary env: stats only; render envs created per-view in rollout() ) + if self.configs["eval"]["human_replay_eval"]: + self.hr_eval_config["env"]["episode_length"] = 91 # WOMD scenario length self.sp_eval_config = copy.deepcopy(eval_config) self.sp_eval_config["env"]["control_mode"] = "control_agents" @@ -1011,7 +1010,8 @@ def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False, vie ) info_list = [] - for time_idx in 
range(self.sim_steps): + episode_length = env.driver_env.episode_length + for time_idx in range(episode_length): if render_env_idx is not None: driver.render(view_mode=view_mode, env_id=render_env_idx) From eb2c53038500e082bfcdc8bf8bb2a370bf993204 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Tue, 7 Apr 2026 12:44:58 -0400 Subject: [PATCH 18/23] Fixing bugs with human replay with variable agent mode --- pufferlib/config/ocean/drive.ini | 15 +++++++++------ pufferlib/ocean/benchmark/evaluator.py | 24 +++++++++++++++++++++--- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 2a3eab604..5da2a7f15 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -180,14 +180,21 @@ checkpoint_interval = 1000 [eval] eval_interval = 1000 -; Path to dataset used for evaluation -map_dir = "resources/drive/binaries/training" +; Map directory for self-play evaluation (Carla maps) +sp_map_dir = "resources/drive/binaries/carla_2D" +; Map directory for human replay evaluation (WOMD training scenarios) +hr_map_dir = "resources/drive/binaries/training" +; Number of agents for self-play evaluation num_eval_agents = 64 +; Number of agents for human replay evaluation (one SDC per scenario) +human_replay_num_agents = 16 ; If True, enable self-play evaluation (pair policy-controlled agent with a copy of itself) self_play_eval = True ; If True, enable human replay evaluation (pair policy-controlled agent with human replays) human_replay_eval = False +; Control mode for human replay: "control_sdc_only" controls only the SDC; others replay logged trajectories +human_replay_control_mode = "control_sdc_only" ; Which env to render during eval. Options: "first" (by index), "worst_collision", "random" render_select_mode = "first" ; View mode(s) for eval rendering. 
Options: sim_state, bev, persp, both (sim_state+persp), all (sim_state+persp+bev) @@ -227,10 +234,6 @@ wosac_sanity_check = False wosac_aggregate_results = True ; Evaluation mode: "policy", "ground_truth" wosac_eval_mode = "policy" -; Control only the self-driving car -human_replay_control_mode = "control_sdc_only" -; Number of scenarios for human replay evaluation equals the number of agents -human_replay_num_agents = 16 [safe_eval] ; If True, periodically run policy with safe/law-abiding reward conditioning and log metrics diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index c54ce2406..3ebe97c38 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -864,8 +864,6 @@ def _unpack_eval_configs(self, configs): # Create separate evaluation environments based on specified configs eval_config["env"]["termination_mode"] = 0 backend = eval_config["eval"].get("backend", "PufferEnv") - eval_config["env"]["map_dir"] = eval_config["eval"]["map_dir"] - eval_config["env"]["num_agents"] = eval_config["eval"]["num_eval_agents"] eval_config["vec"] = dict(backend=backend, num_envs=1) self.render_sp_rollout = self.configs["eval"]["render_self_play_eval"] @@ -875,15 +873,35 @@ def _unpack_eval_configs(self, configs): self.render_view_modes = _VIEW_MODE_MAP.get(view_mode_str, [RenderView.FULL_SIM_STATE]) self.render_view_suffix = _VIEW_SUFFIX + # --- Human replay config --- + hr_control_mode = ( + str(self.configs["eval"].get("human_replay_control_mode", "control_sdc_only")).strip('"').strip("'") + ) + hr_num_agents = int(self.configs["eval"].get("human_replay_num_agents", 16)) + hr_map_dir = ( + str(self.configs["eval"].get("hr_map_dir", "resources/drive/binaries/training")).strip('"').strip("'") + ) + self.hr_eval_config = copy.deepcopy(eval_config) - self.hr_eval_config["env"]["control_mode"] = "control_sdc_only" + self.hr_eval_config["env"]["map_dir"] = hr_map_dir + 
self.hr_eval_config["env"]["num_agents"] = hr_num_agents + self.hr_eval_config["env"]["control_mode"] = hr_control_mode + self.hr_eval_config["env"]["init_mode"] = "create_all_valid" self.hr_eval_config["env"]["render_mode"] = ( 0 # primary env: stats only; render envs created per-view in rollout() ) if self.configs["eval"]["human_replay_eval"]: self.hr_eval_config["env"]["episode_length"] = 91 # WOMD scenario length + # --- Self-play config --- + sp_map_dir = ( + str(self.configs["eval"].get("sp_map_dir", "resources/drive/binaries/carla_2D")).strip('"').strip("'") + ) + sp_num_agents = int(self.configs["eval"].get("num_eval_agents", 64)) + self.sp_eval_config = copy.deepcopy(eval_config) + self.sp_eval_config["env"]["map_dir"] = sp_map_dir + self.sp_eval_config["env"]["num_agents"] = sp_num_agents self.sp_eval_config["env"]["control_mode"] = "control_agents" self.sp_eval_config["env"]["render_mode"] = ( 0 # primary env: stats only; render envs created per-view in rollout() From be85a6f6a236c4e319ed0b283ef93135624d96eb Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 11 Apr 2026 10:26:57 -0400 Subject: [PATCH 19/23] Update drive.ini nit: spell check --- pufferlib/config/ocean/drive.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 5da2a7f15..6453d0bb9 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -279,7 +279,7 @@ acc = 1.0 [render] ; Render a batch of maps offline using the in-process c_render (ffmpeg) pipeline. 
-; Path to the .bin map directoryz +; Path to the .bin map directory map_dir = "resources/drive/binaries/carla_2D" ; Directory to write output mp4 files output_dir = "resources/drive/render_videos" From 849be1f92290bb463f6993a0cf9a0ac2163999e5 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 11 Apr 2026 10:27:46 -0400 Subject: [PATCH 20/23] Update drive.ini turn on safe eval render --- pufferlib/config/ocean/drive.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 6453d0bb9..9ff2fa63d 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -240,7 +240,7 @@ wosac_eval_mode = "policy" enabled = True ; How often to run safe eval (in training epochs). Defaults to render_interval. interval = 250 -render_safe_eval = False +render_safe_eval = True ; Number of agents to run in the eval environment num_agents = 50 ; Number of episodes to collect metrics over From f418fc10f4eee30e35777c4e482762571a94d23d Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 11 Apr 2026 10:29:25 -0400 Subject: [PATCH 21/23] Update drive.h remove stale comment --- pufferlib/ocean/drive/drive.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 4336b0161..741a9307e 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2629,7 +2629,7 @@ void compute_agent_metrics(Drive *env, int agent_idx) { // FORMAT COMES IN agent->metrics_array[SPEED_LIMIT_IDX] = (speed_magnitude > SPEED_LIMIT + 2.0f) ? 
1.0f : 0.0f; - // Check for vehicle collisions (skip for pedestrians) + // Check for vehicle collisions int car_collided_with_index = -1; car_collided_with_index = collision_check(env, agent_idx); if (car_collided_with_index != -1) { From face788be77a6f03631bae4acb220568589446b3 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 11 Apr 2026 10:31:28 -0400 Subject: [PATCH 22/23] Update drive.ini render should not override training settings --- pufferlib/config/ocean/drive.ini | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 9ff2fa63d..e68a53185 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -291,12 +291,6 @@ num_maps = 3 view_mode = "persp" ; Whether to draw agent trajectory traces draw_traces = True -; Maximum number of frames to render per map -max_frames = 91 -; Override [env] init/control modes for rendering logged-trajectory (carla_2D) maps. -; carla_2D maps carry recorded vehicle trajectories — use create_all_valid + control_vehicles. 
-init_mode = "init_variable_agent_number" -control_mode = "control_vehicles" ; Allows more than cpu cores workers for rendering overwork = True ; Display human xy logs in the background From 02d8ecf89895597b70095da634876c923f7b79e0 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 11 Apr 2026 14:10:56 -0400 Subject: [PATCH 23/23] Unify rollout loop across Evaluator, render(), and SafeEvaluator (#395) Unify rollout loop across Evaluator, render(), and SafeEvaluator --- pufferlib/ocean/benchmark/evaluator.py | 119 ++++++++------------- pufferlib/ocean/drive/rollout.py | 142 +++++++++++++++++++++++++ pufferlib/pufferl.py | 64 ++++++----- 3 files changed, 217 insertions(+), 108 deletions(-) create mode 100644 pufferlib/ocean/drive/rollout.py diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 3ebe97c38..55b0f5a3e 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -1000,57 +1000,29 @@ def rollout(self, policy, mode="self_play"): def _run_rollout(self, policy, env, render_env_idx=None, per_env_logs=False, view_mode=None, view_suffix=""): """Run a single rollout. If render_env_idx is not None, render that env. - view_mode: RenderView enum to use when rendering (defaults to FULL_SIM_STATE). - view_suffix: appended to the mp4 filename when logging multiple views, e.g. "_bev". + Thin wrapper around :func:`pufferlib.ocean.drive.rollout.rollout_loop`. + The shared helper owns the actual forward-sample-step-break loop; this + method just builds the RenderContext when rendering is requested. """ from pufferlib.ocean.drive.drive import RenderView - - driver = env.driver_env - num_agents = env.observation_space.shape[0] - device = self.configs["train"]["device"] - if view_mode is None: - view_mode = RenderView.FULL_SIM_STATE - - # Set video filename suffix in C before the first render call of this rollout. - # This ensures each view produces a uniquely named mp4 (e.g. 
{scenario_id}_bev.mp4). - if render_env_idx is not None and view_suffix: - driver.set_video_suffix(view_suffix, env_id=render_env_idx) - - # Reset environment - obs, info = env.reset() - - # Initialize RNN state if needed - state = {} - if self.configs["train"]["use_rnn"]: - state = dict( - lstm_h=torch.zeros(num_agents, policy.hidden_size, device=device), - lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), + from pufferlib.ocean.drive.rollout import RenderContext, rollout_loop + + render_ctx = None + if render_env_idx is not None: + render_ctx = RenderContext( + view_mode=view_mode if view_mode is not None else RenderView.FULL_SIM_STATE, + env_id=render_env_idx, + video_suffix=view_suffix, ) - info_list = [] - episode_length = env.driver_env.episode_length - for time_idx in range(episode_length): - if render_env_idx is not None: - driver.render(view_mode=view_mode, env_id=render_env_idx) - - # Get action from policy - with torch.no_grad(): - ob_tensor = torch.as_tensor(obs).to(device) - logits, value = policy.forward_eval(ob_tensor, state) - action, logprob, _ = pufferlib.pytorch.sample_logits(logits) - action_np = action.cpu().numpy().reshape(env.action_space.shape) - - # Clip continuous actions to valid range - if isinstance(logits, torch.distributions.Normal): - action_np = np.clip(action_np, env.action_space.low, env.action_space.high) - - # Step environment - obs, rewards, dones, truncs, info_list = env.step(action_np, per_env_logs=per_env_logs) - - if truncs.all(): - break - - return info_list + return rollout_loop( + policy=policy, + env=env, + device=self.configs["train"]["device"], + use_rnn=self.configs["train"]["use_rnn"], + render_ctx=render_ctx, + per_env_logs=per_env_logs, + ) def log_videos(self, eval_mode, epoch): """Log all mp4s in local path to wandb after env close has flushed ffmpeg pipes.""" @@ -1165,6 +1137,14 @@ def __init__(self, env_name: str, safe_eval_config: Dict, device="cuda", logger= ) self.render_view_modes = 
_VIEW_MODE_MAP.get(view_mode_str, [RenderView.FULL_SIM_STATE]) + # Authoritative RNN flag comes from `full_config`, which PuffeRL passes + # in flattened form for this evaluator. In this code path the flag + # lives at `full_config["use_rnn"]` (no `train` nesting) — it is set + # during config loading from `rnn_name`. We previously used + # hasattr(policy, "hidden_size") as a proxy, which is fragile because + # non-RNN policies can also expose hidden_size. + self.use_rnn = bool(full_config.get("use_rnn", False)) if full_config is not None else False + def _build_eval_env_config(self): """Build env config with safe reward conditioning values applied. @@ -1227,7 +1207,7 @@ def evaluate(self, vecenv, policy): policy.eval() num_agents = vecenv.observation_space.shape[0] - use_rnn = hasattr(policy, "hidden_size") + use_rnn = self.use_rnn ob, _ = vecenv.reset() state = {} @@ -1281,42 +1261,33 @@ def render(self, eval_config, policy): Always renders env index 0 (first env). Uses [eval].render_view_mode. Produces .mp4 files on disk (flushed when each render_env is closed). + + Thin wrapper around :func:`pufferlib.ocean.drive.rollout.rollout_loop`. 
""" - from pufferlib.ocean.drive.drive import RenderView - from pufferlib.pufferl import load_env import copy + from pufferlib.pufferl import load_env + from pufferlib.ocean.drive.rollout import RenderContext, rollout_loop + multi_view = len(self.render_view_modes) > 1 for view_mode in self.render_view_modes: suffix = f"_{self._view_suffix[view_mode]}" if multi_view else "" render_cfg = copy.deepcopy(eval_config) render_cfg["env"]["render_mode"] = 1 render_env = load_env(self.env_name, render_cfg) - driver = render_env.driver_env - if suffix: - driver.set_video_suffix(suffix, env_id=0) try: - num_agents = render_env.observation_space.shape[0] - use_rnn = hasattr(policy, "hidden_size") - ob, _ = render_env.reset() - state = {} - if use_rnn: - state = dict( - lstm_h=torch.zeros(num_agents, policy.hidden_size, device=self.device), - lstm_c=torch.zeros(num_agents, policy.hidden_size, device=self.device), - ) - for _ in range(self.episode_length): - driver.render(view_mode=view_mode, env_id=0) - with torch.no_grad(): - ob_t = torch.as_tensor(ob).to(self.device) - logits, value = policy.forward_eval(ob_t, state) - action, _, _ = pufferlib.pytorch.sample_logits(logits) - action_np = action.cpu().numpy().reshape(render_env.action_space.shape) - if isinstance(logits, torch.distributions.Normal): - action_np = np.clip(action_np, render_env.action_space.low, render_env.action_space.high) - ob, _, _, truncs, _ = render_env.step(action_np) - if truncs.all(): - break + rollout_loop( + policy=policy, + env=render_env, + device=self.device, + use_rnn=self.use_rnn, + max_steps=self.episode_length, + render_ctx=RenderContext( + view_mode=view_mode, + env_id=0, + video_suffix=suffix, + ), + ) finally: render_env.close() diff --git a/pufferlib/ocean/drive/rollout.py b/pufferlib/ocean/drive/rollout.py new file mode 100644 index 000000000..5c5c1a29c --- /dev/null +++ b/pufferlib/ocean/drive/rollout.py @@ -0,0 +1,142 @@ +"""Shared rollout loop for Drive evaluation and rendering. 
+ +Single source of truth for the forward-sample-step-break cycle. Used by: + - ``Evaluator._run_rollout`` — periodic training eval, optional env-selective render + - ``pufferl.render`` — offline batch rendering, one video per map + - ``SafeEvaluator.render`` — safe-eval-time rendering + +Extracting this eliminates three hand-maintained copies of the same loop that +had drifted in subtle ways: + * ``render_one_map`` was missing continuous-action clipping + * ``render_one_map`` used ``done.all() or truncated.all()`` while the others used ``truncs.all()`` + * ``render_one_map`` inferred video filenames via a ``glob`` diff instead of calling ``set_video_suffix`` + * ``SafeEvaluator`` was using ``hasattr(policy, "hidden_size")`` as a proxy + for "is an RNN policy", which is fragile (many non-RNN policies also + expose ``hidden_size``). Callers now pass the authoritative ``use_rnn`` + flag from the training config. + +Callers pass a ``RenderContext`` to turn on rendering; pass ``None`` for a pure +stats rollout. +""" + +from dataclasses import dataclass +from typing import Optional + +import numpy as np +import torch + +import pufferlib.pytorch + + +@dataclass +class RenderContext: + """Enables rendering inside :func:`rollout_loop`. + + Attributes: + view_mode: ``RenderView`` enum value passed to ``driver.render``. + env_id: which sub-env in the vecenv to record from (default 0). + draw_traces: whether the C renderer should overlay trajectory traces. + video_suffix: appended to the mp4 filename; applied once before the + first render via ``set_video_suffix`` so multi-view rollouts don't + collide on output paths. + """ + + view_mode: int + env_id: int = 0 + draw_traces: bool = True + video_suffix: str = "" + + +def rollout_loop( + policy, + env, + device, + use_rnn: bool, + max_steps: Optional[int] = None, + render_ctx: Optional[RenderContext] = None, + per_env_logs: bool = False, +): + """Run a single policy rollout in a Drive vecenv. 
+ + Args: + policy: the policy to run. Caller is responsible for calling ``.eval()``. + env: a ``PufferEnv``-compatible vecenv wrapping one or more Drive sub-envs. + device: torch device for observation / state tensors. + use_rnn: whether to allocate and carry LSTM hidden state. + max_steps: loop iteration cap. Defaults to ``env.driver_env.episode_length``. + render_ctx: if set, render the specified env/view every step before + sampling actions. Filename suffix is applied via ``set_video_suffix``. + per_env_logs: passed through to ``env.step`` so callers can get + unaggregated per-env logs instead of the default ``vec_log`` aggregate. + + Returns: + The last ``info`` returned by ``env.step``. For stats rollouts this is + the aggregated ``vec_log`` dict or per-env logs list; for render + rollouts callers typically ignore it. + """ + driver = env.driver_env + num_agents = env.observation_space.shape[0] + + # Set (or clear) the video filename suffix before the first render call so + # the C binding names the mp4 correctly up front (e.g. {scenario_id}_bev.mp4). + # Without this, callers have to glob-diff cwd and rename post hoc. We call + # this unconditionally whenever render_ctx is provided — including the + # empty-string case — so reused envs can't leak a stale suffix from a + # prior rollout into the default-filename path. + if render_ctx is not None: + driver.set_video_suffix(render_ctx.video_suffix, env_id=render_ctx.env_id) + + obs, _ = env.reset() + + state = {} + if use_rnn: + state = dict( + lstm_h=torch.zeros(num_agents, policy.hidden_size, device=device), + lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), + ) + + if max_steps is None: + max_steps = driver.episode_length + + info = [] + for _ in range(max_steps): + # Render BEFORE the step so each frame shows the state the policy was + # conditioned on. 
+ if render_ctx is not None: + driver.render( + view_mode=render_ctx.view_mode, + draw_traces=render_ctx.draw_traces, + env_id=render_ctx.env_id, + ) + + with torch.no_grad(): + ob_t = torch.as_tensor(obs).to(device) + logits, _ = policy.forward_eval(ob_t, state) + action, _, _ = pufferlib.pytorch.sample_logits(logits) + action_np = action.cpu().numpy().reshape(env.action_space.shape) + + # Clip continuous actions to the valid range. This was previously + # missing in render_one_map, so continuous policies could emit OOB + # actions during offline rendering. + if isinstance(logits, torch.distributions.Normal): + action_np = np.clip(action_np, env.action_space.low, env.action_space.high) + + # Only pass per_env_logs when the caller explicitly asked for it. + # pufferlib.vector.Serial.step == pufferlib.vector.step, whose + # signature is (vecenv, actions) with no kwargs — passing per_env_logs + # unconditionally would raise TypeError on render_one_map's Serial + # backend. The per_env_logs=True path is only exercised by the + # Evaluator with a native PufferEnv backend where Drive.step does + # accept the kwarg. + if per_env_logs: + obs, _, _, truncs, info = env.step(action_np, per_env_logs=True) + else: + obs, _, _, truncs, info = env.step(action_np) + + # truncs.all() is set in the single c_step where the env auto-resets + # (time limit or early termination). Breaking here exits the loop + # exactly when the current episode wraps. + if truncs.all(): + break + + return info diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 6aba814b7..e9d84b41f 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1626,13 +1626,17 @@ def render(env_name, args=None): def render_one_map(map_path): """Render a single map file in one or more view modes, moving resulting mp4(s) to output_dir. - Each view mode runs a separate rollout so that each gets its own ffmpeg - pipe and output file. 
Output files are named {scenario_id}_{view}.mp4 - when multiple views are requested, or {scenario_id}.mp4 for a single view. + Each view mode runs a separate rollout via the shared + :func:`pufferlib.ocean.drive.rollout.rollout_loop` helper, so that each + gets its own ffmpeg pipe and output file. Output files are named + ``{scenario_id}_{view}.mp4`` for multi-view runs or ``{scenario_id}.mp4`` + for single-view runs — the C binding writes the correct name directly + via ``set_video_suffix``, no post-hoc renaming required. """ + from pufferlib.ocean.drive.rollout import RenderContext, rollout_loop + map_name = os.path.splitext(os.path.basename(map_path))[0] - # View-mode suffix labels used in output filenames _VIEW_SUFFIX = { RenderView.FULL_SIM_STATE: "sim_state", RenderView.AGENT_PERSP: "persp", @@ -1649,7 +1653,7 @@ def render_one_map(map_path): **args["env"], "num_maps": 1, "map_dir": tmp_map_dir, - "render_mode": 1, # headless ffmpeg → writes {scenario_id}.mp4 in cwd + "render_mode": 1, # headless ffmpeg → writes {scenario_id}[suffix].mp4 in cwd } if render_init_mode is not None: env_overrides["init_mode"] = render_init_mode @@ -1666,46 +1670,38 @@ def render_one_map(map_path): policy = load_policy(map_args, env, env_name) policy.eval() - ob, _ = env.reset() - driver = env.driver_env - - state = {} - if map_args["train"]["use_rnn"]: - n = env.agents_per_batch if hasattr(env, "agents_per_batch") else ob.shape[0] - state = dict( - lstm_h=torch.zeros(n, policy.hidden_size, device=device), - lstm_c=torch.zeros(n, policy.hidden_size, device=device), - ) - - # Remove any stale mp4 from a previous run - stale = os.path.join(os.getcwd(), f"{map_name}.mp4") + # Clean up any stale mp4 from a previous run and snapshot cwd so + # we can find the new file(s) created during this rollout. 
+ suffix = f"_{_VIEW_SUFFIX[view_mode]}" if multi else "" + stale = os.path.join(os.getcwd(), f"{map_name}{suffix}.mp4") if os.path.exists(stale): os.remove(stale) - before = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) - for _ in range(max_frames): - driver.render(view_mode=view_mode, draw_traces=draw_traces) - - with torch.no_grad(): - ob_t = torch.as_tensor(ob).to(device) - logits, _ = policy.forward_eval(ob_t, state) - action, _, _ = pufferlib.pytorch.sample_logits(logits) - action = action.cpu().numpy().reshape(env.action_space.shape) - - ob, _, done, truncated, _ = env.step(action) - if done.all() or truncated.all(): - break + rollout_loop( + policy=policy, + env=env, + device=device, + use_rnn=map_args["train"]["use_rnn"], + max_steps=max_frames, + render_ctx=RenderContext( + view_mode=view_mode, + env_id=0, + draw_traces=draw_traces, + video_suffix=suffix, + ), + ) env.close() + # Move the newly produced mp4(s) to output_dir. With + # set_video_suffix, filenames are correct by construction — no + # post-hoc renaming needed. after = set(glob.glob(os.path.join(os.getcwd(), "*.mp4"))) new_mp4s = after - before if new_mp4s: for src in sorted(new_mp4s): - base = os.path.splitext(os.path.basename(src))[0] - suffix = f"_{_VIEW_SUFFIX[view_mode]}" if multi else "" - dst = os.path.join(output_dir, f"{base}{suffix}.mp4") + dst = os.path.join(output_dir, os.path.basename(src)) shutil.move(src, dst) print(f" Saved {dst}") else: