Emerge-Lab · mpragnay · Apr 6, 2026 · Mar 23, 2026 · Mar 20, 2026 · Mar 23, 2026
diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
@@ -77,6 +77,12 @@ spawn_height = 1.5
 turn_off_normalization = 1
 ; Options: 0 - Use normalized reward coefs as input to NN, 1 - Dont
 
+; Radius (meters) within which partner agents are included in observations
+partner_obs_radius = 100.0
+; Normalization scale for partner agent relative positions
+partner_obs_norm = 0.02
+; Normalization scale for road element relative positions
+road_obs_norm = 0.02
 
 ; Reward settings
 reward_randomization = 1

diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c
@@ -129,6 +129,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
     float reward_bound_acc_max = unpack(kwargs, "reward_bound_acc_max");
 
     float min_avg_speed_to_consider_goal_attempt = unpack(kwargs, "min_avg_speed_to_consider_goal_attempt");
+    float partner_obs_radius = unpack(kwargs, "partner_obs_radius");
+    float partner_obs_norm = unpack(kwargs, "partner_obs_norm");
+    float road_obs_norm = unpack(kwargs, "road_obs_norm");
 
     int use_all_maps = unpack(kwargs, "use_all_maps");
     int min_agents_per_env = unpack(kwargs, "min_agents_per_env");
@@ -206,6 +209,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
         env->polyline_reduction_threshold = polyline_reduction_threshold;
         env->polyline_max_segment_length = polyline_max_segment_length;
         env->min_avg_speed_to_consider_goal_attempt = min_avg_speed_to_consider_goal_attempt;
+        env->partner_obs_radius = partner_obs_radius;
+        env->partner_obs_norm = partner_obs_norm;
+        env->road_obs_norm = road_obs_norm;
         // reward randomization bounds
         env->reward_bounds[REWARD_COEF_GOAL_RADIUS] =
             (RewardBound){reward_bound_goal_radius_min, reward_bound_goal_radius_max};
@@ -350,6 +356,9 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
     env->observation_window_size = (float)unpack(kwargs, "observation_window_size");
     env->polyline_reduction_threshold = (float)unpack(kwargs, "polyline_reduction_threshold");
     env->polyline_max_segment_length = (float)unpack(kwargs, "polyline_max_segment_length");
+    env->partner_obs_radius = (float)unpack(kwargs, "partner_obs_radius");
+    env->partner_obs_norm = (float)unpack(kwargs, "partner_obs_norm");
+    env->road_obs_norm = (float)unpack(kwargs, "road_obs_norm");
 
     // reward randomization bounds
     env->reward_bounds[REWARD_COEF_GOAL_RADIUS] = (RewardBound){(float)unpack(kwargs, "reward_bound_goal_radius_min"),
@@ -434,6 +443,7 @@ static int my_log(PyObject *dict, Log *log) {
     assign_to_dict(dict, "avg_speed_per_agent", log->avg_speed_per_agent);
     assign_to_dict(dict, "max_observation_distance", log->max_observation_distance);
     assign_to_dict(dict, "observation_coverage", log->observation_coverage);
+    assign_to_dict(dict, "partner_obs_coverage", log->partner_obs_coverage);
     // assign_to_dict(dict, "avg_displacement_error", log->avg_displacement_error);
     return 0;
 }
diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c
@@ -3,7 +3,6 @@
 #include "libgen.h"
 #include "../env_config.h"
 #include <string.h>
-#include "../env_config.h"
 
 // Use this test if the network changes to ensure that the forward pass
 // matches the torch implementation to the 3rd or ideally 4th decimal place
@@ -21,7 +20,8 @@ void test_drivenet() {
 
     // Weights* weights = load_weights("resources/drive/puffer_drive_weights.bin");
     Weights *weights = load_weights("puffer_drive_weights.bin");
-    DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, 0);
+    int reward_conditioning = 0;
+    DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, reward_conditioning);
 
     forward(net, observations, actions);
     for (int i = 0; i < num_agents * num_actions; i++) {
@@ -44,6 +44,9 @@ void demo() {
         exit(1);
     }
 
+    // Set different seed each time
+    srand(time(NULL));
+
     // Note: Use below hardcoded settings for 2.0 demo purposes. Since the policy was
     // trained with these exact settings, changing them may lead to
     // weird behavior.
@@ -107,7 +110,13 @@ void demo() {
         .spawn_settings = spawn_settings,
         .map_name = "resources/drive/binaries/carla_2D/map_001.bin",
         .reward_conditioning = conf.reward_conditioning,
+        .partner_obs_radius = conf.partner_obs_radius,
     };
+
+    if (conf.init_mode == INIT_VARIABLE_AGENT_NUMBER) {
+        env.num_agents = conf.min_agents_per_env + rand() % (conf.max_agents_per_env - conf.min_agents_per_env + 1);
+    }
+
     allocate(&env);
     if (env.active_agent_count == 0) {
         fprintf(stderr, "Error: No active agents found in map '%s' with init_mode=%d. Cannot run demo.\n", env.map_name,

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
@@ -104,10 +104,10 @@
 
 // Observation constants
 #define MAX_ROAD_SEGMENT_OBSERVATIONS 256
-#ifndef MAX_AGENTS // TODO: Needs to be replaced with MAX_PARTNER_OBS(agents in obs_radius) throughout observations code
-                   // and with env->max_agents_in_sim throughout all agent for loops
+#ifndef MAX_AGENTS
 #define MAX_AGENTS 128
 #endif
+#define MAX_PARTNER_OBSERVATIONS 32
 #define STOP_AGENT 1
 #define REMOVE_AGENT 2
 
@@ -241,6 +241,7 @@ struct Log {
     float avg_speed_per_agent;
     float max_observation_distance; // average max observation distance
     float observation_coverage;     // percentage of entities in obs window seen on average
+    float partner_obs_coverage;     // % of partners within radius that fit in the obs slots
 };
 
 typedef struct GridMapEntity GridMapEntity;
@@ -254,6 +255,11 @@ typedef struct {
     float max_val;
 } RewardBound;
 
+typedef struct {
+    int idx;
+    float dist_sq;
+} AgentDistance;
+
 typedef struct GridMap GridMap;
 struct GridMap {
     float top_left_x;
@@ -360,6 +366,9 @@ struct Drive {
     int turn_off_normalization;
     RewardBound reward_bounds[NUM_REWARD_COEFS];
     float min_avg_speed_to_consider_goal_attempt;
+    float partner_obs_radius;
+    float partner_obs_norm;
+    float road_obs_norm;
 };
 
 // ========================================
@@ -1666,6 +1675,7 @@ void add_log(Drive *env) {
         env->log.lane_center_rate += env->logs[i].lane_center_rate / safe_timestep;
         env->log.max_observation_distance += env->logs[i].max_observation_distance / safe_timestep;
         env->log.observation_coverage += env->logs[i].observation_coverage / safe_timestep;
+        env->log.partner_obs_coverage += env->logs[i].partner_obs_coverage / safe_timestep;
         env->log.n += 1;
     }
 }
@@ -2281,7 +2291,8 @@ void allocate(Drive *env) {
     init(env);
     int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
     ego_dim = (env->reward_conditioning == 1) ? ego_dim + NUM_REWARD_COEFS : ego_dim;
-    int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
+    int max_obs =
+        ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
     env->observations = (float *)calloc(env->active_agent_count * max_obs, sizeof(float));
     env->actions = (float *)calloc(env->active_agent_count * 2, sizeof(float));
     env->rewards = (float *)calloc(env->active_agent_count, sizeof(float));
@@ -2603,10 +2614,90 @@ void compute_agent_metrics(Drive *env, int agent_idx) {
 
 // void compute_rewards(void){}
 
+float compute_partner_observations(Drive *env, float *obs, int agent_idx, int obs_idx) {
+
+    int ego_idx = env->active_agent_indices[agent_idx];
+    Agent *ego_entity = &env->agents[ego_idx];
+    int ego_id = ego_entity->id;
+
+    // Ego stats
+    float cos_heading = cosf(ego_entity->sim_heading);
+    float sin_heading = sinf(ego_entity->sim_heading);
+
+    float obs_radius = env->partner_obs_radius;
+    int partners_in_radius = 0;
+    AgentDistance candidates[MAX_AGENTS];
+    for (int i = 0; i < env->num_created_agents; i++) {
+        Agent *partner = &env->agents[i];
+        if (ego_id == partner->id)
+            continue;
+        float dx = partner->sim_x - ego_entity->sim_x;
+        float dy = partner->sim_y - ego_entity->sim_y;
+        float dz = partner->sim_z - ego_entity->sim_z;
+        float dist_sq = dx * dx + dy * dy + dz * dz;
+        if (dist_sq <= obs_radius * obs_radius) {
+            candidates[partners_in_radius].idx = i;
+            candidates[partners_in_radius].dist_sq = dist_sq;
+            partners_in_radius++;
+        }
+    }
+
+    // Partial selection sort: pick nearest min(partners_in_radius, MAX_PARTNER_OBSERVATIONS)
+    int cars_seen = (partners_in_radius < MAX_PARTNER_OBSERVATIONS) ? partners_in_radius : MAX_PARTNER_OBSERVATIONS;
+    for (int k = 0; k < cars_seen; k++) {
+        int min_idx = k;
+        for (int j = k + 1; j < partners_in_radius; j++) {
+            if (candidates[j].dist_sq < candidates[min_idx].dist_sq)
+                min_idx = j;
+        }
+        if (min_idx != k) {
+            AgentDistance tmp = candidates[k];
+            candidates[k] = candidates[min_idx];
+            candidates[min_idx] = tmp;
+        }
+    }
+
+    // Write observations for nearest cars_seen agents
+    for (int k = 0; k < cars_seen; k++) {
+        Agent *partner = &env->agents[candidates[k].idx];
+        float dx = partner->sim_x - ego_entity->sim_x;
+        float dy = partner->sim_y - ego_entity->sim_y;
+        float dz = partner->sim_z - ego_entity->sim_z;
+        float rel_x = dx * cos_heading + dy * sin_heading;
+        float rel_y = -dx * sin_heading + dy * cos_heading;
+        obs[obs_idx] = rel_x * env->partner_obs_norm;
+        obs[obs_idx + 1] = rel_y * env->partner_obs_norm;
+        obs[obs_idx + 2] = dz * env->partner_obs_norm;
+        obs[obs_idx + 3] = partner->sim_width / MAX_VEH_WIDTH;
+        obs[obs_idx + 4] = partner->sim_length / MAX_VEH_LEN;
+        float other_cos = cosf(partner->sim_heading);
+        float other_sin = sinf(partner->sim_heading);
+        obs[obs_idx + 5] = other_cos * cos_heading + other_sin * sin_heading;
+        obs[obs_idx + 6] = other_sin * cos_heading - other_cos * sin_heading;
+        float rel_vx = partner->sim_vx - ego_entity->sim_vx;
+        float rel_vy = partner->sim_vy - ego_entity->sim_vy;
+        float rel_speed_magnitude = sqrtf(rel_vx * rel_vx + rel_vy * rel_vy);
+        float rel_v_dot_heading = rel_vx * other_cos + rel_vy * other_sin;
+        obs[obs_idx + 7] = copysignf(rel_speed_magnitude, rel_v_dot_heading) / MAX_SPEED;
+        obs_idx += 8;
+    }
+
+    // Pad remaining partner obs with zero
+    int remaining_partner_obs = (MAX_PARTNER_OBSERVATIONS - cars_seen) * 8;
-        obs_idx += 8;
-    }
-
-    // Pad remaining partner obs with zero
-    int remaining_partner_obs = (MAX_PARTNER_OBSERVATIONS - cars_seen) * 8;
+        obs_idx += PARTNER_FEATURES;
+    }
+
+    // Pad remaining partner obs with zero
+    int remaining_partner_obs = (MAX_PARTNER_OBSERVATIONS - cars_seen) * PARTNER_FEATURES;
-        obs_idx += 8;
-    }
-
-    // Pad remaining partner obs with zero
-    int remaining_partner_obs = (MAX_PARTNER_OBSERVATIONS - cars_seen) * 8;
+        obs_idx += PARTNER_FEATURES;
+    }
+
+    // Pad remaining partner obs with zero
+    int remaining_partner_obs = (MAX_PARTNER_OBSERVATIONS - cars_seen) * PARTNER_FEATURES;
+    memset(&obs[obs_idx], 0, remaining_partner_obs * sizeof(float));
+
+    // Return coverage: fraction of partners within radius that fit in obs slots
+    if (partners_in_radius == 0) {
+        return 100.0f;
+    }
+    return ((float)cars_seen / (float)partners_in_radius) * 100.0f;
+}
+
 void compute_observations(Drive *env) {
     int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
     ego_dim = (env->reward_conditioning == 1) ? ego_dim + NUM_REWARD_COEFS : ego_dim;
-    int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
+    int max_obs =
+        ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
     memset(env->observations, 0, max_obs * env->active_agent_count * sizeof(float));
     float (*observations)[max_obs] = (float (*)[max_obs])env->observations;
     for (int i = 0; i < env->active_agent_count; i++) {
@@ -2678,70 +2769,14 @@ void compute_observations(Drive *env) {
                 }
             }
         }
-        //
-        //  Relative Pos of other cars
-
-        int cars_seen = 0;
-        for (int j = 0; j < MAX_AGENTS; j++) {
-            int index = -1;
-            if (j < env->active_agent_count) {
-                index = env->active_agent_indices[j];
-            } else if (j < env->num_created_agents && env->static_agent_count > 0) {
-                index = env->static_agent_indices[j - env->active_agent_count];
-            }
-            if (index == -1)
-                continue;
-            if (env->agents[index].type > CYCLIST)
-                break;
-            if (index == env->active_agent_indices[i])
-                continue; // Skip self, but don't increment obs_idx
-            Agent *other_entity = &env->agents[index];
-            if (ego_entity->respawn_timestep != -1)
-                continue;
-            if (other_entity->respawn_timestep != -1)
-                continue;
-            // Store original relative positions
-            float dx = other_entity->sim_x - ego_entity->sim_x;
-            float dy = other_entity->sim_y - ego_entity->sim_y;
-            float dz = other_entity->sim_z - ego_entity->sim_z;
-            float dist = (dx * dx + dy * dy + dz * dz);
-            if (dist > 2500.0f)
-                continue;
-            // Rotate to ego vehicle's frame
-            float rel_x = dx * cos_heading + dy * sin_heading;
-            float rel_y = -dx * sin_heading + dy * cos_heading;
-            float rel_z = dz; // No rotation needed for vertical component
-            // Store observations with correct indexing
-            obs[obs_idx] = rel_x * 0.02f;
-            obs[obs_idx + 1] = rel_y * 0.02f;
-            obs[obs_idx + 2] = rel_z * 0.02f;
-            obs[obs_idx + 3] = other_entity->sim_width / MAX_VEH_WIDTH;
-            obs[obs_idx + 4] = other_entity->sim_length / MAX_VEH_LEN;
-            // relative heading
-            float other_cos = cosf(other_entity->sim_heading);
-            float other_sin = sinf(other_entity->sim_heading);
-            float rel_heading_x =
-                other_cos * cos_heading + other_sin * sin_heading; // cos(a-b) = cos(a)cos(b) + sin(a)sin(b)
-            float rel_heading_y =
-                other_sin * cos_heading - other_cos * sin_heading; // sin(a-b) = sin(a)cos(b) - cos(a)sin(b)
-
-            obs[obs_idx + 5] = rel_heading_x;
-            obs[obs_idx + 6] = rel_heading_y;
-            // relative speed
-            float rel_vx = other_entity->sim_vx - ego_entity->sim_vx;
-            float rel_vy = other_entity->sim_vy - ego_entity->sim_vy;
-            float rel_speed_magnitude = sqrtf(rel_vx * rel_vx + rel_vy * rel_vy);
-            float rel_v_dot_heading = rel_vx * other_cos + rel_vy * other_sin;
-            float rel_signed_speed = copysignf(rel_speed_magnitude, rel_v_dot_heading);
-            obs[obs_idx + 7] = rel_signed_speed / MAX_SPEED;
-            cars_seen++;
-            obs_idx += 8; // Move to next observation slot
-        }
-        int remaining_partner_obs = (MAX_AGENTS - 1 - cars_seen) * 8;
-        memset(&obs[obs_idx], 0, remaining_partner_obs * sizeof(float));
-        obs_idx += remaining_partner_obs;
 
-        // Map observations
+        // Partner vehicle observations
+        float coverage = compute_partner_observations(env, obs, i, obs_idx);
+        env->logs[i].partner_obs_coverage += coverage;
+
+        obs_idx += MAX_PARTNER_OBSERVATIONS * PARTNER_FEATURES;
+
+        // map observations
         GridMapEntity entity_list[MAX_ROAD_SEGMENT_OBSERVATIONS];
         int grid_idx = getGridIndex(env, ego_entity->sim_x, ego_entity->sim_y);
         float max_observation_distance = 0.0f;
@@ -2802,9 +2837,9 @@ void compute_observations(Drive *env) {
             // Compute sin and cos of relative angle directly without atan2f
             float cos_angle = dx_norm * cos_heading + dy_norm * sin_heading;
             float sin_angle = -dx_norm * sin_heading + dy_norm * cos_heading;
-            obs[obs_idx] = x_obs * 0.02f;
-            obs[obs_idx + 1] = y_obs * 0.02f;
-            obs[obs_idx + 2] = z_obs * 0.02f;
+            obs[obs_idx] = x_obs * env->road_obs_norm;
+            obs[obs_idx + 1] = y_obs * env->road_obs_norm;
+            obs[obs_idx + 2] = z_obs * env->road_obs_norm;
             obs[obs_idx + 3] = length / MAX_ROAD_SEGMENT_LENGTH;
             obs[obs_idx + 4] = width / MAX_ROAD_SCALE;
             obs[obs_idx + 5] = cos_angle;
@@ -3567,7 +3602,8 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las
 
     int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
     ego_dim = (env->reward_conditioning == 1) ? ego_dim + NUM_REWARD_COEFS : ego_dim;
-    int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
+    int max_obs =
+        ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
     float (*observations)[max_obs] = (float (*)[max_obs])env->observations;
     float *agent_obs = &observations[agent_index][0];
     // self
@@ -3599,7 +3635,7 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las
     }
     // First draw other agent observations
     int obs_idx = ego_dim; // Start after ego obs
-    for (int j = 0; j < MAX_AGENTS - 1; j++) {
+    for (int j = 0; j < MAX_PARTNER_OBSERVATIONS; j++) {
         if (agent_obs[obs_idx] == 0 || agent_obs[obs_idx + 1] == 0) {
             obs_idx += 8; // Move to next agent observation
             continue;
@@ -3721,8 +3757,8 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las
         obs_idx += PARTNER_FEATURES; // Move to next agent observation (8 values per agent)
     }
     // Then draw map observations
-    int map_start_idx = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1); // Start after agent observations
-    for (int k = 0; k < MAX_ROAD_SEGMENT_OBSERVATIONS; k++) {          // Loop through potential map entities
+    int map_start_idx = ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS); // Start after agent observations
+    for (int k = 0; k < MAX_ROAD_SEGMENT_OBSERVATIONS; k++) {                    // Loop through potential map entities
         int entity_idx = map_start_idx + k * 8;
         if (agent_obs[entity_idx] == 0 && agent_obs[entity_idx + 1] == 0) {
             continue;
@@ -3960,10 +3996,17 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers,
                 Vector3 model_size = {bounds.max.x - bounds.min.x, bounds.max.y - bounds.min.y,
                                       bounds.max.z - bounds.min.z};
                 Vector3 scale = {size.x / model_size.x, size.y / model_size.y, size.z / model_size.z};
-                // if((obs_only ||  IsKeyDown(KEY_LEFT_CONTROL)) && agent_index != env->human_agent_idx){
-                //     rlPopMatrix();
-                //     continue;
-                // }
+                if ((obs_only || IsKeyDown(KEY_LEFT_CONTROL)) && agent_index != env->human_agent_idx) {
+                    int ego_active_idx = env->active_agent_indices[env->human_agent_idx];
+                    Agent *ego = &env->agents[ego_active_idx];
+                    float dist = relative_distance_3d(ego->sim_x, ego->sim_y, ego->sim_z, agent->sim_x, agent->sim_y,
+                                                      agent->sim_z);
+                    float obs_radius = env->partner_obs_radius;
+                    if (dist > obs_radius) {
+                        rlPopMatrix();
+                        continue;
+                    }
+                }
                 if (agent->type == CYCLIST) {
                     scale = (Vector3){0.01, 0.01, 0.01};
                     car_model = client->cyclist;