Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ spawn_height = 1.5
turn_off_normalization = 1
; Options: 0 - Use normalized reward coefs as input to NN, 1 - Dont

; Radius (meters) within which partner agents are included in observations
partner_obs_radius = 100.0
; Normalization scale for partner agent relative positions
partner_obs_norm = 0.02
; Normalization scale for road element relative positions
road_obs_norm = 0.02

; Reward settings
reward_randomization = 1
Expand Down
10 changes: 10 additions & 0 deletions pufferlib/ocean/drive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
float reward_bound_acc_max = unpack(kwargs, "reward_bound_acc_max");

float min_avg_speed_to_consider_goal_attempt = unpack(kwargs, "min_avg_speed_to_consider_goal_attempt");
float partner_obs_radius = unpack(kwargs, "partner_obs_radius");
float partner_obs_norm = unpack(kwargs, "partner_obs_norm");
float road_obs_norm = unpack(kwargs, "road_obs_norm");

int use_all_maps = unpack(kwargs, "use_all_maps");
int min_agents_per_env = unpack(kwargs, "min_agents_per_env");
Expand Down Expand Up @@ -206,6 +209,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
env->polyline_reduction_threshold = polyline_reduction_threshold;
env->polyline_max_segment_length = polyline_max_segment_length;
env->min_avg_speed_to_consider_goal_attempt = min_avg_speed_to_consider_goal_attempt;
env->partner_obs_radius = partner_obs_radius;
env->partner_obs_norm = partner_obs_norm;
env->road_obs_norm = road_obs_norm;
// reward randomization bounds
env->reward_bounds[REWARD_COEF_GOAL_RADIUS] =
(RewardBound){reward_bound_goal_radius_min, reward_bound_goal_radius_max};
Expand Down Expand Up @@ -350,6 +356,9 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
env->observation_window_size = (float)unpack(kwargs, "observation_window_size");
env->polyline_reduction_threshold = (float)unpack(kwargs, "polyline_reduction_threshold");
env->polyline_max_segment_length = (float)unpack(kwargs, "polyline_max_segment_length");
env->partner_obs_radius = (float)unpack(kwargs, "partner_obs_radius");
env->partner_obs_norm = (float)unpack(kwargs, "partner_obs_norm");
env->road_obs_norm = (float)unpack(kwargs, "road_obs_norm");

// reward randomization bounds
env->reward_bounds[REWARD_COEF_GOAL_RADIUS] = (RewardBound){(float)unpack(kwargs, "reward_bound_goal_radius_min"),
Expand Down Expand Up @@ -434,6 +443,7 @@ static int my_log(PyObject *dict, Log *log) {
assign_to_dict(dict, "avg_speed_per_agent", log->avg_speed_per_agent);
assign_to_dict(dict, "max_observation_distance", log->max_observation_distance);
assign_to_dict(dict, "observation_coverage", log->observation_coverage);
assign_to_dict(dict, "partner_obs_coverage", log->partner_obs_coverage);
// assign_to_dict(dict, "avg_displacement_error", log->avg_displacement_error);
return 0;
}
13 changes: 11 additions & 2 deletions pufferlib/ocean/drive/drive.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include "libgen.h"
#include "../env_config.h"
#include <string.h>
#include "../env_config.h"

// Use this test if the network changes to ensure that the forward pass
// matches the torch implementation to the 3rd or ideally 4th decimal place
Expand All @@ -21,7 +20,8 @@ void test_drivenet() {

// Weights* weights = load_weights("resources/drive/puffer_drive_weights.bin");
Weights *weights = load_weights("puffer_drive_weights.bin");
DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, 0);
int reward_conditioning = 0;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what're these changes about?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was just some minor fixes for the demo, I can do it in a separate PR too!

DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, reward_conditioning);

forward(net, observations, actions);
for (int i = 0; i < num_agents * num_actions; i++) {
Expand All @@ -44,6 +44,9 @@ void demo() {
exit(1);
}

// Set different seed each time
srand(time(NULL));

// Note: Use below hardcoded settings for 2.0 demo purposes. Since the policy was
// trained with these exact settings, changing them may lead to
// weird behavior.
Expand Down Expand Up @@ -107,7 +110,13 @@ void demo() {
.spawn_settings = spawn_settings,
.map_name = "resources/drive/binaries/carla_2D/map_001.bin",
.reward_conditioning = conf.reward_conditioning,
.partner_obs_radius = conf.partner_obs_radius,
};

if (conf.init_mode == INIT_VARIABLE_AGENT_NUMBER) {
env.num_agents = conf.min_agents_per_env + rand() % (conf.max_agents_per_env - conf.min_agents_per_env + 1);
}

allocate(&env);
if (env.active_agent_count == 0) {
fprintf(stderr, "Error: No active agents found in map '%s' with init_mode=%d. Cannot run demo.\n", env.map_name,
Expand Down
199 changes: 121 additions & 78 deletions pufferlib/ocean/drive/drive.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@

// Observation constants
#define MAX_ROAD_SEGMENT_OBSERVATIONS 256
#ifndef MAX_AGENTS // TODO: Needs to be replaced with MAX_PARTNER_OBS(agents in obs_radius) throughout observations code
// and with env->max_agents_in_sim throughout all agent for loops
#ifndef MAX_AGENTS
#define MAX_AGENTS 128
#endif
#define MAX_PARTNER_OBSERVATIONS 32
#define STOP_AGENT 1
#define REMOVE_AGENT 2

Expand Down Expand Up @@ -241,6 +241,7 @@ struct Log {
float avg_speed_per_agent;
float max_observation_distance; // average max observation distance
float observation_coverage; // percentage of entities in obs window seen on average
float partner_obs_coverage; // % of partners within radius that fit in the obs slots
};

typedef struct GridMapEntity GridMapEntity;
Expand All @@ -254,6 +255,11 @@ typedef struct {
float max_val;
} RewardBound;

typedef struct {
int idx;
float dist_sq;
} AgentDistance;

typedef struct GridMap GridMap;
struct GridMap {
float top_left_x;
Expand Down Expand Up @@ -360,6 +366,9 @@ struct Drive {
int turn_off_normalization;
RewardBound reward_bounds[NUM_REWARD_COEFS];
float min_avg_speed_to_consider_goal_attempt;
float partner_obs_radius;
float partner_obs_norm;
float road_obs_norm;
};

// ========================================
Expand Down Expand Up @@ -1666,6 +1675,7 @@ void add_log(Drive *env) {
env->log.lane_center_rate += env->logs[i].lane_center_rate / safe_timestep;
env->log.max_observation_distance += env->logs[i].max_observation_distance / safe_timestep;
env->log.observation_coverage += env->logs[i].observation_coverage / safe_timestep;
env->log.partner_obs_coverage += env->logs[i].partner_obs_coverage / safe_timestep;
env->log.n += 1;
}
}
Expand Down Expand Up @@ -2281,7 +2291,8 @@ void allocate(Drive *env) {
init(env);
int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
ego_dim = (env->reward_conditioning == 1) ? ego_dim + NUM_REWARD_COEFS : ego_dim;
int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
int max_obs =
ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
env->observations = (float *)calloc(env->active_agent_count * max_obs, sizeof(float));
env->actions = (float *)calloc(env->active_agent_count * 2, sizeof(float));
env->rewards = (float *)calloc(env->active_agent_count, sizeof(float));
Expand Down Expand Up @@ -2603,10 +2614,90 @@ void compute_agent_metrics(Drive *env, int agent_idx) {

// void compute_rewards(void){}

float compute_partner_observations(Drive *env, float *obs, int agent_idx, int obs_idx) {

int ego_idx = env->active_agent_indices[agent_idx];
Agent *ego_entity = &env->agents[ego_idx];
int ego_id = ego_entity->id;

// Ego stats
float cos_heading = cosf(ego_entity->sim_heading);
float sin_heading = sinf(ego_entity->sim_heading);
Comment on lines +2624 to +2625
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a little puzzling. The radius is not dependent upon the frame so having any code depend on which way the ego is facing seems odd

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, figured out why they're there but I think code-wise it makes sense for this to be closer to where it's used


float obs_radius = env->partner_obs_radius;
int partners_in_radius = 0;
AgentDistance candidates[MAX_AGENTS];
for (int i = 0; i < env->num_created_agents; i++) {
Agent *partner = &env->agents[i];
if (ego_id == partner->id)
continue;
float dx = partner->sim_x - ego_entity->sim_x;
float dy = partner->sim_y - ego_entity->sim_y;
float dz = partner->sim_z - ego_entity->sim_z;
float dist_sq = dx * dx + dy * dy + dz * dz;
if (dist_sq <= obs_radius * obs_radius) {
candidates[partners_in_radius].idx = i;
candidates[partners_in_radius].dist_sq = dist_sq;
partners_in_radius++;
}
}

// Partial selection sort: pick nearest min(partners_in_radius, MAX_PARTNER_OBSERVATIONS)
int cars_seen = (partners_in_radius < MAX_PARTNER_OBSERVATIONS) ? partners_in_radius : MAX_PARTNER_OBSERVATIONS;
for (int k = 0; k < cars_seen; k++) {
int min_idx = k;
for (int j = k + 1; j < partners_in_radius; j++) {
if (candidates[j].dist_sq < candidates[min_idx].dist_sq)
min_idx = j;
}
if (min_idx != k) {
AgentDistance tmp = candidates[k];
candidates[k] = candidates[min_idx];
candidates[min_idx] = tmp;
}
}

// Write observations for nearest cars_seen agents
for (int k = 0; k < cars_seen; k++) {
Agent *partner = &env->agents[candidates[k].idx];
float dx = partner->sim_x - ego_entity->sim_x;
float dy = partner->sim_y - ego_entity->sim_y;
float dz = partner->sim_z - ego_entity->sim_z;
float rel_x = dx * cos_heading + dy * sin_heading;
float rel_y = -dx * sin_heading + dy * cos_heading;
obs[obs_idx] = rel_x * env->partner_obs_norm;
obs[obs_idx + 1] = rel_y * env->partner_obs_norm;
obs[obs_idx + 2] = dz * env->partner_obs_norm;
obs[obs_idx + 3] = partner->sim_width / MAX_VEH_WIDTH;
obs[obs_idx + 4] = partner->sim_length / MAX_VEH_LEN;
float other_cos = cosf(partner->sim_heading);
float other_sin = sinf(partner->sim_heading);
obs[obs_idx + 5] = other_cos * cos_heading + other_sin * sin_heading;
obs[obs_idx + 6] = other_sin * cos_heading - other_cos * sin_heading;
Comment on lines +2675 to +2676
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not for now but we really should pull all the transformer code into one place so we can't mess it up anywhere

float rel_vx = partner->sim_vx - ego_entity->sim_vx;
float rel_vy = partner->sim_vy - ego_entity->sim_vy;
float rel_speed_magnitude = sqrtf(rel_vx * rel_vx + rel_vy * rel_vy);
float rel_v_dot_heading = rel_vx * other_cos + rel_vy * other_sin;
obs[obs_idx + 7] = copysignf(rel_speed_magnitude, rel_v_dot_heading) / MAX_SPEED;
obs_idx += 8;
}

// Pad remaining partner obs with zero
int remaining_partner_obs = (MAX_PARTNER_OBSERVATIONS - cars_seen) * 8;
Comment on lines +2682 to +2686
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

compute_partner_observations() hard-codes partner feature width as 8 in several places (obs_idx += 8, (…)* 8). This will silently break if PARTNER_FEATURES changes. Prefer using PARTNER_FEATURES consistently for indexing/padding to keep observation layout self-consistent.

Suggested change
obs_idx += 8;
}
// Pad remaining partner obs with zero
int remaining_partner_obs = (MAX_PARTNER_OBSERVATIONS - cars_seen) * 8;
obs_idx += PARTNER_FEATURES;
}
// Pad remaining partner obs with zero
int remaining_partner_obs = (MAX_PARTNER_OBSERVATIONS - cars_seen) * PARTNER_FEATURES;

Copilot uses AI. Check for mistakes.
memset(&obs[obs_idx], 0, remaining_partner_obs * sizeof(float));

// Return coverage: fraction of partners within radius that fit in obs slots
if (partners_in_radius == 0) {
return 100.0f;
}
return ((float)cars_seen / (float)partners_in_radius) * 100.0f;
}

void compute_observations(Drive *env) {
int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
ego_dim = (env->reward_conditioning == 1) ? ego_dim + NUM_REWARD_COEFS : ego_dim;
int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
int max_obs =
ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
memset(env->observations, 0, max_obs * env->active_agent_count * sizeof(float));
float (*observations)[max_obs] = (float (*)[max_obs])env->observations;
for (int i = 0; i < env->active_agent_count; i++) {
Expand Down Expand Up @@ -2678,70 +2769,14 @@ void compute_observations(Drive *env) {
}
}
}
//
// Relative Pos of other cars

int cars_seen = 0;
for (int j = 0; j < MAX_AGENTS; j++) {
int index = -1;
if (j < env->active_agent_count) {
index = env->active_agent_indices[j];
} else if (j < env->num_created_agents && env->static_agent_count > 0) {
index = env->static_agent_indices[j - env->active_agent_count];
}
if (index == -1)
continue;
if (env->agents[index].type > CYCLIST)
break;
if (index == env->active_agent_indices[i])
continue; // Skip self, but don't increment obs_idx
Agent *other_entity = &env->agents[index];
if (ego_entity->respawn_timestep != -1)
continue;
if (other_entity->respawn_timestep != -1)
continue;
// Store original relative positions
float dx = other_entity->sim_x - ego_entity->sim_x;
float dy = other_entity->sim_y - ego_entity->sim_y;
float dz = other_entity->sim_z - ego_entity->sim_z;
float dist = (dx * dx + dy * dy + dz * dz);
if (dist > 2500.0f)
continue;
// Rotate to ego vehicle's frame
float rel_x = dx * cos_heading + dy * sin_heading;
float rel_y = -dx * sin_heading + dy * cos_heading;
float rel_z = dz; // No rotation needed for vertical component
// Store observations with correct indexing
obs[obs_idx] = rel_x * 0.02f;
obs[obs_idx + 1] = rel_y * 0.02f;
obs[obs_idx + 2] = rel_z * 0.02f;
obs[obs_idx + 3] = other_entity->sim_width / MAX_VEH_WIDTH;
obs[obs_idx + 4] = other_entity->sim_length / MAX_VEH_LEN;
// relative heading
float other_cos = cosf(other_entity->sim_heading);
float other_sin = sinf(other_entity->sim_heading);
float rel_heading_x =
other_cos * cos_heading + other_sin * sin_heading; // cos(a-b) = cos(a)cos(b) + sin(a)sin(b)
float rel_heading_y =
other_sin * cos_heading - other_cos * sin_heading; // sin(a-b) = sin(a)cos(b) - cos(a)sin(b)

obs[obs_idx + 5] = rel_heading_x;
obs[obs_idx + 6] = rel_heading_y;
// relative speed
float rel_vx = other_entity->sim_vx - ego_entity->sim_vx;
float rel_vy = other_entity->sim_vy - ego_entity->sim_vy;
float rel_speed_magnitude = sqrtf(rel_vx * rel_vx + rel_vy * rel_vy);
float rel_v_dot_heading = rel_vx * other_cos + rel_vy * other_sin;
float rel_signed_speed = copysignf(rel_speed_magnitude, rel_v_dot_heading);
obs[obs_idx + 7] = rel_signed_speed / MAX_SPEED;
cars_seen++;
obs_idx += 8; // Move to next observation slot
}
int remaining_partner_obs = (MAX_AGENTS - 1 - cars_seen) * 8;
memset(&obs[obs_idx], 0, remaining_partner_obs * sizeof(float));
obs_idx += remaining_partner_obs;

// Map observations
// Partner vehicle observations
float coverage = compute_partner_observations(env, obs, i, obs_idx);
env->logs[i].partner_obs_coverage += coverage;

obs_idx += MAX_PARTNER_OBSERVATIONS * PARTNER_FEATURES;
Comment on lines +2774 to +2777
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

love it


// map observations
GridMapEntity entity_list[MAX_ROAD_SEGMENT_OBSERVATIONS];
int grid_idx = getGridIndex(env, ego_entity->sim_x, ego_entity->sim_y);
float max_observation_distance = 0.0f;
Expand Down Expand Up @@ -2802,9 +2837,9 @@ void compute_observations(Drive *env) {
// Compute sin and cos of relative angle directly without atan2f
float cos_angle = dx_norm * cos_heading + dy_norm * sin_heading;
float sin_angle = -dx_norm * sin_heading + dy_norm * cos_heading;
obs[obs_idx] = x_obs * 0.02f;
obs[obs_idx + 1] = y_obs * 0.02f;
obs[obs_idx + 2] = z_obs * 0.02f;
obs[obs_idx] = x_obs * env->road_obs_norm;
obs[obs_idx + 1] = y_obs * env->road_obs_norm;
obs[obs_idx + 2] = z_obs * env->road_obs_norm;
obs[obs_idx + 3] = length / MAX_ROAD_SEGMENT_LENGTH;
obs[obs_idx + 4] = width / MAX_ROAD_SCALE;
obs[obs_idx + 5] = cos_angle;
Expand Down Expand Up @@ -3567,7 +3602,8 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las

int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
ego_dim = (env->reward_conditioning == 1) ? ego_dim + NUM_REWARD_COEFS : ego_dim;
int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
int max_obs =
ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
float (*observations)[max_obs] = (float (*)[max_obs])env->observations;
float *agent_obs = &observations[agent_index][0];
// self
Expand Down Expand Up @@ -3599,7 +3635,7 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las
}
// First draw other agent observations
int obs_idx = ego_dim; // Start after ego obs
for (int j = 0; j < MAX_AGENTS - 1; j++) {
for (int j = 0; j < MAX_PARTNER_OBSERVATIONS; j++) {
if (agent_obs[obs_idx] == 0 || agent_obs[obs_idx + 1] == 0) {
obs_idx += 8; // Move to next agent observation
continue;
Expand Down Expand Up @@ -3721,8 +3757,8 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las
obs_idx += PARTNER_FEATURES; // Move to next agent observation (8 values per agent)
}
// Then draw map observations
int map_start_idx = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1); // Start after agent observations
for (int k = 0; k < MAX_ROAD_SEGMENT_OBSERVATIONS; k++) { // Loop through potential map entities
int map_start_idx = ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS); // Start after agent observations
for (int k = 0; k < MAX_ROAD_SEGMENT_OBSERVATIONS; k++) { // Loop through potential map entities
int entity_idx = map_start_idx + k * 8;
if (agent_obs[entity_idx] == 0 && agent_obs[entity_idx + 1] == 0) {
continue;
Expand Down Expand Up @@ -3960,10 +3996,17 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers,
Vector3 model_size = {bounds.max.x - bounds.min.x, bounds.max.y - bounds.min.y,
bounds.max.z - bounds.min.z};
Vector3 scale = {size.x / model_size.x, size.y / model_size.y, size.z / model_size.z};
// if((obs_only || IsKeyDown(KEY_LEFT_CONTROL)) && agent_index != env->human_agent_idx){
// rlPopMatrix();
// continue;
// }
if ((obs_only || IsKeyDown(KEY_LEFT_CONTROL)) && agent_index != env->human_agent_idx) {
int ego_active_idx = env->active_agent_indices[env->human_agent_idx];
Agent *ego = &env->agents[ego_active_idx];
float dist = relative_distance_3d(ego->sim_x, ego->sim_y, ego->sim_z, agent->sim_x, agent->sim_y,
agent->sim_z);
float obs_radius = env->partner_obs_radius;
if (dist > obs_radius) {
rlPopMatrix();
continue;
}
}
if (agent->type == CYCLIST) {
scale = (Vector3){0.01, 0.01, 0.01};
car_model = client->cyclist;
Expand Down
Loading
Loading