diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 77e474f86..24b917b5a 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -77,6 +77,12 @@ spawn_height = 1.5 turn_off_normalization = 1 ; Options: 0 - Use normalized reward coefs as input to NN, 1 - Dont +; Radius (meters) within which partner agents are included in observations +partner_obs_radius = 100.0 +; Normalization scale for partner agent relative positions +partner_obs_norm = 0.02 +; Normalization scale for road element relative positions +road_obs_norm = 0.02 ; Reward settings reward_randomization = 1 diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index f178fedfc..6e87547bc 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -129,6 +129,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { float reward_bound_acc_max = unpack(kwargs, "reward_bound_acc_max"); float min_avg_speed_to_consider_goal_attempt = unpack(kwargs, "min_avg_speed_to_consider_goal_attempt"); + float partner_obs_radius = unpack(kwargs, "partner_obs_radius"); + float partner_obs_norm = unpack(kwargs, "partner_obs_norm"); + float road_obs_norm = unpack(kwargs, "road_obs_norm"); int use_all_maps = unpack(kwargs, "use_all_maps"); int min_agents_per_env = unpack(kwargs, "min_agents_per_env"); @@ -206,6 +209,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { env->polyline_reduction_threshold = polyline_reduction_threshold; env->polyline_max_segment_length = polyline_max_segment_length; env->min_avg_speed_to_consider_goal_attempt = min_avg_speed_to_consider_goal_attempt; + env->partner_obs_radius = partner_obs_radius; + env->partner_obs_norm = partner_obs_norm; + env->road_obs_norm = road_obs_norm; // reward randomization bounds env->reward_bounds[REWARD_COEF_GOAL_RADIUS] = (RewardBound){reward_bound_goal_radius_min, reward_bound_goal_radius_max}; @@ -350,6 +356,9 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { env->observation_window_size = (float)unpack(kwargs, "observation_window_size"); env->polyline_reduction_threshold = (float)unpack(kwargs, "polyline_reduction_threshold"); env->polyline_max_segment_length = (float)unpack(kwargs, "polyline_max_segment_length"); + env->partner_obs_radius = (float)unpack(kwargs, "partner_obs_radius"); + env->partner_obs_norm = (float)unpack(kwargs, "partner_obs_norm"); + env->road_obs_norm = (float)unpack(kwargs, "road_obs_norm"); // reward randomization bounds env->reward_bounds[REWARD_COEF_GOAL_RADIUS] = (RewardBound){(float)unpack(kwargs, "reward_bound_goal_radius_min"), @@ -434,6 +443,7 @@ static int my_log(PyObject *dict, Log *log) { assign_to_dict(dict, "avg_speed_per_agent", log->avg_speed_per_agent); assign_to_dict(dict, "max_observation_distance", log->max_observation_distance); assign_to_dict(dict, "observation_coverage", log->observation_coverage); + assign_to_dict(dict, "partner_obs_coverage", log->partner_obs_coverage); // assign_to_dict(dict, "avg_displacement_error", log->avg_displacement_error); return 0; } diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index 0fd36d6e7..f3c1cd66b 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -3,7 +3,6 @@ #include "libgen.h" #include "../env_config.h" #include -#include "../env_config.h" // Use this test if the network changes to ensure that the forward pass // matches the torch implementation to the 3rd or ideally 4th decimal place @@ -21,7 +20,8 @@ void test_drivenet() { // Weights* weights = load_weights("resources/drive/puffer_drive_weights.bin"); Weights *weights = load_weights("puffer_drive_weights.bin"); - DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, 0); + int reward_conditioning = 0; + DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, reward_conditioning); forward(net, observations, actions); for (int i = 0; i < num_agents * num_actions; i++) { @@ -44,6 +44,9 @@ void demo() { exit(1); } + // Set different seed each time + srand(time(NULL)); + // Note: Use below hardcoded settings for 2.0 demo purposes. Since the policy was // trained with these exact settings, changing them may lead to // weird behavior. @@ -107,7 +110,13 @@ void demo() { .spawn_settings = spawn_settings, .map_name = "resources/drive/binaries/carla_2D/map_001.bin", .reward_conditioning = conf.reward_conditioning, + .partner_obs_radius = conf.partner_obs_radius, }; + + if (conf.init_mode == INIT_VARIABLE_AGENT_NUMBER) { + env.num_agents = conf.min_agents_per_env + rand() % (conf.max_agents_per_env - conf.min_agents_per_env + 1); + } + allocate(&env); if (env.active_agent_count == 0) { fprintf(stderr, "Error: No active agents found in map '%s' with init_mode=%d. Cannot run demo.\n", env.map_name, diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 650330cfd..a0cd05f9b 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -104,10 +104,10 @@ // Observation constants #define MAX_ROAD_SEGMENT_OBSERVATIONS 256 -#ifndef MAX_AGENTS // TODO: Needs to be replaced with MAX_PARTNER_OBS(agents in obs_radius) throughout observations code - // and with env->max_agents_in_sim throughout all agent for loops +#ifndef MAX_AGENTS #define MAX_AGENTS 128 #endif +#define MAX_PARTNER_OBSERVATIONS 32 #define STOP_AGENT 1 #define REMOVE_AGENT 2 @@ -241,6 +241,7 @@ struct Log { float avg_speed_per_agent; float max_observation_distance; // average max observation distance float observation_coverage; // percentage of entities in obs window seen on average + float partner_obs_coverage; // % of partners within radius that fit in the obs slots }; typedef struct GridMapEntity GridMapEntity; @@ -254,6 +255,11 @@ typedef struct { float max_val; } RewardBound; +typedef struct { + int idx; + float dist_sq; +} AgentDistance; + typedef struct GridMap GridMap; struct GridMap { float top_left_x; @@ -360,6 +366,9 @@ struct Drive { int turn_off_normalization; RewardBound reward_bounds[NUM_REWARD_COEFS]; float min_avg_speed_to_consider_goal_attempt; + float partner_obs_radius; + float partner_obs_norm; + float road_obs_norm; }; // ======================================== @@ -1666,6 +1675,7 @@ void add_log(Drive *env) { env->log.lane_center_rate += env->logs[i].lane_center_rate / safe_timestep; env->log.max_observation_distance += env->logs[i].max_observation_distance / safe_timestep; env->log.observation_coverage += env->logs[i].observation_coverage / safe_timestep; + env->log.partner_obs_coverage += env->logs[i].partner_obs_coverage / safe_timestep; env->log.n += 1; } } @@ -2281,7 +2291,8 @@ void allocate(Drive *env) { init(env); int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC; ego_dim = (env->reward_conditioning == 1) ? ego_dim + NUM_REWARD_COEFS : ego_dim; - int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS; + int max_obs = + ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS; env->observations = (float *)calloc(env->active_agent_count * max_obs, sizeof(float)); env->actions = (float *)calloc(env->active_agent_count * 2, sizeof(float)); env->rewards = (float *)calloc(env->active_agent_count, sizeof(float)); @@ -2603,10 +2614,90 @@ void compute_agent_metrics(Drive *env, int agent_idx) { // void compute_rewards(void){} +float compute_partner_observations(Drive *env, float *obs, int agent_idx, int obs_idx) { + + int ego_idx = env->active_agent_indices[agent_idx]; + Agent *ego_entity = &env->agents[ego_idx]; + int ego_id = ego_entity->id; + + // Ego stats + float cos_heading = cosf(ego_entity->sim_heading); + float sin_heading = sinf(ego_entity->sim_heading); + + float obs_radius = env->partner_obs_radius; + int partners_in_radius = 0; + AgentDistance candidates[MAX_AGENTS]; + for (int i = 0; i < env->num_created_agents; i++) { + Agent *partner = &env->agents[i]; + if (ego_id == partner->id) + continue; + float dx = partner->sim_x - ego_entity->sim_x; + float dy = partner->sim_y - ego_entity->sim_y; + float dz = partner->sim_z - ego_entity->sim_z; + float dist_sq = dx * dx + dy * dy + dz * dz; + if (dist_sq <= obs_radius * obs_radius) { + candidates[partners_in_radius].idx = i; + candidates[partners_in_radius].dist_sq = dist_sq; + partners_in_radius++; + } + } + + // Partial selection sort: pick nearest min(partners_in_radius, MAX_PARTNER_OBSERVATIONS) + int cars_seen = (partners_in_radius < MAX_PARTNER_OBSERVATIONS) ? partners_in_radius : MAX_PARTNER_OBSERVATIONS; + for (int k = 0; k < cars_seen; k++) { + int min_idx = k; + for (int j = k + 1; j < partners_in_radius; j++) { + if (candidates[j].dist_sq < candidates[min_idx].dist_sq) + min_idx = j; + } + if (min_idx != k) { + AgentDistance tmp = candidates[k]; + candidates[k] = candidates[min_idx]; + candidates[min_idx] = tmp; + } + } + + // Write observations for nearest cars_seen agents + for (int k = 0; k < cars_seen; k++) { + Agent *partner = &env->agents[candidates[k].idx]; + float dx = partner->sim_x - ego_entity->sim_x; + float dy = partner->sim_y - ego_entity->sim_y; + float dz = partner->sim_z - ego_entity->sim_z; + float rel_x = dx * cos_heading + dy * sin_heading; + float rel_y = -dx * sin_heading + dy * cos_heading; + obs[obs_idx] = rel_x * env->partner_obs_norm; + obs[obs_idx + 1] = rel_y * env->partner_obs_norm; + obs[obs_idx + 2] = dz * env->partner_obs_norm; + obs[obs_idx + 3] = partner->sim_width / MAX_VEH_WIDTH; + obs[obs_idx + 4] = partner->sim_length / MAX_VEH_LEN; + float other_cos = cosf(partner->sim_heading); + float other_sin = sinf(partner->sim_heading); + obs[obs_idx + 5] = other_cos * cos_heading + other_sin * sin_heading; + obs[obs_idx + 6] = other_sin * cos_heading - other_cos * sin_heading; + float rel_vx = partner->sim_vx - ego_entity->sim_vx; + float rel_vy = partner->sim_vy - ego_entity->sim_vy; + float rel_speed_magnitude = sqrtf(rel_vx * rel_vx + rel_vy * rel_vy); + float rel_v_dot_heading = rel_vx * other_cos + rel_vy * other_sin; + obs[obs_idx + 7] = copysignf(rel_speed_magnitude, rel_v_dot_heading) / MAX_SPEED; + obs_idx += 8; + } + + // Pad remaining partner obs with zero + int remaining_partner_obs = (MAX_PARTNER_OBSERVATIONS - cars_seen) * 8; + memset(&obs[obs_idx], 0, remaining_partner_obs * sizeof(float)); + + // Return coverage: fraction of partners within radius that fit in obs slots + if (partners_in_radius == 0) { + return 100.0f; + } + return ((float)cars_seen / (float)partners_in_radius) * 100.0f; +} + void compute_observations(Drive *env) { int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC; ego_dim = (env->reward_conditioning == 1) ? ego_dim + NUM_REWARD_COEFS : ego_dim; - int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS; + int max_obs = + ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS; memset(env->observations, 0, max_obs * env->active_agent_count * sizeof(float)); float (*observations)[max_obs] = (float (*)[max_obs])env->observations; for (int i = 0; i < env->active_agent_count; i++) { @@ -2678,70 +2769,14 @@ void compute_observations(Drive *env) { } } } - // - // Relative Pos of other cars - - int cars_seen = 0; - for (int j = 0; j < MAX_AGENTS; j++) { - int index = -1; - if (j < env->active_agent_count) { - index = env->active_agent_indices[j]; - } else if (j < env->num_created_agents && env->static_agent_count > 0) { - index = env->static_agent_indices[j - env->active_agent_count]; - } - if (index == -1) - continue; - if (env->agents[index].type > CYCLIST) - break; - if (index == env->active_agent_indices[i]) - continue; // Skip self, but don't increment obs_idx - Agent *other_entity = &env->agents[index]; - if (ego_entity->respawn_timestep != -1) - continue; - if (other_entity->respawn_timestep != -1) - continue; - // Store original relative positions - float dx = other_entity->sim_x - ego_entity->sim_x; - float dy = other_entity->sim_y - ego_entity->sim_y; - float dz = other_entity->sim_z - ego_entity->sim_z; - float dist = (dx * dx + dy * dy + dz * dz); - if (dist > 2500.0f) - continue; - // Rotate to ego vehicle's frame - float rel_x = dx * cos_heading + dy * sin_heading; - float rel_y = -dx * sin_heading + dy * cos_heading; - float rel_z = dz; // No rotation needed for vertical component - // Store observations with correct indexing - obs[obs_idx] = rel_x * 0.02f; - obs[obs_idx + 1] = rel_y * 0.02f; - obs[obs_idx + 2] = rel_z * 0.02f; - obs[obs_idx + 3] = other_entity->sim_width / MAX_VEH_WIDTH; - obs[obs_idx + 4] = other_entity->sim_length / MAX_VEH_LEN; - // relative heading - float other_cos = cosf(other_entity->sim_heading); - float other_sin = sinf(other_entity->sim_heading); - float rel_heading_x = - other_cos * cos_heading + other_sin * sin_heading; // cos(a-b) = cos(a)cos(b) + sin(a)sin(b) - float rel_heading_y = - other_sin * cos_heading - other_cos * sin_heading; // sin(a-b) = sin(a)cos(b) - cos(a)sin(b) - - obs[obs_idx + 5] = rel_heading_x; - obs[obs_idx + 6] = rel_heading_y; - // relative speed - float rel_vx = other_entity->sim_vx - ego_entity->sim_vx; - float rel_vy = other_entity->sim_vy - ego_entity->sim_vy; - float rel_speed_magnitude = sqrtf(rel_vx * rel_vx + rel_vy * rel_vy); - float rel_v_dot_heading = rel_vx * other_cos + rel_vy * other_sin; - float rel_signed_speed = copysignf(rel_speed_magnitude, rel_v_dot_heading); - obs[obs_idx + 7] = rel_signed_speed / MAX_SPEED; - cars_seen++; - obs_idx += 8; // Move to next observation slot - } - int remaining_partner_obs = (MAX_AGENTS - 1 - cars_seen) * 8; - memset(&obs[obs_idx], 0, remaining_partner_obs * sizeof(float)); - obs_idx += remaining_partner_obs; - // Map observations + // Partner vehicle observations + float coverage = compute_partner_observations(env, obs, i, obs_idx); + env->logs[i].partner_obs_coverage += coverage; + + obs_idx += MAX_PARTNER_OBSERVATIONS * PARTNER_FEATURES; + + // map observations GridMapEntity entity_list[MAX_ROAD_SEGMENT_OBSERVATIONS]; int grid_idx = getGridIndex(env, ego_entity->sim_x, ego_entity->sim_y); float max_observation_distance = 0.0f; @@ -2802,9 +2837,9 @@ void compute_observations(Drive *env) { // Compute sin and cos of relative angle directly without atan2f float cos_angle = dx_norm * cos_heading + dy_norm * sin_heading; float sin_angle = -dx_norm * sin_heading + dy_norm * cos_heading; - obs[obs_idx] = x_obs * 0.02f; - obs[obs_idx + 1] = y_obs * 0.02f; - obs[obs_idx + 2] = z_obs * 0.02f; + obs[obs_idx] = x_obs * env->road_obs_norm; + obs[obs_idx + 1] = y_obs * env->road_obs_norm; + obs[obs_idx + 2] = z_obs * env->road_obs_norm; obs[obs_idx + 3] = length / MAX_ROAD_SEGMENT_LENGTH; obs[obs_idx + 4] = width / MAX_ROAD_SCALE; obs[obs_idx + 5] = cos_angle; @@ -3567,7 +3602,8 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC; ego_dim = (env->reward_conditioning == 1) ? ego_dim + NUM_REWARD_COEFS : ego_dim; - int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS; + int max_obs = + ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS; float (*observations)[max_obs] = (float (*)[max_obs])env->observations; float *agent_obs = &observations[agent_index][0]; // self @@ -3599,7 +3635,7 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las } // First draw other agent observations int obs_idx = ego_dim; // Start after ego obs - for (int j = 0; j < MAX_AGENTS - 1; j++) { + for (int j = 0; j < MAX_PARTNER_OBSERVATIONS; j++) { if (agent_obs[obs_idx] == 0 || agent_obs[obs_idx + 1] == 0) { obs_idx += 8; // Move to next agent observation continue; @@ -3721,8 +3757,8 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las obs_idx += PARTNER_FEATURES; // Move to next agent observation (8 values per agent) } // Then draw map observations - int map_start_idx = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1); // Start after agent observations - for (int k = 0; k < MAX_ROAD_SEGMENT_OBSERVATIONS; k++) { // Loop through potential map entities + int map_start_idx = ego_dim + PARTNER_FEATURES * (MAX_PARTNER_OBSERVATIONS); // Start after agent observations + for (int k = 0; k < MAX_ROAD_SEGMENT_OBSERVATIONS; k++) { // Loop through potential map entities int entity_idx = map_start_idx + k * 8; if (agent_obs[entity_idx] == 0 && agent_obs[entity_idx + 1] == 0) { continue; @@ -3960,10 +3996,17 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, Vector3 model_size = {bounds.max.x - bounds.min.x, bounds.max.y - bounds.min.y, bounds.max.z - bounds.min.z}; Vector3 scale = {size.x / model_size.x, size.y / model_size.y, size.z / model_size.z}; - // if((obs_only || IsKeyDown(KEY_LEFT_CONTROL)) && agent_index != env->human_agent_idx){ - // rlPopMatrix(); - // continue; - // } + if ((obs_only || IsKeyDown(KEY_LEFT_CONTROL)) && agent_index != env->human_agent_idx) { + int ego_active_idx = env->active_agent_indices[env->human_agent_idx]; + Agent *ego = &env->agents[ego_active_idx]; + float dist = relative_distance_3d(ego->sim_x, ego->sim_y, ego->sim_z, agent->sim_x, agent->sim_y, + agent->sim_z); + float obs_radius = env->partner_obs_radius; + if (dist > obs_radius) { + rlPopMatrix(); + continue; + } + } if (agent->type == CYCLIST) { scale = (Vector3){0.01, 0.01, 0.01}; car_model = client->cyclist; diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index ad090f80a..7e9324692 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -87,6 +87,9 @@ def __init__( reward_bound_acc_min=0.666, reward_bound_acc_max=1.5, min_avg_speed_to_consider_goal_attempt=2.0, + partner_obs_radius=50.0, + partner_obs_norm=0.02, + road_obs_norm=0.02, # spawn settings min_agents_per_env=32, max_agents_per_env=64, @@ -160,6 +163,11 @@ def __init__( self.reward_bound_acc_min = reward_bound_acc_min self.reward_bound_acc_max = reward_bound_acc_max self.min_avg_speed_to_consider_goal_attempt = min_avg_speed_to_consider_goal_attempt + self.partner_obs_radius = partner_obs_radius + if self.partner_obs_radius <= 0.0: + raise ValueError(f"partner_obs_radius must be > 0. Got: {self.partner_obs_radius}") + self.partner_obs_norm = partner_obs_norm + self.road_obs_norm = road_obs_norm # Observation space calculation if self.reward_conditioning: @@ -175,7 +183,7 @@ def __init__( # Extract observation shapes from constants # These need to be defined in C, since they determine the shape of the arrays self.max_road_objects = binding.MAX_ROAD_SEGMENT_OBSERVATIONS - self.max_partner_objects = binding.MAX_AGENTS - 1 + self.max_partner_objects = binding.MAX_PARTNER_OBSERVATIONS self.partner_features = binding.PARTNER_FEATURES self.road_features = binding.ROAD_FEATURES @@ -324,6 +332,9 @@ def __init__( reward_bound_acc_min=self.reward_bound_acc_min, reward_bound_acc_max=self.reward_bound_acc_max, min_avg_speed_to_consider_goal_attempt=self.min_avg_speed_to_consider_goal_attempt, + partner_obs_radius=self.partner_obs_radius, + partner_obs_norm=self.partner_obs_norm, + road_obs_norm=self.road_obs_norm, use_all_maps=self.use_all_maps, min_agents_per_env=self.min_agents_per_env, max_agents_per_env=self.max_agents_per_env, @@ -396,6 +407,9 @@ def __init__( reward_bound_acc_min=self.reward_bound_acc_min, reward_bound_acc_max=self.reward_bound_acc_max, min_avg_speed_to_consider_goal_attempt=self.min_avg_speed_to_consider_goal_attempt, + partner_obs_radius=self.partner_obs_radius, + partner_obs_norm=self.partner_obs_norm, + road_obs_norm=self.road_obs_norm, collision_behavior=self.collision_behavior, offroad_behavior=self.offroad_behavior, observation_window_size=self.observation_window_size, @@ -484,6 +498,9 @@ def resample_maps(self): reward_bound_acc_min=self.reward_bound_acc_min, reward_bound_acc_max=self.reward_bound_acc_max, min_avg_speed_to_consider_goal_attempt=self.min_avg_speed_to_consider_goal_attempt, + partner_obs_radius=self.partner_obs_radius, + partner_obs_norm=self.partner_obs_norm, + road_obs_norm=self.road_obs_norm, use_all_maps=self.use_all_maps, min_agents_per_env=self.min_agents_per_env, max_agents_per_env=self.max_agents_per_env, @@ -554,6 +571,9 @@ def resample_maps(self): reward_bound_acc_min=self.reward_bound_acc_min, reward_bound_acc_max=self.reward_bound_acc_max, min_avg_speed_to_consider_goal_attempt=self.min_avg_speed_to_consider_goal_attempt, + partner_obs_radius=self.partner_obs_radius, + partner_obs_norm=self.partner_obs_norm, + road_obs_norm=self.road_obs_norm, collision_behavior=self.collision_behavior, offroad_behavior=self.offroad_behavior, observation_window_size=self.observation_window_size, @@ -601,11 +621,6 @@ def _validate_agent_dimensions(self): raise ValueError( f"max_agents_per_env ({self.max_agents_per_env}) must be >= min_agents_per_env ({self.min_agents_per_env})" ) - if self.max_agents_per_env > binding.MAX_AGENTS: - # TODO: Check needs to be removed once MAX_PARTNER_OBS deprecates MAX_AGENTS - raise ValueError( - f"max_agents_per_env ({self.max_agents_per_env}) cannot exceed MAX_AGENTS ({binding.MAX_AGENTS}) defined in C code." - ) if self.spawn_width_min < 0.0: raise ValueError(f"spawn_width_min ({self.spawn_width_min}) must be non-negative") if self.spawn_width_max < self.spawn_width_min: diff --git a/pufferlib/ocean/drive/drivenet.h b/pufferlib/ocean/drive/drivenet.h index 50b6a987a..162c7d230 100644 --- a/pufferlib/ocean/drive/drivenet.h +++ b/pufferlib/ocean/drive/drivenet.h @@ -53,7 +53,7 @@ DriveNet *init_drivenet(Weights *weights, int num_agents, int dynamics_model, in if (reward_conditioning) { ego_dim += NUM_REWARD_COEFS; // Add 16 conditioning features } - int max_partners = MAX_AGENTS - 1; + int max_partners = MAX_PARTNER_OBSERVATIONS; int max_road_obs = MAX_ROAD_SEGMENT_OBSERVATIONS; int partner_features = PARTNER_FEATURES; int road_features = ROAD_FEATURES; @@ -146,7 +146,7 @@ void free_drivenet(DriveNet *net) { void forward(DriveNet *net, float *observations, int *actions) { int ego_dim = net->ego_dim; - int max_partners = MAX_AGENTS - 1; + int max_partners = MAX_PARTNER_OBSERVATIONS; int max_road_obs = MAX_ROAD_SEGMENT_OBSERVATIONS; int partner_features = PARTNER_FEATURES; int road_features = ROAD_FEATURES; diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c index 6e2bd719c..22f270636 100644 --- a/pufferlib/ocean/drive/visualize.c +++ b/pufferlib/ocean/drive/visualize.c @@ -288,6 +288,9 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o }, .spawn_settings = spawn_settings, .map_name = (char *)map_name, + .partner_obs_radius = conf.partner_obs_radius, + .partner_obs_norm = conf.partner_obs_norm, + .road_obs_norm = conf.road_obs_norm, }; if (conf.init_mode == INIT_VARIABLE_AGENT_NUMBER) { diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 1cb1c6f95..44eeacb40 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -1016,7 +1016,7 @@ PyMODINIT_FUNC PyInit_binding(void) { // Make constants accessible from Python PyModule_AddIntConstant(m, "MAX_ROAD_SEGMENT_OBSERVATIONS", MAX_ROAD_SEGMENT_OBSERVATIONS); - PyModule_AddIntConstant(m, "MAX_AGENTS", MAX_AGENTS); + PyModule_AddIntConstant(m, "MAX_PARTNER_OBSERVATIONS", MAX_PARTNER_OBSERVATIONS); PyModule_AddIntConstant(m, "TRAJECTORY_LENGTH", TRAJECTORY_LENGTH); PyModule_AddIntConstant(m, "ROAD_FEATURES", ROAD_FEATURES); diff --git a/pufferlib/ocean/env_config.h b/pufferlib/ocean/env_config.h index af38e4014..8105a40e9 100644 --- a/pufferlib/ocean/env_config.h +++ b/pufferlib/ocean/env_config.h @@ -82,6 +82,9 @@ typedef struct { float spawn_height; char map_dir[256]; float min_avg_speed_to_consider_goal_attempt; + float partner_obs_radius; + float partner_obs_norm; + float road_obs_norm; } env_init_config; // INI file parser handler - parses all environment configuration from drive.ini @@ -258,6 +261,12 @@ static int handler(void *config, const char *section, const char *name, const ch env_config->num_maps = atoi(value); } else if (MATCH("env", "min_avg_speed_to_consider_goal_attempt")) { env_config->min_avg_speed_to_consider_goal_attempt = atof(value); + } else if (MATCH("env", "partner_obs_radius")) { + env_config->partner_obs_radius = atof(value); + } else if (MATCH("env", "partner_obs_norm")) { + env_config->partner_obs_norm = atof(value); + } else if (MATCH("env", "road_obs_norm")) { + env_config->road_obs_norm = atof(value); } else if (MATCH("env", "min_agents_per_env")) { env_config->min_agents_per_env = atoi(value); } else if (MATCH("env", "max_agents_per_env")) {