Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pufferlib/ocean/drive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
env->collision_behavior = conf.collision_behavior;
env->offroad_behavior = conf.offroad_behavior;
env->dt = conf.dt;
env->randomize_respawn = (int)unpack(kwargs, "randomize_respawn");
env->init_mode = (int)unpack(kwargs, "init_mode");
env->control_mode = (int)unpack(kwargs, "control_mode");
env->goal_behavior = (int)unpack(kwargs, "goal_behavior");
Expand Down
125 changes: 114 additions & 11 deletions pufferlib/ocean/drive/drive.h
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ struct Drive {
char video_suffix[64]; // Optional suffix appended to mp4 filename (e.g. "_bev")
int collision_behavior;
int offroad_behavior;
int randomize_respawn;
float observation_window_size;
float polyline_reduction_threshold;
float polyline_max_segment_length;
Expand Down Expand Up @@ -2928,16 +2929,105 @@ void compute_observations(Drive *env) {
}
}

// Find a random collision-free position on a drivable lane for an existing agent.
// On success, updates the agent's sim_x/y/z/heading (and the stored initial
// trajectory sample so future non-random resets stay consistent) and returns
// true. Returns false if no drivable lane exists, allocation fails, or no
// valid spawn is found within MAX_SPAWN_ATTEMPTS.
static bool randomize_agent_position(Drive *env, int agent_idx) {
    Agent *agent = &env->agents[agent_idx];

    // Collect drivable lanes on the heap: env->num_roads can be large and this
    // function runs on every respawn, so stack VLAs of that size risk overflow.
    int *drivable_lanes = malloc((size_t)env->num_roads * sizeof *drivable_lanes);
    float *lane_lengths = malloc((size_t)env->num_roads * sizeof *lane_lengths);
    bool found = false;
    if (drivable_lanes == NULL || lane_lengths == NULL)
        goto out;

    int num_drivable = 0;
    float total_lane_length = 0.0f;
    for (int i = 0; i < env->num_roads; i++) {
        if (env->road_elements[i].type == ROAD_LANE && env->road_elements[i].polyline_length > 0.0f) {
            drivable_lanes[num_drivable] = i;
            lane_lengths[num_drivable] = env->road_elements[i].polyline_length;
            total_lane_length += lane_lengths[num_drivable];
            num_drivable++;
        }
    }

    // Guard total_lane_length too: the weighted draw below divides nothing but
    // a zero total would always select the fallback lane index.
    if (num_drivable == 0 || total_lane_length <= 0.0f)
        goto out;

    for (int attempt = 0; attempt < MAX_SPAWN_ATTEMPTS; attempt++) {
        // Length-weighted lane selection: longer lanes are proportionally more
        // likely to be chosen.
        float r = ((float)rand() / (float)RAND_MAX) * total_lane_length;
        float cumulative = 0.0f;
        int selected = num_drivable - 1; // fallback guards against float rounding
        for (int k = 0; k < num_drivable; k++) {
            cumulative += lane_lengths[k];
            if (r < cumulative) {
                selected = k;
                break;
            }
        }
        RoadMapElement *lane = &env->road_elements[drivable_lanes[selected]];

        float spawn_x, spawn_y, spawn_z, spawn_heading;
        get_random_point_on_lane(lane, &spawn_x, &spawn_y, &spawn_z, &spawn_heading);
        spawn_z += agent->sim_height / 2.0f;

        // Temporarily invalidate this agent so check_spawn_collision skips it
        float saved_x = agent->sim_x;
        agent->sim_x = INVALID_POSITION;
        bool collision = check_spawn_collision(env, env->active_agent_count, spawn_x, spawn_y, spawn_z, spawn_heading,
                                               agent->sim_length, agent->sim_width, agent->sim_height);
        agent->sim_x = saved_x;
        if (collision)
            continue;

        // Check offroad
        if (check_spawn_offroad(env, spawn_x, spawn_y, spawn_z, spawn_heading, agent->sim_length, agent->sim_width,
                                agent->sim_height))
            continue;

        agent->sim_x = spawn_x;
        agent->sim_y = spawn_y;
        agent->sim_z = spawn_z;
        agent->sim_heading = spawn_heading;
        agent->heading_x = cosf(spawn_heading);
        agent->heading_y = sinf(spawn_heading);
        // Update stored initial position so future non-random resets are consistent
        agent->log_trajectory_x[0] = spawn_x;
        agent->log_trajectory_y[0] = spawn_y;
        agent->log_trajectory_z[0] = spawn_z;
        agent->log_heading[0] = spawn_heading;
        found = true;
        break;
    }

out:
    free(drivable_lanes);
    free(lane_lengths);
    return found;
}

void respawn_agent(Drive *env, int agent_idx) {
Agent *agent = &env->agents[agent_idx];
agent->sim_x = agent->log_trajectory_x[0];
agent->sim_y = agent->log_trajectory_y[0];
agent->sim_z = agent->log_trajectory_z[0];
agent->sim_heading = agent->log_heading[0];
agent->heading_x = cosf(agent->sim_heading);
agent->heading_y = sinf(agent->sim_heading);
agent->sim_vx = agent->log_velocity_x[0];
agent->sim_vy = agent->log_velocity_y[0];

if (env->randomize_respawn) {
if (!randomize_agent_position(env, agent_idx)) {
// Fallback to original position if no valid spawn found
agent->sim_x = agent->log_trajectory_x[0];
agent->sim_y = agent->log_trajectory_y[0];
agent->sim_z = agent->log_trajectory_z[0];
agent->sim_heading = agent->log_heading[0];
agent->heading_x = cosf(agent->sim_heading);
agent->heading_y = sinf(agent->sim_heading);
}
// Sample a new goal relative to the new position
sample_new_goal(env, agent_idx);
agent->sim_vx = 0.0f;
agent->sim_vy = 0.0f;
agent->sim_speed = 0.0f;
agent->sim_speed_signed = 0.0f;
} else {
agent->sim_x = agent->log_trajectory_x[0];
agent->sim_y = agent->log_trajectory_y[0];
agent->sim_z = agent->log_trajectory_z[0];
agent->sim_heading = agent->log_heading[0];
agent->heading_x = cosf(agent->sim_heading);
agent->heading_y = sinf(agent->sim_heading);
agent->sim_vx = agent->log_velocity_x[0];
agent->sim_vy = agent->log_velocity_y[0];
}
agent->metrics_array[COLLISION_IDX] = 0.0f;
agent->metrics_array[OFFROAD_IDX] = 0.0f;
agent->metrics_array[REACHED_GOAL_IDX] = 0.0f;
Expand Down Expand Up @@ -3203,8 +3293,21 @@ void move_dynamics(Drive *env, int action_idx, int agent_idx) {

void c_reset(Drive *env) {
env->timestep = env->init_steps;
set_start_position(env);
reset_goal_positions(env);
if (env->randomize_respawn) {
// Randomize all agent positions on reset
for (int x = 0; x < env->active_agent_count; x++) {
int agent_idx = env->active_agent_indices[x];
randomize_agent_position(env, agent_idx);
}
Comment on lines +3298 to +3301
Copy link

Copilot AI Mar 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In c_reset() variable-agent mode, the return value of randomize_agent_position() is ignored. If it fails to find a valid spawn, the agent will keep its prior episode position (which could be offroad/colliding), and the subsequent sample_new_goal() will be based on that stale state. Please check the return value and apply a deterministic fallback (e.g., retry with different constraints, respawn to log_trajectory_*[0], or mark/remove the agent) so resets are reliable.

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Mar 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In c_reset() variable-agent mode, positions are randomized but per-agent dynamics state (e.g., sim_vx/sim_vy, speed, accelerations, steering) is never reset. This means agents can start a new episode with leftover velocities from the previous episode even though their pose changed. Reset the dynamics fields after randomize_agent_position() (similar to respawn_agent()’s variable-mode branch) to avoid carry-over behavior and immediate post-reset collisions.

Suggested change
}
}
// Reset dynamics state after positions have been randomized to avoid carry-over
for (int x = 0; x < env->active_agent_count; x++) {
int agent_idx = env->active_agent_indices[x];
Agent *agent = &env->agents[agent_idx];
agent->sim_vx = 0.0f;
agent->sim_vy = 0.0f;
agent->sim_speed = 0.0f;
agent->sim_ax = 0.0f;
agent->sim_ay = 0.0f;
agent->steering = 0.0f;
}

Copilot uses AI. Check for mistakes.
// Sample new goals relative to new positions
for (int x = 0; x < env->active_agent_count; x++) {
int agent_idx = env->active_agent_indices[x];
sample_new_goal(env, agent_idx);
}
Comment on lines +3302 to +3306
Copy link

Copilot AI Mar 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sample_new_goal() increments agent->goals_sampled_this_episode, but c_reset() later unconditionally sets goals_sampled_this_episode = 1.0f for all agents. In variable-agent mode this means the initial goal sampling done here will be double-counted or discarded depending on ordering. Consider moving goal sampling to after the per-agent reset loop (or resetting goals_sampled_this_episode appropriately before sampling) so the counter reflects the actual number of sampled goals.

Copilot uses AI. Check for mistakes.
} else {
set_start_position(env);
reset_goal_positions(env);
}
for (int x = 0; x < env->active_agent_count; x++) {
env->logs[x] = (Log){0};
int agent_idx = env->active_agent_indices[x];
Expand Down Expand Up @@ -3234,7 +3337,7 @@ void c_reset(Drive *env) {
agent->prev_goal_z = agent->sim_z;
generate_reward_coefs(env, agent);

if (env->goal_behavior == GOAL_GENERATE_NEW) {
if (env->goal_behavior == GOAL_GENERATE_NEW && env->init_mode != INIT_VARIABLE_AGENT_NUMBER) {
agent->goal_position_x = agent->init_goal_x;
agent->goal_position_y = agent->init_goal_y;
agent->goal_position_z = agent->init_goal_z;
Expand Down
7 changes: 7 additions & 0 deletions pufferlib/ocean/drive/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def __init__(
spawn_length_min=2.0,
spawn_length_max=5.5,
spawn_height=1.5,
randomize_respawn=1,
):
# env
self.dt = dt
Expand Down Expand Up @@ -135,6 +136,7 @@ def __init__(
self.episode_length = episode_length
self.termination_mode = termination_mode
self.resample_frequency = resample_frequency
self.randomize_respawn = randomize_respawn
self.dynamics_model = dynamics_model
# reward randomization bounds
self.reward_bound_goal_radius_min = reward_bound_goal_radius_min
Expand Down Expand Up @@ -437,6 +439,7 @@ def __init__(
spawn_length_min=self.spawn_length_min,
spawn_length_max=self.spawn_length_max,
spawn_height=self.spawn_height,
randomize_respawn=self.randomize_respawn,
render_mode=render_mode,
)
self.env_ids.append(env_id)
Expand Down Expand Up @@ -601,6 +604,7 @@ def resample_maps(self):
spawn_length_min=self.spawn_length_min,
spawn_length_max=self.spawn_length_max,
spawn_height=self.spawn_height,
randomize_respawn=self.randomize_respawn,
render_mode=self.render_mode,
)
self.env_ids.append(env_id)
Expand Down Expand Up @@ -852,6 +856,9 @@ def save_map_binary(
sdc_track_index = metadata.get("sdc_track_index", -1) # -1 as default if not found
tracks_to_predict = metadata.get("tracks_to_predict", [])

# Note: C load_map_binary does NOT read a scenario_id prefix.
# Do not write one here or the binary will be misaligned.

# Write sdc_track_index
f.write(struct.pack("i", sdc_track_index))

Expand Down
118 changes: 118 additions & 0 deletions tests/test_randomize_respawn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Test that randomize_respawn produces different agent positions across resets.

Run on the cluster with:
srun ... python -m pytest tests/test_randomize_respawn.py -v
"""

import numpy as np
import pytest
from pufferlib.ocean.drive.drive import Drive


MAP_DIR = "pufferlib/resources/drive/binaries/carla_data"


def get_agent_positions(env):
    """Return a copied snapshot of the env's full observation array.

    Per-agent world coordinates are not exposed directly and observations are
    in ego frame, so a copy of the whole observation vector serves as the
    simplest position-sensitive signature for comparisons across resets.
    """
    snapshot = env.observations.copy()
    return snapshot


@pytest.fixture
def env_randomize():
    """Yield a Drive env with randomize_respawn=1; close it on teardown."""
    config = dict(
        num_agents=8,
        num_maps=2,
        map_dir=MAP_DIR,
        dynamics_model="classic",
        min_agents_per_env=1,
        max_agents_per_env=8,
        init_mode="init_variable_agent_number",
        control_mode="control_vehicles",
        episode_length=300,
        resample_frequency=0,
        randomize_respawn=1,
    )
    env = Drive(**config)
    env.reset()
    yield env
    env.close()


@pytest.fixture
def env_no_randomize():
    """Yield a Drive env with randomize_respawn=0; close it on teardown."""
    config = dict(
        num_agents=8,
        num_maps=2,
        map_dir=MAP_DIR,
        dynamics_model="classic",
        min_agents_per_env=1,
        max_agents_per_env=8,
        init_mode="init_variable_agent_number",
        control_mode="control_vehicles",
        episode_length=10,
        resample_frequency=0,
        randomize_respawn=0,
    )
    env = Drive(**config)
    env.reset()
    yield env
    env.close()


def test_randomize_respawn_produces_different_positions(env_randomize):
    """With randomize_respawn=1, positions should differ after episode reset.

    The reset is forced via resample_maps() rather than stepping through a
    full episode; randomize_respawn=1 should then place agents at new random
    positions, changing the observation vector.
    """
    env = env_randomize

    # Snapshot observations before forcing a reset.
    obs_before = env.observations.copy()

    # Force a reset; no stepping is required (the previous unused `actions`
    # array and the stale "step until episode resets" comment are removed).
    env.resample_maps()
    obs_after = env.observations.copy()

    # Observations should differ (agents at different positions)
    assert not np.allclose(obs_before, obs_after, atol=1e-6), (
        "Observations should differ after reset with randomize_respawn=1"
    )


def test_no_randomize_same_positions(env_no_randomize):
    """With randomize_respawn=0, positions should be the same after episode reset."""
    env = env_no_randomize
    actions = np.zeros(env.action_space.shape, dtype=env.action_space.dtype)

    # Snapshot observations at the start of the episode.
    obs_before = env.observations.copy()

    # episode_length is 10 for this fixture, so 15 steps guarantees that
    # c_reset has fired at least once.
    for _ in range(15):
        env.step(actions)

    obs_after = env.observations.copy()

    # Without randomization, agents respawn at log_trajectory[0], so the
    # leading ego features (position-related: speed, heading components,
    # goal direction, ...) should match closely after reset. Exact equality
    # is not expected because metrics/counters also feed the observations.
    assert np.allclose(obs_before[:, :5], obs_after[:, :5], atol=0.1), (
        "Position features should be similar after reset with randomize_respawn=0"
    )


def test_multiple_resets_produce_variety(env_randomize):
    """Multiple resets with randomize_respawn should produce different positions each time."""
    env = env_randomize

    snapshots = []
    for _ in range(5):
        env.resample_maps()
        snapshots.append(env.observations[:, :10].copy())

    # At least one reset must yield observations that differ from the first.
    reference = snapshots[0]
    some_differ = any(not np.allclose(reference, snap, atol=1e-6) for snap in snapshots[1:])
    assert some_differ, "5 resets with randomize_respawn should not all produce identical observations"
Loading