Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8c154bb
Making changes
Aditya-Gupta26 Mar 19, 2026
bf8e27e
Updating drive.h
Aditya-Gupta26 Mar 20, 2026
4517e7f
Further render related updates
Aditya-Gupta26 Mar 24, 2026
33b4c93
Fixing bug with top-down render
Aditya-Gupta26 Mar 25, 2026
07e64b8
Updating over latest 2.0 first
Aditya-Gupta26 Mar 25, 2026
7be0267
Rebased over latest 3.0 with some minor additional changes
Aditya-Gupta26 Mar 26, 2026
615fcde
Adding some render options
Aditya-Gupta26 Mar 26, 2026
54f2792
Adding some render options
Aditya-Gupta26 Mar 26, 2026
e01df36
Adding full safe_eval functionality
Aditya-Gupta26 Mar 28, 2026
ee80a1d
Further bug fixes
Aditya-Gupta26 Mar 28, 2026
2099537
Cleaning up some commented dead code
Aditya-Gupta26 Mar 28, 2026
ab8adf0
Fixing bugs based on PR comments
Aditya-Gupta26 Mar 30, 2026
e61ccec
Update pufferlib/pufferl.py
eugenevinitsky Mar 30, 2026
da25579
Some bug fixes
Aditya-Gupta26 Apr 5, 2026
04f9304
Updating over latest 3.0 and some minor bug fixes
Aditya-Gupta26 Apr 5, 2026
293f9d2
Adding safe eval renders
Aditya-Gupta26 Apr 7, 2026
90361f5
Fixing some episode length related bugs
Aditya-Gupta26 Apr 7, 2026
eb2c530
Fixing bugs with human replay with variable agent mode
Aditya-Gupta26 Apr 7, 2026
be85a6f
Update drive.ini
eugenevinitsky Apr 11, 2026
849be1f
Update drive.ini
eugenevinitsky Apr 11, 2026
f418fc1
Update drive.h
eugenevinitsky Apr 11, 2026
face788
Update drive.ini
eugenevinitsky Apr 11, 2026
02d8ecf
Unify rollout loop across Evaluator, render(), and SafeEvaluator (#395)
eugenevinitsky Apr 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 44 additions & 43 deletions pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ spawn_length_min = 2.0
spawn_length_max = 5.5
spawn_height = 1.5

; Render mode options:
; 0:"window" = pop-up raylib window (original)
; 1:"headless" = off-screen; frames piped to ffmpeg (recommended for training)
render_mode = 1

turn_off_normalization = 1
; Options: 0 - Use normalized reward coefs as input to NN, 1 - Don't

Expand Down Expand Up @@ -171,28 +176,35 @@ vf_clip_coef = 0.1999999999999999
vf_coef = 2
vtrace_c_clip = 1
vtrace_rho_clip = 1
checkpoint_interval = 250
; Rendering options
render = True
render_async = True # Render in background process to avoid blocking training
render_interval = 1000
; If True, show exactly what the agent sees in agent observation
obs_only = True
; Show grid lines
show_grid = True
; Draws lines from ego agent observed ORUs and road elements to show detection range
show_lasers = False
; Display human xy logs in the background
show_human_logs = False
; If True, zoom in on a part of the map. Otherwise, show full map
zoom_in = True
; Options: List[str to path], str to path (e.g., "resources/drive/training/binaries/map_001.bin"), None
render_map = none
checkpoint_interval = 1000

[eval]
eval_interval = 1000
; Path to dataset used for evaluation
map_dir = "resources/drive/binaries/training"
; Map directory for self-play evaluation (Carla maps)
sp_map_dir = "resources/drive/binaries/carla_2D"
; Map directory for human replay evaluation (WOMD training scenarios)
hr_map_dir = "resources/drive/binaries/training"

; Number of agents for self-play evaluation
num_eval_agents = 64
; Number of agents for human replay evaluation (one SDC per scenario)
human_replay_num_agents = 16
; If True, enable self-play evaluation (pair policy-controlled agent with a copy of itself)
self_play_eval = True
; If True, enable human replay evaluation (pair policy-controlled agent with human replays)
human_replay_eval = False
; Control mode for human replay: "control_sdc_only" controls only the SDC; others replay logged trajectories
human_replay_control_mode = "control_sdc_only"
; Which env to render during eval. Options: "first" (by index), "worst_collision", "random"
render_select_mode = "first"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

awesome

; View mode(s) for eval rendering. Options: sim_state, bev, persp, both (sim_state+persp), all (sim_state+persp+bev)
; Multi-view options run a separate rollout per view, each producing its own wandb video under render/{mode}/{view}
render_view_mode = "all"
; If True, render random scenarios. Note: Doing this frequently will slow down the training.
render_human_replay_eval = False
render_self_play_eval = True


; Number of scenarios to process per batch
wosac_batch_size = 32
; Target number of unique scenarios to perform evaluation on
Expand Down Expand Up @@ -222,18 +234,13 @@ wosac_sanity_check = False
wosac_aggregate_results = True
; Evaluation mode: "policy", "ground_truth"
wosac_eval_mode = "policy"
; If True, enable human replay evaluation (pair policy-controlled agent with human replays)
human_replay_eval = False
; Control only the self-driving car
human_replay_control_mode = "control_sdc_only"
; Number of scenarios for human replay evaluation equals the number of agents
human_replay_num_agents = 16

[safe_eval]
; If True, periodically run policy with safe/law-abiding reward conditioning and log metrics
enabled = True
; How often to run safe eval (in training epochs). Defaults to render_interval.
interval = 250
render_safe_eval = True
; Number of agents to run in the eval environment
num_agents = 50
; Number of episodes to collect metrics over
Expand Down Expand Up @@ -271,29 +278,23 @@ steer = 1.0
acc = 1.0

[render]
; Mode to render a bunch of maps with a given policy
; Path to dataset used for rendering
map_dir = "resources/drive/binaries/training"
; Directory to output rendered videos
; Render a batch of maps offline using the in-process c_render (ffmpeg) pipeline.
; Path to the .bin map directory
map_dir = "resources/drive/binaries/carla_2D"
; Directory to write output mp4 files
output_dir = "resources/drive/render_videos"
; Evaluation will run on the first num_maps maps in the map_dir directory
num_maps = 100
; "both", "topdown", "agent"; Other args are passed from train confs
view_mode = "both"
; Policy bin file used for rendering videos
policy_path = "resources/drive/puffer_drive_weights_resampling_300.bin"
; Number of maps to render (capped at files available in map_dir)
num_maps = 3
; View mode: sim_state (top-down, origin-centered), zoom_out (top-down, full map bbox), bev (agent BEV obs), persp (third-person follow-cam)
; Multi-view: "both" = sim_state + persp, "all" = sim_state + persp + bev (runs a separate rollout per view)
; NOTE: "persp" and "bev" require active_agent_count > 0
view_mode = "persp"
; Whether to draw agent trajectory traces
draw_traces = True
; Allows using more workers than available CPU cores for rendering
overwork = True
; If True, show exactly what the agent sees in agent observation
obs_only = True
; Show grid lines
show_grid = True
; Draws lines from ego agent observed ORUs and road elements to show detection range
show_lasers = True
; Display human xy logs in the background
show_human_logs = False
; If True, zoom in on a part of the map. Otherwise, show full map
zoom_in = True

[sweep.train.learning_rate]
distribution = log_normal
Expand Down
Loading
Loading