Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8c154bb
Making changes
Aditya-Gupta26 Mar 19, 2026
bf8e27e
Updating drive.h
Aditya-Gupta26 Mar 20, 2026
4517e7f
Further render related updates
Aditya-Gupta26 Mar 24, 2026
33b4c93
Fixing bug with top-down render
Aditya-Gupta26 Mar 25, 2026
07e64b8
Updating over latest 2.0 first
Aditya-Gupta26 Mar 25, 2026
7be0267
Rebased over latest 3.0 with some minor additional changes
Aditya-Gupta26 Mar 26, 2026
615fcde
Adding some render options
Aditya-Gupta26 Mar 26, 2026
54f2792
Adding some render options
Aditya-Gupta26 Mar 26, 2026
e01df36
Adding full safe_eval functionality
Aditya-Gupta26 Mar 28, 2026
ee80a1d
Further bug fixes
Aditya-Gupta26 Mar 28, 2026
2099537
Cleaning up some commented dead code
Aditya-Gupta26 Mar 28, 2026
ab8adf0
Fixing bugs based on PR comments
Aditya-Gupta26 Mar 30, 2026
e61ccec
Update pufferlib/pufferl.py
eugenevinitsky Mar 30, 2026
da25579
Some bug fixes
Aditya-Gupta26 Apr 5, 2026
04f9304
Updating over latest 3.0 and some minor bug fixes
Aditya-Gupta26 Apr 5, 2026
293f9d2
Adding safe eval renders
Aditya-Gupta26 Apr 7, 2026
90361f5
Fixing some episode length related bugs
Aditya-Gupta26 Apr 7, 2026
eb2c530
Fixing bugs with human replay with variable agent mode
Aditya-Gupta26 Apr 7, 2026
be85a6f
Update drive.ini
eugenevinitsky Apr 11, 2026
849be1f
Update drive.ini
eugenevinitsky Apr 11, 2026
f418fc1
Update drive.h
eugenevinitsky Apr 11, 2026
face788
Update drive.ini
eugenevinitsky Apr 11, 2026
02d8ecf
Unify rollout loop across Evaluator, render(), and SafeEvaluator (#395)
eugenevinitsky Apr 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 44 additions & 43 deletions pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ spawn_length_min = 2.0
spawn_length_max = 5.5
spawn_height = 1.5

; Render mode options:
; 0:"window" = pop-up raylib window (original)
; 1:"headless" = off-screen; frames piped to ffmpeg (recommended for training)
render_mode = 1

turn_off_normalization = 1
; Options: 0 - Use normalized reward coefs as input to NN, 1 - Don't

Expand Down Expand Up @@ -171,28 +176,35 @@ vf_clip_coef = 0.1999999999999999
vf_coef = 2
vtrace_c_clip = 1
vtrace_rho_clip = 1
checkpoint_interval = 250
; Rendering options
render = True
render_async = True # Render in background process to avoid blocking training
render_interval = 1000
; If True, show exactly what the agent sees in agent observation
obs_only = True
; Show grid lines
show_grid = True
; Draws lines from ego agent observed ORUs and road elements to show detection range
show_lasers = False
; Display human xy logs in the background
show_human_logs = False
; If True, zoom in on a part of the map. Otherwise, show full map
zoom_in = True
; Options: List[str to path], str to path (e.g., "resources/drive/training/binaries/map_001.bin"), None
render_map = none
checkpoint_interval = 1000

[eval]
eval_interval = 1000
; Path to dataset used for evaluation
map_dir = "resources/drive/binaries/training"
; Map directory for self-play evaluation (Carla maps)
sp_map_dir = "resources/drive/binaries/carla_2D"
; Map directory for human replay evaluation (WOMD training scenarios)
hr_map_dir = "resources/drive/binaries/training"

; Number of agents for self-play evaluation
num_eval_agents = 64
; Number of agents for human replay evaluation (one SDC per scenario)
human_replay_num_agents = 16
; If True, enable self-play evaluation (pair policy-controlled agent with a copy of itself)
self_play_eval = True
; If True, enable human replay evaluation (pair policy-controlled agent with human replays)
human_replay_eval = False
; Control mode for human replay: "control_sdc_only" controls only the SDC; others replay logged trajectories
human_replay_control_mode = "control_sdc_only"
; Which env to render during eval. Options: "first" (by index), "worst_collision", "random"
render_select_mode = "first"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

awesome

; View mode(s) for eval rendering. Options: sim_state, bev, persp, both (sim_state+persp), all (sim_state+persp+bev)
; Multi-view options run a separate rollout per view, each producing its own wandb video under render/{mode}/{view}
render_view_mode = "all"
; If True, render random scenarios. Note: Doing this frequently will slow down the training.
render_human_replay_eval = False
render_self_play_eval = True


; Number of scenarios to process per batch
wosac_batch_size = 32
; Target number of unique scenarios to perform evaluation on
Expand Down Expand Up @@ -222,18 +234,13 @@ wosac_sanity_check = False
wosac_aggregate_results = True
; Evaluation mode: "policy", "ground_truth"
wosac_eval_mode = "policy"
; If True, enable human replay evaluation (pair policy-controlled agent with human replays)
human_replay_eval = False
; Control only the self-driving car
human_replay_control_mode = "control_sdc_only"
; Number of scenarios for human replay evaluation equals the number of agents
human_replay_num_agents = 16

[safe_eval]
; If True, periodically run policy with safe/law-abiding reward conditioning and log metrics
enabled = True
; How often to run safe eval (in training epochs). Defaults to render_interval.
interval = 250
render_safe_eval = True
; Number of agents to run in the eval environment
num_agents = 50
; Number of episodes to collect metrics over
Expand Down Expand Up @@ -271,29 +278,23 @@ steer = 1.0
acc = 1.0

[render]
; Mode to render a bunch of maps with a given policy
; Path to dataset used for rendering
map_dir = "resources/drive/binaries/training"
; Directory to output rendered videos
; Render a batch of maps offline using the in-process c_render (ffmpeg) pipeline.
; Path to the .bin map directory
map_dir = "resources/drive/binaries/carla_2D"
; Directory to write output mp4 files
output_dir = "resources/drive/render_videos"
; Evaluation will run on the first num_maps maps in the map_dir directory
num_maps = 100
; "both", "topdown", "agent"; Other args are passed from train confs
view_mode = "both"
; Policy bin file used for rendering videos
policy_path = "resources/drive/puffer_drive_weights_resampling_300.bin"
; Number of maps to render (capped at files available in map_dir)
num_maps = 3
; View mode: sim_state (top-down, origin-centered), zoom_out (top-down, full map bbox), bev (agent BEV obs), persp (third-person follow-cam)
; Multi-view: "both" = sim_state + persp, "all" = sim_state + persp + bev (runs a separate rollout per view)
; NOTE: "persp" and "bev" require active_agent_count > 0
view_mode = "persp"
; Whether to draw agent trajectory traces
draw_traces = True
; Allows using more workers than available CPU cores for rendering
overwork = True
; If True, show exactly what the agent sees in agent observation
obs_only = True
; Show grid lines
show_grid = True
; Draws lines from ego agent observed ORUs and road elements to show detection range
show_lasers = True
; Display human xy logs in the background
show_human_logs = False
; If True, zoom in on a part of the map. Otherwise, show full map
zoom_in = True

[sweep.train.learning_rate]
distribution = log_normal
Expand Down
Loading
Loading