1717from debug_gym .gym .utils import filter_problems
1818from debug_gym .logger import DebugGymLogger
1919
20- main_logger = logging .getLogger (__name__ )
21-
2220
2321def decolor_dict_keys (key ):
2422 """Remove ANSI escape codes"""
@@ -265,10 +263,6 @@ def load_dataset(
265263 prepull_images : bool = False ,
266264 logger : DebugGymLogger | None = None ,
267265 ) -> dict :
268- main_logger .info (
269- f"Loading R2E-Gym dataset `{ dataset_id } ` (rev: { dataset_revision } )..."
270- )
271-
272266 logger = logger or DebugGymLogger ("debug_gym" )
273267 data_path = Path (dataset_id )
274268
@@ -285,7 +279,6 @@ def load_dataset(
285279 # Loading from HuggingFace or a folder.
286280 ds = load_dataset (dataset_id , revision = dataset_revision )
287281
288- main_logger .info ("Dataset loaded." )
289282 # Select the split.
290283 ds = ds [split ]
291284
@@ -297,18 +290,18 @@ def load_dataset(
297290 def extract_instance_id (docker_image : str ) -> str :
298291 return docker_image .split ("/" , 1 )[- 1 ]
299292
300- dataset = {
293+ id2idx = {
301294 extract_instance_id (docker_image ): i
302295 for i , docker_image in enumerate (ds ["docker_image" ])
303296 }
304- problems = filter_problems (dataset , problems , custom_splits , excluded_ids )
305- dataset = {problem : ds [dataset [problem ]] for problem in problems }
297+ problems = filter_problems (id2idx , problems , custom_splits , excluded_ids )
298+ dataset = {problem : ds [id2idx [problem ]] for problem in problems }
306299
307300 # add instance id to each example (name of the image)
308- for instance_id in dataset :
309- dataset [ instance_id ] ["instance_id" ] = instance_id
301+ for instance_id , task_data in dataset . items () :
302+ task_data ["instance_id" ] = instance_id
310303
311- image_names = set (example ["docker_image" ] for example in dataset .values ())
304+ image_names = set (task_data ["docker_image" ] for task_data in dataset .values ())
312305 logger .debug (
313306 f"Loaded { len (dataset )} tasks across { len (image_names )} Docker images from { dataset_id } ."
314307 )
0 commit comments