feat: interleaving indices

cuongth95 · cuongth95 · commit b6740b73e03f · 2025-10-27T17:42:16.000+01:00
diff --git a/ditec_wdn_dataset/core/datasets_large.py b/ditec_wdn_dataset/core/datasets_large.py
@@ -624,6 +624,7 @@ def compute_indices(
         # total_num_samples: int = sum_of_root_sizes  # if self.num_records is None else min(sum_of_root_sizes, self.num_records)
         # num_samples_per_network = total_num_samples // len(self._roots)
 
+        num_networks = len(self._roots)
         for network_index, root in enumerate(self._roots):
             if self.batch_axis_choice == "scene":
                 # arr WILL have shape <merged>(#scenes, #nodes_or_#links, #statics + time_dims * #dynamics)
@@ -645,7 +646,8 @@ def compute_indices(
             else:
                 raise NotImplementedError
             extended_network_ids = np.full([num_samples], network_index)
-            flatten_ids = np.arange(flatten_index, flatten_index + num_samples)
+            # flatten_ids = np.arange(flatten_index, flatten_index + num_samples)
+            flatten_ids = np.arange(num_samples) * num_networks + network_index
 
             local_chunk_map: dict[int, int] = {}
             lefts: np.ndarray | None = tuples[0] if tuples[0] is not None else None
@@ -699,6 +701,14 @@ def compute_indices(
             flatten_index += num_samples
             num_samples_per_network_list.append(num_samples)
 
+        # Trick to interleave the networks: sort the index map by flat id, so that
+        # 0     -> sample_0_dataset_0
+        # 1     -> sample_0_dataset_1
+        # N-1   -> sample_0_dataset_N-1
+        # N     -> sample_1_dataset_0
+        # ...   (N = num_networks; NOTE: flat ids have gaps when networks differ in num_samples)
+        index_map = OrderedDict(sorted(index_map.items()))
+
         length = flatten_index
         return length, index_map, network_map, chunk_map, num_samples_per_network_list
 
diff --git a/ditec_wdn_dataset/hf/dataset.py b/ditec_wdn_dataset/hf/dataset.py
@@ -562,6 +562,7 @@ def compute_indices(self, wdn_names: list[str]) -> tuple[int, dict[int, tuple[in
                 f"ERROR! Simulation time (duration) must be equal in option batch_axis_choice <{self.batch_axis_choice}>, but get {time_dims}!"
             )
 
+        num_networks = len(self._roots)
         for network_index, root in enumerate(self._roots):
             if self.batch_axis_choice == "scene":
                 # arr WILL have shape <merged>(#scenes, #nodes_or_#links, #statics + time_dims * #dynamics)
@@ -584,7 +585,8 @@ def compute_indices(self, wdn_names: list[str]) -> tuple[int, dict[int, tuple[in
             else:
                 raise NotImplementedError
             extended_network_ids = np.full([num_samples], network_index)
-            flatten_ids = np.arange(flatten_index, flatten_index + num_samples)
+            # flatten_ids = np.arange(flatten_index, flatten_index + num_samples)
+            flatten_ids = np.arange(num_samples) * num_networks + network_index
 
             network_index_map: dict[int, tuple[int | None, int | None]] = {}
             # fid_nid_map: dict[int, int] = {}
@@ -601,7 +603,13 @@ def compute_indices(self, wdn_names: list[str]) -> tuple[int, dict[int, tuple[in
             # update flatten index indicator and network index
             flatten_index += num_samples
             num_samples_per_network_list.append(num_samples)
-
+        # Trick to interleave the networks: sort the index map by flat id, so that
+        # 0     -> sample_0_dataset_0
+        # 1     -> sample_0_dataset_1
+        # N-1   -> sample_0_dataset_N-1
+        # N     -> sample_1_dataset_0
+        # ...   (N = num_networks; NOTE: flat ids have gaps when networks differ in num_samples)
+        index_map = OrderedDict(sorted(index_map.items()))
         length = flatten_index
         return length, index_map, network_map, num_samples_per_network_list