Skip to content

Commit 3e5f67c

Browse files
committed
fix: compute first size
1 parent 32cbcb5 commit 3e5f67c

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

ditec_wdn_dataset/core/datasets_large.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,8 @@ def compute_first_size(self) -> int:
171171
my_zarr = self.get_array_by_key(node_attr)
172172
num_samples = my_zarr.shape[0]
173173
assert num_samples is not None and isinstance(num_samples, int)
174+
if num_samples != self.attrs["num_samples"]:
175+
print(f"WARN! The number of samples ({num_samples}) does not match the #samples ({self.attrs['num_samples']}) recorded in metadata ")
174176
return num_samples
175177

176178
def __init__(

ditec_wdn_dataset/hf/dataset.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -277,12 +277,14 @@ def get_array_by_key(self, key: str) -> hfd.Dataset:
277277
def compute_first_size(self) -> int:
278278
if len(self.sorted_node_attrs) <= 0:
279279
return 0
280-
num_samples = self.root.num_rows
281-
if isinstance(num_samples, dict):
282-
total_samples = sum(num_samples.values())
283-
return total_samples
284-
else:
285-
return num_samples
280+
# num_samples = self.root.num_rows
281+
# if isinstance(num_samples, dict):
282+
# total_samples = sum(num_samples.values())
283+
# return total_samples
284+
# else:
285+
# return num_samples
286+
287+
return self.attrs["num_samples"]
286288

287289
def __init__(
288290
self,

0 commit comments

Comments
 (0)