@@ -6,6 +6,7 @@
 import sys
 import tempfile
 import uuid
+import asyncio
 
 import h5py
 import numpy as np
@@ -24,7 +25,13 @@
 )
 from src.service.data.storage import get_storage_interface
 
-
+@pytest.fixture(autouse=True)
+def reset_storage():
+    """Reset storage interface for each test."""
+    import src.service.data.storage
+    src.service.data.storage._storage_interface = None
+    yield
+
 def pytest_sessionfinish(session, exitstatus):
     """Clean up the temporary directory after all tests are done."""
     if os.path.exists(TEMP_DIR):
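Note: resetting src.service.data.storage._storage_interface to None only has an effect if get_storage_interface() lazily caches a module-level singleton. A minimal sketch of that assumed pattern (create_storage is a hypothetical factory name; the real module may differ):

    _storage_interface = None

    def get_storage_interface():
        # Build the backend on first use and cache it for the whole process.
        global _storage_interface
        if _storage_interface is None:
            _storage_interface = create_storage()  # hypothetical factory
        return _storage_interface

Under that assumption, the autouse fixture guarantees each test starts from a freshly constructed storage interface against its own temporary directory.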
@@ -129,46 +136,43 @@ def generate_mismatched_shape_no_unique_name_multi_input_payload(n_rows, n_input |
 
 def get_data_from_storage(model_name, suffix):
     """Get data from storage file."""
-    storage = get_storage_interface()
-    filename = storage._get_filename(model_name + suffix)
-    if not os.path.exists(filename):
-        return None
-    with h5py.File(filename, "r") as f:
-        if model_name + suffix in f:
-            data = f[model_name + suffix][:]
-            column_names = f[model_name + suffix].attrs.get("column_names", [])
+    async def _get_data():
+        storage = get_storage_interface()
+        try:
+            data, column_names = await storage.read_data(model_name + suffix)
             return {"data": data, "column_names": column_names}
+        except Exception as e:
+            print(f"Error reading {model_name + suffix}: {e}")
+            return None
+
+    try:
+        return asyncio.run(_get_data())
+    except Exception as e:
+        print(f"Async error for {model_name + suffix}: {e}")
+        return None
 
 
 def get_metadata_ids(model_name):
     """Extract actual IDs from metadata storage."""
-    storage = get_storage_interface()
-    filename = storage._get_filename(model_name + METADATA_SUFFIX)
-    if not os.path.exists(filename):
-        return []
-    ids = []
-    with h5py.File(filename, "r") as f:
-        if model_name + METADATA_SUFFIX in f:
-            metadata = f[model_name + METADATA_SUFFIX][:]
-            column_names = f[model_name + METADATA_SUFFIX].attrs.get("column_names", [])
+    async def _get_ids():
+        storage = get_storage_interface()
+        try:
+            metadata, column_names = await storage.read_data(model_name + METADATA_SUFFIX)
             id_idx = next((i for i, name in enumerate(column_names) if name.lower() == "id"), None)
-            if id_idx is not None:
+            if id_idx is not None and metadata is not None:
+                ids = []
                 for row in metadata:
-                    try:
-                        if hasattr(row, "__getitem__") and len(row) > id_idx:
-                            id_val = row[id_idx]
-                        else:
-                            row_data = pickle.loads(row.tobytes())
-                            id_val = row_data[id_idx]
-                        if isinstance(id_val, np.ndarray):
-                            ids.append(str(id_val))
-                        else:
-                            ids.append(str(id_val))
-                    except Exception as e:
-                        print(f"Error processing ID from row {len(ids)}: {e}")
-                        continue
-        print(f"Successfully extracted {len(ids)} IDs: {ids}")
-    return ids
+                    if hasattr(row, "__len__") and len(row) > id_idx:
+                        ids.append(str(row[id_idx]))
+                return ids
+        except Exception as e:
+            print(f"Error getting metadata: {e}")
+        return []
+
+    try:
+        return asyncio.run(_get_ids())
+    except Exception:
+        return []
 
 
 def get_metadata_from_storage(model_name):
|
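Both helpers above bridge the now-async storage.read_data API back into synchronous test code with asyncio.run, and deliberately swallow errors so callers can check for None or an empty list instead of catching exceptions. A minimal usage sketch, with a hypothetical test name and model name (METADATA_SUFFIX mirrors the constant already used in this file):

    def test_metadata_ids_round_trip():
        # Hypothetical test; "example-model" is illustrative only.
        ids = get_metadata_ids("example-model")
        assert isinstance(ids, list)
        result = get_data_from_storage("example-model", METADATA_SUFFIX)
        if result is not None:
            # At most one extracted ID per stored metadata row.
            assert len(ids) <= len(result["data"])

One caveat: asyncio.run raises RuntimeError when invoked from a thread that is already running an event loop, so these helpers only work from plain synchronous tests; the broad try/except turns that failure into None or [].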