Skip to content

Commit 4262b4c

Browse files
authored
Merge pull request #195 from ImagingDataCommons/copilot/raise-exception-disk-space
Raise exception when there is not enough disk space
2 parents f61b69e + d3160a3 commit 4262b4c

File tree

4 files changed

+203
-90
lines changed

4 files changed

+203
-90
lines changed

idc_index/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
__all__ = ["__version__"]
1111

12-
from .index import IDCClient
12+
from .index import IDCClient, IDCClientInsufficientDiskSpaceError
1313

1414
_ = IDCClient
15+
_ = IDCClientInsufficientDiskSpaceError

idc_index/cli.py

Lines changed: 88 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import click
1212

1313
from . import index
14-
from .index import IDCClient
14+
from .index import IDCClient, IDCClientInsufficientDiskSpaceError
1515

1616
# Set up logging for the CLI module
1717
logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.DEBUG)
@@ -190,19 +190,22 @@ def download_from_selection(
190190
logger_cli.debug(f"use_s5cmd_sync: {use_s5cmd_sync}")
191191
logger_cli.debug(f"dirTemplate: {dir_template}")
192192

193-
client.download_from_selection(
194-
download_dir,
195-
dry_run=dry_run,
196-
collection_id=collection_id,
197-
patientId=patient_id,
198-
studyInstanceUID=study_instance_uid,
199-
seriesInstanceUID=series_instance_uid,
200-
crdc_series_uuid=crdc_series_uuid,
201-
quiet=quiet,
202-
show_progress_bar=show_progress_bar,
203-
use_s5cmd_sync=use_s5cmd_sync,
204-
dirTemplate=dir_template,
205-
)
193+
try:
194+
client.download_from_selection(
195+
download_dir,
196+
dry_run=dry_run,
197+
collection_id=collection_id,
198+
patientId=patient_id,
199+
studyInstanceUID=study_instance_uid,
200+
seriesInstanceUID=series_instance_uid,
201+
crdc_series_uuid=crdc_series_uuid,
202+
quiet=quiet,
203+
show_progress_bar=show_progress_bar,
204+
use_s5cmd_sync=use_s5cmd_sync,
205+
dirTemplate=dir_template,
206+
)
207+
except IDCClientInsufficientDiskSpaceError as e:
208+
logger_cli.error(e.message)
206209

207210

208211
@idc.command()
@@ -286,15 +289,18 @@ def download_from_manifest(
286289
logger_cli.debug(f"dirTemplate: {dir_template}")
287290

288291
# Call IDCClient's download_from_manifest method with the provided parameters
289-
client.download_from_manifest(
290-
manifestFile=manifest_file,
291-
downloadDir=download_dir,
292-
quiet=quiet,
293-
validate_manifest=validate_manifest,
294-
show_progress_bar=show_progress_bar,
295-
use_s5cmd_sync=use_s5cmd_sync,
296-
dirTemplate=dir_template,
297-
)
292+
try:
293+
client.download_from_manifest(
294+
manifestFile=manifest_file,
295+
downloadDir=download_dir,
296+
quiet=quiet,
297+
validate_manifest=validate_manifest,
298+
show_progress_bar=show_progress_bar,
299+
use_s5cmd_sync=use_s5cmd_sync,
300+
dirTemplate=dir_template,
301+
)
302+
except IDCClientInsufficientDiskSpaceError as e:
303+
logger_cli.error(e.message)
298304

299305

300306
@idc.command()
@@ -339,65 +345,68 @@ def download(generic_argument, download_dir, dir_template, log_level):
339345
else:
340346
download_dir = Path.cwd()
341347

342-
if (
343-
len(generic_argument) < _get_max_path_length()
344-
and Path(generic_argument).is_file()
345-
):
346-
# Parse the input parameters and pass them to IDC
347-
logger_cli.info("Detected manifest file, downloading from manifest.")
348-
client.download_from_manifest(
349-
generic_argument, downloadDir=download_dir, dirTemplate=dir_template
350-
)
351-
# this is not a file manifest
352-
else:
353-
# Split the input string and filter out any empty values
354-
item_ids = [item for item in generic_argument.split(",") if item]
355-
356-
if not item_ids:
357-
logger_cli.error("No valid IDs provided.")
358-
359-
index_df = client.index
360-
361-
def check_and_download(column_name, item_ids, download_dir, kwarg_name):
362-
matches = index_df[column_name].isin(item_ids)
363-
matched_ids = index_df[column_name][matches].unique().tolist()
364-
if not matched_ids:
365-
return False
366-
unmatched_ids = list(set(item_ids) - set(matched_ids))
367-
if unmatched_ids:
368-
logger_cli.debug(
369-
f"Partial match for {column_name}: matched {matched_ids}, unmatched {unmatched_ids}"
370-
)
371-
logger_cli.info(f"Identified matching {column_name}: {matched_ids}")
372-
client.download_from_selection(
373-
**{
374-
kwarg_name: matched_ids,
375-
"downloadDir": download_dir,
376-
"dirTemplate": dir_template,
377-
}
348+
try:
349+
if (
350+
len(generic_argument) < _get_max_path_length()
351+
and Path(generic_argument).is_file()
352+
):
353+
# Parse the input parameters and pass them to IDC
354+
logger_cli.info("Detected manifest file, downloading from manifest.")
355+
client.download_from_manifest(
356+
generic_argument, downloadDir=download_dir, dirTemplate=dir_template
378357
)
379-
return True
358+
# this is not a file manifest
359+
else:
360+
# Split the input string and filter out any empty values
361+
item_ids = [item for item in generic_argument.split(",") if item]
362+
363+
if not item_ids:
364+
logger_cli.error("No valid IDs provided.")
365+
366+
index_df = client.index
367+
368+
def check_and_download(column_name, item_ids, download_dir, kwarg_name):
369+
matches = index_df[column_name].isin(item_ids)
370+
matched_ids = index_df[column_name][matches].unique().tolist()
371+
if not matched_ids:
372+
return False
373+
unmatched_ids = list(set(item_ids) - set(matched_ids))
374+
if unmatched_ids:
375+
logger_cli.debug(
376+
f"Partial match for {column_name}: matched {matched_ids}, unmatched {unmatched_ids}"
377+
)
378+
logger_cli.info(f"Identified matching {column_name}: {matched_ids}")
379+
client.download_from_selection(
380+
**{
381+
kwarg_name: matched_ids,
382+
"downloadDir": download_dir,
383+
"dirTemplate": dir_template,
384+
}
385+
)
386+
return True
380387

381-
matches_found = 0
382-
matches_found += check_and_download(
383-
"collection_id", item_ids, download_dir, "collection_id"
384-
)
385-
matches_found += check_and_download(
386-
"PatientID", item_ids, download_dir, "patientId"
387-
)
388-
matches_found += check_and_download(
389-
"StudyInstanceUID", item_ids, download_dir, "studyInstanceUID"
390-
)
391-
matches_found += check_and_download(
392-
"SeriesInstanceUID", item_ids, download_dir, "seriesInstanceUID"
393-
)
394-
matches_found += check_and_download(
395-
"crdc_series_uuid", item_ids, download_dir, "crdc_series_uuid"
396-
)
397-
if not matches_found:
398-
logger_cli.error(
399-
"None of the values passed matched any of the identifiers: collection_id, PatientID, StudyInstanceUID, SeriesInstanceUID, crdc_series_uuid."
388+
matches_found = 0
389+
matches_found += check_and_download(
390+
"collection_id", item_ids, download_dir, "collection_id"
400391
)
392+
matches_found += check_and_download(
393+
"PatientID", item_ids, download_dir, "patientId"
394+
)
395+
matches_found += check_and_download(
396+
"StudyInstanceUID", item_ids, download_dir, "studyInstanceUID"
397+
)
398+
matches_found += check_and_download(
399+
"SeriesInstanceUID", item_ids, download_dir, "seriesInstanceUID"
400+
)
401+
matches_found += check_and_download(
402+
"crdc_series_uuid", item_ids, download_dir, "crdc_series_uuid"
403+
)
404+
if not matches_found:
405+
logger_cli.error(
406+
"None of the values passed matched any of the identifiers: collection_id, PatientID, StudyInstanceUID, SeriesInstanceUID, crdc_series_uuid."
407+
)
408+
except IDCClientInsufficientDiskSpaceError as e:
409+
logger_cli.error(e.message)
401410

402411

403412
if __name__ == "__main__":

idc_index/index.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,30 @@
3030
logger = logging.getLogger(__name__)
3131

3232

33+
class IDCClientInsufficientDiskSpaceError(Exception):
    """Exception raised when there is insufficient disk space for download.

    Attributes:
        disk_space_needed: Human-readable string of disk space needed.
        disk_space_available: Human-readable string of disk space available.
        message: The text used as the exception's string representation.
    """

    def __init__(
        self, disk_space_needed: str, disk_space_available: str, message: str = None
    ):
        """Initialize the exception.

        Args:
            disk_space_needed: Human-readable string of disk space needed.
            disk_space_available: Human-readable string of disk space available.
            message: Optional custom message. If not provided, a default message is used.
        """
        self.disk_space_needed = disk_space_needed
        self.disk_space_available = disk_space_available
        if message is None:
            message = (
                f"Insufficient disk space. "
                f"Need {disk_space_needed}, but only {disk_space_available} available."
            )
        self.message = message
        super().__init__(self.message)

    def __reduce__(self):
        """Describe how to rebuild this exception for ``pickle`` and ``copy``.

        ``Exception`` reconstructs instances as ``cls(*self.args)`` by default.
        Only ``message`` is stored in ``args`` while ``__init__`` requires two
        other positional arguments, so the default reconstruction would raise
        ``TypeError``. Supplying the full constructor arguments keeps the
        exception transferable across process boundaries.

        Returns:
            A ``(callable, args, state)`` tuple as defined by the pickle protocol.
        """
        return (
            type(self),
            (self.disk_space_needed, self.disk_space_available, self.message),
            # Carry over any extra instance attributes set after construction.
            dict(self.__dict__),
        )
55+
56+
3357
class IDCClient:
3458
# Default download hierarchy template
3559
DOWNLOAD_HIERARCHY_DEFAULT = (
@@ -314,14 +338,27 @@ def _check_create_directory(download_dir):
314338

315339
return str(download_dir.resolve())
316340

317-
def _check_disk_size_and_warn(self, download_dir, disk_size_needed):
341+
def _validate_disk_space(self, download_dir, disk_size_needed):
342+
"""Check if there is sufficient disk space for the download.
343+
344+
Args:
345+
download_dir: The directory where files will be downloaded.
346+
disk_size_needed: The size needed in MB.
347+
348+
Raises:
349+
IDCClientInsufficientDiskSpaceError: If there is not enough disk space.
350+
"""
318351
disk_free_space_MB = psutil.disk_usage(download_dir).free / (1000 * 1000)
319-
logger.info("Disk size needed: " + self._format_size(disk_size_needed))
320-
logger.info("Disk size available: " + self._format_size(disk_free_space_MB))
352+
disk_size_needed_str = self._format_size(disk_size_needed)
353+
disk_free_space_str = self._format_size(disk_free_space_MB)
354+
logger.info("Disk size needed: " + disk_size_needed_str)
355+
logger.info("Disk size available: " + disk_free_space_str)
321356
if disk_free_space_MB < disk_size_needed:
322357
logger.error("Not enough free space on disk to download the files.")
323-
return False
324-
return True
358+
raise IDCClientInsufficientDiskSpaceError(
359+
disk_space_needed=disk_size_needed_str,
360+
disk_space_available=disk_free_space_str,
361+
)
325362

326363
def fetch_index(self, index_name) -> None:
327364
"""Downloads requested index and adds this index joined with the main index as respective class attribute.
@@ -1591,6 +1628,7 @@ def download_from_manifest(
15911628
15921629
Raises:
15931630
ValueError: If the download directory does not exist.
1631+
IDCClientInsufficientDiskSpaceError: If there is not enough disk space.
15941632
"""
15951633
downloadDir = self._check_create_directory(downloadDir)
15961634

@@ -1610,8 +1648,7 @@ def download_from_manifest(
16101648
)
16111649

16121650
total_size_rounded = round(total_size, 2)
1613-
if not self._check_disk_size_and_warn(downloadDir, total_size):
1614-
return
1651+
self._validate_disk_space(downloadDir, total_size)
16151652

16161653
self._s5cmd_run(
16171654
endpoint_to_use=endpoint_to_use,
@@ -1841,8 +1878,7 @@ def download_from_selection(
18411878
total_size_bytes = round(result_df["instance_size"].sum(), 2)
18421879
total_size = total_size_bytes / (10**6)
18431880

1844-
if not self._check_disk_size_and_warn(downloadDir, total_size):
1845-
return
1881+
self._validate_disk_space(downloadDir, total_size)
18461882

18471883
if dry_run:
18481884
logger.info(

tests/idcindex.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
import unittest
77
from itertools import product
88
from pathlib import Path
9+
from unittest.mock import patch
910

1011
import pandas as pd
1112
import pytest
1213
import requests
1314
from click.testing import CliRunner
14-
from idc_index import IDCClient, cli
15+
from idc_index import IDCClient, IDCClientInsufficientDiskSpaceError, cli
1516

1617
# Run tests using the following command from the root of the repository:
1718
# python -m unittest -vv tests/idcindex.py
@@ -604,5 +605,71 @@ def test_instance_file_URLs(self):
604605
assert files_aws == files_gcp == file_url
605606

606607

608+
class TestInsufficientDiskSpaceException(unittest.TestCase):
    """Tests for IDCClientInsufficientDiskSpaceError and the paths that raise it."""

    def setUp(self):
        self.client = IDCClient()

    @staticmethod
    def _create_mock_disk_usage(free_bytes=1000):
        """Create a mock disk usage object with the specified free space."""
        return type("DiskUsage", (), {"free": free_bytes})()

    def test_exception_attributes(self):
        """Test that the exception has the correct attributes."""
        exc = IDCClientInsufficientDiskSpaceError(
            disk_space_needed="10 GB",
            disk_space_available="5 GB",
        )
        assert exc.disk_space_needed == "10 GB"
        assert exc.disk_space_available == "5 GB"
        assert "10 GB" in str(exc)
        assert "5 GB" in str(exc)
        assert "Insufficient disk space" in str(exc)

    def test_exception_custom_message(self):
        """Test that a custom message can be provided."""
        custom_msg = "Custom error message"
        exc = IDCClientInsufficientDiskSpaceError(
            disk_space_needed="10 GB",
            disk_space_available="5 GB",
            message=custom_msg,
        )
        assert exc.message == custom_msg
        assert str(exc) == custom_msg

    def test_exception_raised_on_insufficient_space(self):
        """Test that exception is raised when disk space is insufficient."""
        # Mock the disk check to simulate insufficient space (1000 bytes = ~0.001 MB)
        mock_usage = self._create_mock_disk_usage(free_bytes=1000)
        with tempfile.TemporaryDirectory() as temp_dir, patch(
            "psutil.disk_usage", return_value=mock_usage
        ), pytest.raises(IDCClientInsufficientDiskSpaceError) as exc_info:
            self.client.download_from_selection(
                downloadDir=temp_dir,
                seriesInstanceUID="1.3.6.1.4.1.14519.5.2.1.7695.1700.153974929648969296590126728101",
            )
        # Checked after the raises-block exits; inside it this line would be unreachable.
        assert "Insufficient disk space" in str(exc_info.value)

    def test_cli_handles_insufficient_space_gracefully(self):
        """Test that CLI handles the exception without crashing."""
        # Mock the disk check to simulate insufficient space (1000 bytes = ~0.001 MB)
        mock_usage = self._create_mock_disk_usage(free_bytes=1000)
        runner = CliRunner()
        # Use a throwaway directory rather than a hard-coded "/tmp" so the test
        # is portable and cannot leave files in a shared location.
        with tempfile.TemporaryDirectory() as temp_dir, patch(
            "psutil.disk_usage", return_value=mock_usage
        ):
            result = runner.invoke(
                cli.download_from_selection,
                [
                    "--download-dir",
                    temp_dir,
                    "--study-instance-uid",
                    "1.3.6.1.4.1.14519.5.2.1.7695.1700.114861588187429958687900856462",
                ],
            )
        # The CLI should not crash (exit code 0) - the error is logged
        assert result.exit_code == 0
        # The exception was raised, handled, and logged (not re-raised)
        assert result.exception is None
672+
673+
607674
if __name__ == "__main__":
608675
unittest.main()

0 commit comments

Comments
 (0)