From 4025aeaa4710a22ab30e6fc53ff63a1c0ac5494f Mon Sep 17 00:00:00 2001 From: Ha-Ree <48283886+Ha-Ree@users.noreply.github.com> Date: Tue, 28 Apr 2026 10:36:36 +0100 Subject: [PATCH 1/6] pin (#41) --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 0bd47d4..7ad02c3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ version: "3.8" services: localstack: container_name: "${LOCALSTACK_DOCKER_NAME-localstack_main}" - image: localstack/localstack + image: localstack/localstack:3 ports: - "127.0.0.1:4566:4566" # LocalStack Gateway - "127.0.0.1:4510-4559:4510-4559" # external services port range From a41b1d4258fd15df1be65169a37fb146f170744d Mon Sep 17 00:00:00 2001 From: Ha-Ree <48283886+Ha-Ree@users.noreply.github.com> Date: Wed, 27 May 2026 14:36:18 +0100 Subject: [PATCH 2/6] Tests failing due to geodatasets link (#45) * see tests * attempt doi * requests untyped * requests untyped 2 --- tests/df_reader/test_geo.py | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/tests/df_reader/test_geo.py b/tests/df_reader/test_geo.py index fbe1388..56f4ab4 100644 --- a/tests/df_reader/test_geo.py +++ b/tests/df_reader/test_geo.py @@ -1,9 +1,10 @@ +import zipfile from tempfile import NamedTemporaryFile -import geodatasets import geopandas as gpd import pandas as pd import pytest +import requests # type: ignore[import-untyped] from shapely.geometry import Point from oasis_data_manager.df_reader.reader import ( @@ -14,6 +15,8 @@ ) from oasis_data_manager.filestore.backends.local import LocalStorage +NYBB_URL = "https://raw.githubusercontent.com/geopandas/geodatasets/main/data_backup/nybb_16a.zip" + READERS = [ OasisPandasReaderCSV, OasisPandasReaderParquet, @@ -25,14 +28,26 @@ storage = LocalStorage("/") +@pytest.fixture(scope="session") +def nybb_path(tmp_path_factory): + tmp = tmp_path_factory.mktemp("nybb") + zip_dest = tmp / "nybb_16a.zip" + r = requests.get(NYBB_URL, timeout=30) + r.raise_for_status() + zip_dest.write_bytes(r.content) + with zipfile.ZipFile(zip_dest) as zf: + zf.extractall(tmp) + return str(next(tmp.rglob("*.shp"))) + + @pytest.fixture -def df(): +def df(nybb_path): """ df representing data with a long/lat, here we pull some values from within the nybb dataset used in tests to create some values in out expected boroughs. """ - b = [int(x) for x in gpd.read_file(geodatasets.get_path("nybb")).total_bounds] + b = [int(x) for x in gpd.read_file(nybb_path).total_bounds] return pd.DataFrame( [ @@ -46,7 +61,7 @@ def df(): @pytest.mark.parametrize("reader", READERS) -def test_read__expected_pandas_dataframe(reader, df): +def test_read__expected_pandas_dataframe(reader, df, nybb_path): suffix = ".csv" if "csv" in reader.__name__.lower() else ".parquet" with NamedTemporaryFile(suffix=suffix) as file: if suffix == ".csv": @@ -56,7 +71,7 @@ def test_read__expected_pandas_dataframe(reader, df): result = ( reader(file.name, storage) - .apply_geo(geodatasets.get_path("nybb")) + .apply_geo(nybb_path) .as_pandas() ) @@ -70,7 +85,7 @@ def test_read__expected_pandas_dataframe(reader, df): @pytest.mark.parametrize("reader", READERS) -def test_read__expected_pandas_dataframe__drop_geo(reader, df): +def test_read__expected_pandas_dataframe__drop_geo(reader, df, nybb_path): suffix = ".csv" if "csv" in reader.__name__.lower() else ".parquet" with NamedTemporaryFile(suffix=suffix) as file: if suffix == ".csv": @@ -83,7 +98,7 @@ def test_read__expected_pandas_dataframe__drop_geo(reader, df): file.name, storage, ) - .apply_geo(geodatasets.get_path("nybb"), drop_geo=False) + .apply_geo(nybb_path, drop_geo=False) .as_pandas() ) @@ -123,7 +138,7 @@ def test_read__expected_pandas_dataframe__drop_geo(reader, df): @pytest.mark.parametrize("reader", READERS) -def test_read__gql_filter__expected_pandas_dataframe(reader, df): +def test_read__gql_filter__expected_pandas_dataframe(reader, df, nybb_path): suffix = ".csv" if "csv" in reader.__name__.lower() else ".parquet" with NamedTemporaryFile(suffix=suffix) as file: if suffix == ".csv": @@ -133,7 +148,7 @@ def test_read__gql_filter__expected_pandas_dataframe(reader, df): result = ( reader(file.name, storage) - .apply_geo(geodatasets.get_path("nybb")) + .apply_geo(nybb_path) .filter([lambda x: x[x["longitude"] == 1028825]]) .as_pandas() ) @@ -148,7 +163,7 @@ def test_read__gql_filter__expected_pandas_dataframe(reader, df): @pytest.mark.parametrize("reader", READERS) -def test_read__shape_file__invalid(reader, df, caplog): +def test_read__shape_file__invalid(reader, df, nybb_path, caplog): df = df.rename(columns={"longitude": "lon", "latitude": "lat"}) suffix = ".csv" if "csv" in reader.__name__.lower() else ".parquet" @@ -163,7 +178,7 @@ def test_read__shape_file__invalid(reader, df, caplog): file.name, storage, ) - .apply_geo(geodatasets.get_path("nybb")) + .apply_geo(nybb_path) .as_pandas() ) From 158b0375aa7f40bcccd6d5e521df751eba935450 Mon Sep 17 00:00:00 2001 From: sambles Date: Wed, 27 May 2026 14:43:37 +0100 Subject: [PATCH 3/6] Fix OASIS_AWS_LOCATION var (#43) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit put() override — effective_subdir = location + subdir is passed to super().put(), which stores the file at DirFS(bucket_name/root_dir)/location/subdir/file. Since root_dir is typically "", this means S3 key = location/subdir/file. The return value from super() is os.path.join(location, subdir, filename) — the full bucket-relative key — which is exactly what the server's is_in_bucket() and CopyObject(Key=...) need. get_storage_url() fix — same principle: the returned key includes the location prefix so the server stores a reference that the worker can later resolve. get() needs no change — DirFileSystem rooted at bucket_name accepts bucket-relative keys like oasis/file.txt directly. Both worker-side re-fetches and server-provided references (which may already carry the Django location prefix) work without stripping. root_dir / config_options unchanged — location and root_dir remain independent in storage config, so round-tripping is clean and existing deployments with location="" behave identically to before. --- .../filestore/backends/aws_s3.py | 12 ++- tests/filestorage/test_aws.py | 87 +++++++++++++++++++ 2 files changed, 97 insertions(+), 2 deletions(-) diff --git a/oasis_data_manager/filestore/backends/aws_s3.py b/oasis_data_manager/filestore/backends/aws_s3.py index 55c873f..25b2985 100755 --- a/oasis_data_manager/filestore/backends/aws_s3.py +++ b/oasis_data_manager/filestore/backends/aws_s3.py @@ -256,10 +256,18 @@ def url(self, object_name, parameters=None, expire=None): else: return self._strip_signing_parameters(url) + def put(self, reference, filename=None, subdir="", suffix=None, arcname=None): + # Prepend location so the returned key is bucket-relative (location/subdir/file). + # The server's CopyObject and is_in_bucket() both use the raw bucket key, so the + # returned value must include the location prefix, not be relative to it. + effective_subdir = os.path.join(self.location, subdir) if self.location else subdir + return super().put(reference, filename=filename, subdir=effective_subdir, suffix=suffix, arcname=arcname) + def get_storage_url(self, filename=None, suffix="tar.gz", encode_params=True): filename = ( filename if filename is not None else self._get_unique_filename(suffix) ) + key = os.path.join(self.location, filename) if self.location else filename params = {} if encode_params: @@ -279,8 +287,8 @@ def get_storage_url(self, filename=None, suffix="tar.gz", encode_params=True): params["endpoint"] = self.endpoint_url return ( - filename, - f"s3://{os.path.join(self.root_dir, filename)}{'?' if params else ''}{parse.urlencode(params) if params else ''}", + key, + f"s3://{os.path.join(self.root_dir, key)}{'?' if params else ''}{parse.urlencode(params) if params else ''}", ) @contextlib.contextmanager diff --git a/tests/filestorage/test_aws.py b/tests/filestorage/test_aws.py index b6fc044..ba15cb0 100644 --- a/tests/filestorage/test_aws.py +++ b/tests/filestorage/test_aws.py @@ -41,6 +41,93 @@ def test_storage_constructed_from_config_matches_initial(): assert result.bucket_name == storage.bucket_name +def test_location_does_not_affect_root_dir(): + # root_dir is bucket-scoped only; location is applied at put/get_storage_url time + storage = make_storage(location="oasis") + + assert storage.root_dir == storage.bucket_name + + +def test_location_round_trips_through_config(): + storage = make_storage(location="oasis", root_dir="subdir") + + result = get_storage_from_config(storage.to_config()) + + assert isinstance(result, AwsS3Storage) + assert result.root_dir == storage.root_dir + assert result.location == storage.location + + +def test_location_put_returns_bucket_relative_key(): + # The returned key must be the raw S3 key (bucket-relative) so the server's + # is_in_bucket() lookup and CopyObject both resolve correctly. + storage = make_storage(location="oasis") + + with tempfile.NamedTemporaryFile("w", suffix=".txt") as f: + f.write("content") + f.flush() + + stored_path = storage.put(f.name, "test_file.txt") + + assert stored_path == os.path.join("oasis", "test_file.txt") + # The file must actually live at that bucket-relative key + assert storage.fs.isfile(stored_path) + + +def test_location_put_with_subdir(): + storage = make_storage(location="oasis") + + with tempfile.NamedTemporaryFile("w", suffix=".txt") as f: + f.write("content") + f.flush() + + stored_path = storage.put(f.name, "test_file.txt", subdir="run1") + + assert stored_path == os.path.join("oasis", "run1", "test_file.txt") + assert storage.fs.isfile(stored_path) + + +def test_no_location_put_returns_bare_key(): + # Without location, behaviour is unchanged: returned key has no prefix + storage = make_storage() + + with tempfile.NamedTemporaryFile("w", suffix=".txt") as f: + f.write("content") + f.flush() + + stored_path = storage.put(f.name, "test_file.txt") + + assert stored_path == "test_file.txt" + assert storage.fs.isfile(stored_path) + + +def test_location_get_storage_url_includes_prefix(): + storage = make_storage(location="oasis") + + key, url = storage.get_storage_url(filename="myfile.tar.gz") + + assert key == "oasis/myfile.tar.gz" + assert "oasis/myfile.tar.gz" in url + + +def test_location_put_then_get_roundtrip(): + # Worker puts a file, then retrieves it using the returned key — must work + storage = make_storage(location="oasis") + + with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as src: + src.write("hello") + src.flush() + stored_path = storage.put(src.name, "roundtrip.txt") + + with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as dest: + dest_path = dest.name + + result = storage.get(stored_path, dest_path) + assert result == dest_path + with open(dest_path) as f: + assert f.read() == "hello" + + @pytest.mark.parametrize("acl", ["public-read-write", "public-read"]) def test_uploaded_file_has_the_correct_acl(acl): storage = make_storage(default_acl=acl) From 9da3884789e88590f87f9e9c1ebb64a6582d619e Mon Sep 17 00:00:00 2001 From: sambles Date: Thu, 28 May 2026 09:01:23 +0100 Subject: [PATCH 4/6] Fix license (#46) * return LICENSE * Fix setup --- LICENSE | 28 ++++++++++++++++++++++++++++ setup.py | 2 ++ 2 files changed, 30 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9a54a41 --- /dev/null +++ b/LICENSE @@ -0,0 +1,28 @@ +BSD 3-Clause License + +Copyright (c) 2023, Oasis Loss Modelling Framework + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/setup.py b/setup.py index c796eb6..d2aa201 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,8 @@ def get_optional_requirements(): exclude_package_data={ "": ["__pycache__", "*.py[co]"], }, + license="BSD-3-Clause", + license_files=["LICENSE"], description="", long_description="", long_description_content_type="text/markdown", From 1bf66a6a7ad7ac855cd882dfc84cb372ee1d4837 Mon Sep 17 00:00:00 2001 From: awsbuild Date: Thu, 28 May 2026 08:07:19 +0000 Subject: [PATCH 5/6] Set package to version release/0.2.2 --- oasis_data_manager/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oasis_data_manager/__init__.py b/oasis_data_manager/__init__.py index fc79d63..07ef838 100644 --- a/oasis_data_manager/__init__.py +++ b/oasis_data_manager/__init__.py @@ -1 +1 @@ -__version__ = '0.2.1' +__version__ = 'release/0.2.2' From b2b95d9badc6d1f009dca53f7942171858346f86 Mon Sep 17 00:00:00 2001 From: sambles Date: Thu, 28 May 2026 09:41:04 +0100 Subject: [PATCH 6/6] Fix __init__.py --- oasis_data_manager/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oasis_data_manager/__init__.py b/oasis_data_manager/__init__.py index 07ef838..020ed73 100644 --- a/oasis_data_manager/__init__.py +++ b/oasis_data_manager/__init__.py @@ -1 +1 @@ -__version__ = 'release/0.2.2' +__version__ = '0.2.2'