Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
BSD 3-Clause License

Copyright (c) 2023, Oasis Loss Modelling Framework

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ version: "3.8"
services:
localstack:
container_name: "${LOCALSTACK_DOCKER_NAME-localstack_main}"
image: localstack/localstack
image: localstack/localstack:3
ports:
- "127.0.0.1:4566:4566" # LocalStack Gateway
- "127.0.0.1:4510-4559:4510-4559" # external services port range
Expand Down
2 changes: 1 addition & 1 deletion oasis_data_manager/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.2.1'
__version__ = '0.2.2'
12 changes: 10 additions & 2 deletions oasis_data_manager/filestore/backends/aws_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,18 @@ def url(self, object_name, parameters=None, expire=None):
else:
return self._strip_signing_parameters(url)

def put(self, reference, filename=None, subdir="", suffix=None, arcname=None):
# Prepend location so the returned key is bucket-relative (location/subdir/file).
# The server's CopyObject and is_in_bucket() both use the raw bucket key, so the
# returned value must include the location prefix, not be relative to it.
effective_subdir = os.path.join(self.location, subdir) if self.location else subdir
return super().put(reference, filename=filename, subdir=effective_subdir, suffix=suffix, arcname=arcname)

def get_storage_url(self, filename=None, suffix="tar.gz", encode_params=True):
filename = (
filename if filename is not None else self._get_unique_filename(suffix)
)
key = os.path.join(self.location, filename) if self.location else filename

params = {}
if encode_params:
Expand All @@ -279,8 +287,8 @@ def get_storage_url(self, filename=None, suffix="tar.gz", encode_params=True):
params["endpoint"] = self.endpoint_url

return (
filename,
f"s3://{os.path.join(self.root_dir, filename)}{'?' if params else ''}{parse.urlencode(params) if params else ''}",
key,
f"s3://{os.path.join(self.root_dir, key)}{'?' if params else ''}{parse.urlencode(params) if params else ''}",
)

@contextlib.contextmanager
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def get_optional_requirements():
exclude_package_data={
"": ["__pycache__", "*.py[co]"],
},
license="BSD-3-Clause",
license_files=["LICENSE"],
description="",
long_description="",
long_description_content_type="text/markdown",
Expand Down
37 changes: 26 additions & 11 deletions tests/df_reader/test_geo.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import zipfile
from tempfile import NamedTemporaryFile

import geodatasets
import geopandas as gpd
import pandas as pd
import pytest
import requests # type: ignore[import-untyped]
from shapely.geometry import Point

from oasis_data_manager.df_reader.reader import (
Expand All @@ -14,6 +15,8 @@
)
from oasis_data_manager.filestore.backends.local import LocalStorage

NYBB_URL = "https://raw.githubusercontent.com/geopandas/geodatasets/main/data_backup/nybb_16a.zip"

READERS = [
OasisPandasReaderCSV,
OasisPandasReaderParquet,
Expand All @@ -25,14 +28,26 @@
storage = LocalStorage("/")


@pytest.fixture(scope="session")
def nybb_path(tmp_path_factory):
tmp = tmp_path_factory.mktemp("nybb")
zip_dest = tmp / "nybb_16a.zip"
r = requests.get(NYBB_URL, timeout=30)
r.raise_for_status()
zip_dest.write_bytes(r.content)
with zipfile.ZipFile(zip_dest) as zf:
zf.extractall(tmp)
return str(next(tmp.rglob("*.shp")))


@pytest.fixture
def df():
def df(nybb_path):
"""
df representing data with a long/lat, here we pull some values from
within the nybb dataset used in tests to create some values in
out expected boroughs.
"""
b = [int(x) for x in gpd.read_file(geodatasets.get_path("nybb")).total_bounds]
b = [int(x) for x in gpd.read_file(nybb_path).total_bounds]

return pd.DataFrame(
[
Expand All @@ -46,7 +61,7 @@ def df():


@pytest.mark.parametrize("reader", READERS)
def test_read__expected_pandas_dataframe(reader, df):
def test_read__expected_pandas_dataframe(reader, df, nybb_path):
suffix = ".csv" if "csv" in reader.__name__.lower() else ".parquet"
with NamedTemporaryFile(suffix=suffix) as file:
if suffix == ".csv":
Expand All @@ -56,7 +71,7 @@ def test_read__expected_pandas_dataframe(reader, df):

result = (
reader(file.name, storage)
.apply_geo(geodatasets.get_path("nybb"))
.apply_geo(nybb_path)
.as_pandas()
)

Expand All @@ -70,7 +85,7 @@ def test_read__expected_pandas_dataframe(reader, df):


@pytest.mark.parametrize("reader", READERS)
def test_read__expected_pandas_dataframe__drop_geo(reader, df):
def test_read__expected_pandas_dataframe__drop_geo(reader, df, nybb_path):
suffix = ".csv" if "csv" in reader.__name__.lower() else ".parquet"
with NamedTemporaryFile(suffix=suffix) as file:
if suffix == ".csv":
Expand All @@ -83,7 +98,7 @@ def test_read__expected_pandas_dataframe__drop_geo(reader, df):
file.name,
storage,
)
.apply_geo(geodatasets.get_path("nybb"), drop_geo=False)
.apply_geo(nybb_path, drop_geo=False)
.as_pandas()
)

Expand Down Expand Up @@ -123,7 +138,7 @@ def test_read__expected_pandas_dataframe__drop_geo(reader, df):


@pytest.mark.parametrize("reader", READERS)
def test_read__gql_filter__expected_pandas_dataframe(reader, df):
def test_read__gql_filter__expected_pandas_dataframe(reader, df, nybb_path):
suffix = ".csv" if "csv" in reader.__name__.lower() else ".parquet"
with NamedTemporaryFile(suffix=suffix) as file:
if suffix == ".csv":
Expand All @@ -133,7 +148,7 @@ def test_read__gql_filter__expected_pandas_dataframe(reader, df):

result = (
reader(file.name, storage)
.apply_geo(geodatasets.get_path("nybb"))
.apply_geo(nybb_path)
.filter([lambda x: x[x["longitude"] == 1028825]])
.as_pandas()
)
Expand All @@ -148,7 +163,7 @@ def test_read__gql_filter__expected_pandas_dataframe(reader, df):


@pytest.mark.parametrize("reader", READERS)
def test_read__shape_file__invalid(reader, df, caplog):
def test_read__shape_file__invalid(reader, df, nybb_path, caplog):
df = df.rename(columns={"longitude": "lon", "latitude": "lat"})

suffix = ".csv" if "csv" in reader.__name__.lower() else ".parquet"
Expand All @@ -163,7 +178,7 @@ def test_read__shape_file__invalid(reader, df, caplog):
file.name,
storage,
)
.apply_geo(geodatasets.get_path("nybb"))
.apply_geo(nybb_path)
.as_pandas()
)

Expand Down
87 changes: 87 additions & 0 deletions tests/filestorage/test_aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,93 @@ def test_storage_constructed_from_config_matches_initial():
assert result.bucket_name == storage.bucket_name


def test_location_does_not_affect_root_dir():
# root_dir is bucket-scoped only; location is applied at put/get_storage_url time
storage = make_storage(location="oasis")

assert storage.root_dir == storage.bucket_name


def test_location_round_trips_through_config():
storage = make_storage(location="oasis", root_dir="subdir")

result = get_storage_from_config(storage.to_config())

assert isinstance(result, AwsS3Storage)
assert result.root_dir == storage.root_dir
assert result.location == storage.location


def test_location_put_returns_bucket_relative_key():
# The returned key must be the raw S3 key (bucket-relative) so the server's
# is_in_bucket() lookup and CopyObject both resolve correctly.
storage = make_storage(location="oasis")

with tempfile.NamedTemporaryFile("w", suffix=".txt") as f:
f.write("content")
f.flush()

stored_path = storage.put(f.name, "test_file.txt")

assert stored_path == os.path.join("oasis", "test_file.txt")
# The file must actually live at that bucket-relative key
assert storage.fs.isfile(stored_path)


def test_location_put_with_subdir():
storage = make_storage(location="oasis")

with tempfile.NamedTemporaryFile("w", suffix=".txt") as f:
f.write("content")
f.flush()

stored_path = storage.put(f.name, "test_file.txt", subdir="run1")

assert stored_path == os.path.join("oasis", "run1", "test_file.txt")
assert storage.fs.isfile(stored_path)


def test_no_location_put_returns_bare_key():
# Without location, behaviour is unchanged: returned key has no prefix
storage = make_storage()

with tempfile.NamedTemporaryFile("w", suffix=".txt") as f:
f.write("content")
f.flush()

stored_path = storage.put(f.name, "test_file.txt")

assert stored_path == "test_file.txt"
assert storage.fs.isfile(stored_path)


def test_location_get_storage_url_includes_prefix():
storage = make_storage(location="oasis")

key, url = storage.get_storage_url(filename="myfile.tar.gz")

assert key == "oasis/myfile.tar.gz"
assert "oasis/myfile.tar.gz" in url


def test_location_put_then_get_roundtrip():
# Worker puts a file, then retrieves it using the returned key — must work
storage = make_storage(location="oasis")

with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as src:
src.write("hello")
src.flush()
stored_path = storage.put(src.name, "roundtrip.txt")

with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as dest:
dest_path = dest.name

result = storage.get(stored_path, dest_path)
assert result == dest_path
with open(dest_path) as f:
assert f.read() == "hello"


@pytest.mark.parametrize("acl", ["public-read-write", "public-read"])
def test_uploaded_file_has_the_correct_acl(acl):
storage = make_storage(default_acl=acl)
Expand Down
Loading