From 4c60a3637e4e9bc2604fd6ff3197d31be3b16479 Mon Sep 17 00:00:00 2001 From: Sam Gamble Date: Tue, 19 May 2026 15:26:30 +0100 Subject: [PATCH] Fix OASIS_AWS_LOCATION var MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit put() override — effective_subdir = location + subdir is passed to super().put(), which stores the file at DirFS(bucket_name/root_dir)/location/subdir/file. Since root_dir is typically "", this means S3 key = location/subdir/file. The return value from super() is os.path.join(location, subdir, filename) — the full bucket-relative key — which is exactly what the server's is_in_bucket() and CopyObject(Key=...) need. get_storage_url() fix — same principle: the returned key includes the location prefix so the server stores a reference that the worker can later resolve. get() needs no change — DirFileSystem rooted at bucket_name accepts bucket-relative keys like oasis/file.txt directly. Both worker-side re-fetches and server-provided references (which may already carry the Django location prefix) work without stripping. root_dir / config_options unchanged — location and root_dir remain independent in storage config, so round-tripping is clean and existing deployments with location="" behave identically to before. --- .../filestore/backends/aws_s3.py | 12 ++- tests/filestorage/test_aws.py | 87 +++++++++++++++++++ 2 files changed, 97 insertions(+), 2 deletions(-) diff --git a/oasis_data_manager/filestore/backends/aws_s3.py b/oasis_data_manager/filestore/backends/aws_s3.py index 55c873f..25b2985 100755 --- a/oasis_data_manager/filestore/backends/aws_s3.py +++ b/oasis_data_manager/filestore/backends/aws_s3.py @@ -256,10 +256,18 @@ def url(self, object_name, parameters=None, expire=None): else: return self._strip_signing_parameters(url) + def put(self, reference, filename=None, subdir="", suffix=None, arcname=None): + # Prepend location so the returned key is bucket-relative (location/subdir/file). + # The server's CopyObject and is_in_bucket() both use the raw bucket key, so the + # returned value must include the location prefix, not be relative to it. + effective_subdir = os.path.join(self.location, subdir) if self.location else subdir + return super().put(reference, filename=filename, subdir=effective_subdir, suffix=suffix, arcname=arcname) + def get_storage_url(self, filename=None, suffix="tar.gz", encode_params=True): filename = ( filename if filename is not None else self._get_unique_filename(suffix) ) + key = os.path.join(self.location, filename) if self.location else filename params = {} if encode_params: @@ -279,8 +287,8 @@ def get_storage_url(self, filename=None, suffix="tar.gz", encode_params=True): params["endpoint"] = self.endpoint_url return ( - filename, - f"s3://{os.path.join(self.root_dir, filename)}{'?' if params else ''}{parse.urlencode(params) if params else ''}", + key, + f"s3://{os.path.join(self.root_dir, key)}{'?' if params else ''}{parse.urlencode(params) if params else ''}", ) @contextlib.contextmanager diff --git a/tests/filestorage/test_aws.py b/tests/filestorage/test_aws.py index b6fc044..ba15cb0 100644 --- a/tests/filestorage/test_aws.py +++ b/tests/filestorage/test_aws.py @@ -41,6 +41,93 @@ def test_storage_constructed_from_config_matches_initial(): assert result.bucket_name == storage.bucket_name +def test_location_does_not_affect_root_dir(): + # root_dir is bucket-scoped only; location is applied at put/get_storage_url time + storage = make_storage(location="oasis") + + assert storage.root_dir == storage.bucket_name + + +def test_location_round_trips_through_config(): + storage = make_storage(location="oasis", root_dir="subdir") + + result = get_storage_from_config(storage.to_config()) + + assert isinstance(result, AwsS3Storage) + assert result.root_dir == storage.root_dir + assert result.location == storage.location + + +def test_location_put_returns_bucket_relative_key(): + # The returned key must be the raw S3 key (bucket-relative) so the server's + # is_in_bucket() lookup and CopyObject both resolve correctly. + storage = make_storage(location="oasis") + + with tempfile.NamedTemporaryFile("w", suffix=".txt") as f: + f.write("content") + f.flush() + + stored_path = storage.put(f.name, "test_file.txt") + + assert stored_path == os.path.join("oasis", "test_file.txt") + # The file must actually live at that bucket-relative key + assert storage.fs.isfile(stored_path) + + +def test_location_put_with_subdir(): + storage = make_storage(location="oasis") + + with tempfile.NamedTemporaryFile("w", suffix=".txt") as f: + f.write("content") + f.flush() + + stored_path = storage.put(f.name, "test_file.txt", subdir="run1") + + assert stored_path == os.path.join("oasis", "run1", "test_file.txt") + assert storage.fs.isfile(stored_path) + + +def test_no_location_put_returns_bare_key(): + # Without location, behaviour is unchanged: returned key has no prefix + storage = make_storage() + + with tempfile.NamedTemporaryFile("w", suffix=".txt") as f: + f.write("content") + f.flush() + + stored_path = storage.put(f.name, "test_file.txt") + + assert stored_path == "test_file.txt" + assert storage.fs.isfile(stored_path) + + +def test_location_get_storage_url_includes_prefix(): + storage = make_storage(location="oasis") + + key, url = storage.get_storage_url(filename="myfile.tar.gz") + + assert key == "oasis/myfile.tar.gz" + assert "oasis/myfile.tar.gz" in url + + +def test_location_put_then_get_roundtrip(): + # Worker puts a file, then retrieves it using the returned key — must work + storage = make_storage(location="oasis") + + with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as src: + src.write("hello") + src.flush() + stored_path = storage.put(src.name, "roundtrip.txt") + + with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as dest: + dest_path = dest.name + + result = storage.get(stored_path, dest_path) + assert result == dest_path + with open(dest_path) as f: + assert f.read() == "hello" + + @pytest.mark.parametrize("acl", ["public-read-write", "public-read"]) def test_uploaded_file_has_the_correct_acl(acl): storage = make_storage(default_acl=acl)