Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f8d9b28
switch from property to method
valeriupredoi Dec 16, 2025
59d54b3
start fixing tests
valeriupredoi Dec 16, 2025
554fe6f
more test fixing
valeriupredoi Dec 16, 2025
8c82b69
more test fixing
valeriupredoi Dec 16, 2025
934b013
final test fixing
valeriupredoi Dec 16, 2025
1f47bcb
real s3 axis test
valeriupredoi Dec 16, 2025
b88a552
turn on axis
valeriupredoi Dec 16, 2025
23acfde
add axis to mock expected
valeriupredoi Dec 16, 2025
29fcaee
fix bug
valeriupredoi Dec 16, 2025
7298051
turn on some screen printing
valeriupredoi Dec 16, 2025
5ef6146
ix test
valeriupredoi Dec 16, 2025
f7f6949
final version of not working Reductionist test
valeriupredoi Dec 16, 2025
72f12ac
too many blank lines
valeriupredoi Dec 16, 2025
622f492
Merge branch 'main' into axis_api
valeriupredoi Jan 6, 2026
be78e13
Merge branch 'main' into axis_api
valeriupredoi Jan 20, 2026
d87005a
Change the Reductionist API to return a JSON payload, ths removes all…
maxstack Jan 21, 2026
0a66655
Merge branch 'main' into axis_api
valeriupredoi Jan 21, 2026
b2e6e49
add json import
valeriupredoi Jan 21, 2026
6857d22
add some prints for response and sizeof
valeriupredoi Jan 21, 2026
432fc4f
Use response.json() as Reductionist should return application/json
maxstack Jan 21, 2026
e371cee
Fix Reductionist unit tests
maxstack Jan 22, 2026
88fa26f
add small file test
valeriupredoi Jan 23, 2026
4b4f3ad
toss a print statement
valeriupredoi Jan 23, 2026
dc41f7b
add validation vs numpy
valeriupredoi Jan 23, 2026
ae71547
betterify test
valeriupredoi Jan 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions activestorage/active.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def __load_nc_file(self):
nc = load_from_https(self.uri)
self.filename = self.uri
self.ds = nc[ncvar]
print("Loaded dataset", self.ds)

def __get_missing_attributes(self):
if self.ds is None:
Expand Down Expand Up @@ -365,19 +366,22 @@ def method(self, value):

self._method = value

@property
def mean(self):
def mean(self, axis=None):
self._method = "mean"
if axis is not None:
self._axis = axis
return self

@property
def min(self):
def min(self, axis=None):
self._method = "min"
if axis is not None:
self._axis = axis
return self

@property
def max(self):
def max(self, axis=None):
self._method = "max"
if axis is not None:
self._axis = axis
return self

@property
Expand Down
14 changes: 9 additions & 5 deletions activestorage/reductionist.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
import requests

REDUCTIONIST_AXIS_READY = False
REDUCTIONIST_AXIS_READY = True

DEBUG = 0

Expand Down Expand Up @@ -88,6 +88,7 @@ def reduce_chunk(session,
chunk_selection,
axis,
storage_type=storage_type)
print(f"Reductionist request data dictionary: {request_data}")
if DEBUG:
print(f"Reductionist request data dictionary: {request_data}")
api_operation = "sum" if operation == "mean" else operation or "select"
Expand Down Expand Up @@ -234,15 +235,18 @@ def request(session: requests.Session, url: str, request_data: dict):

def decode_result(response):
"""Decode a successful response, return as a 2-tuple of (numpy array or scalar, count)."""
dtype = response.headers['x-activestorage-dtype']
shape = json.loads(response.headers['x-activestorage-shape'])
reduction_result = json.loads(response.content)
print("Reduction result: ", reduction_result)
print("Reduction result size: ", sys.getsizeof(reduction_result))
dtype = reduction_result['dtype']
shape = reduction_result['shape'] if "shape" in reduction_result else None

# Result
result = np.frombuffer(response.content, dtype=dtype)
result = np.frombuffer(bytes(reduction_result['bytes']), dtype=dtype)
result = result.reshape(shape)

# Counts
count = json.loads(response.headers['x-activestorage-count'])
count = reduction_result['count']
# TODO: When reductionist is ready, we need to fix 'count'

# Mask the result
Expand Down
12 changes: 6 additions & 6 deletions tests/test_bigger_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def test_cl_mean(tmp_path):
active = Active(ncfile, "cl", storage_type=utils.get_storage_type())
active._version = 2
active.components = True
result2 = active.mean[4:5, 1:2]
result2 = active.mean()[4:5, 1:2]
print(result2, ncfile)
# expect {'sum': array([[[[264.]]]], dtype=float32), 'n': array([[[[12]]]])}
# check for typing and structure
Expand All @@ -151,7 +151,7 @@ def test_cl_min(tmp_path):
ncfile = save_cl_file_with_a(tmp_path)
active = Active(ncfile, "cl", storage_type=utils.get_storage_type())
active._version = 2
result2 = active.min[4:5, 1:2]
result2 = active.min()[4:5, 1:2]
np.testing.assert_array_equal(result2,
np.array([[[[22.]]]], dtype="float32"))

Expand All @@ -160,7 +160,7 @@ def test_cl_max(tmp_path):
ncfile = save_cl_file_with_a(tmp_path)
active = Active(ncfile, "cl", storage_type=utils.get_storage_type())
active._version = 2
result2 = active.max[4:5, 1:2]
result2 = active.max()[4:5, 1:2]
np.testing.assert_array_equal(result2,
np.array([[[[22.]]]], dtype="float32"))

Expand All @@ -169,7 +169,7 @@ def test_cl_global_max(tmp_path):
ncfile = save_cl_file_with_a(tmp_path)
active = Active(ncfile, "cl", storage_type=utils.get_storage_type())
active._version = 2
result2 = active.max[:]
result2 = active.max()[:]
np.testing.assert_array_equal(result2,
np.array([[[[22.]]]], dtype="float32"))

Expand All @@ -192,7 +192,7 @@ def test_ps(tmp_path):
active = Active(ncfile, "ps", storage_type=utils.get_storage_type())
active._version = 2
active.components = True
result2 = active.mean[4:5, 1:2]
result2 = active.mean()[4:5, 1:2]
print(result2, ncfile)
# expect {'sum': array([[[22.]]]), 'n': array([[[4]]])}
# check for typing and structure
Expand Down Expand Up @@ -381,7 +381,7 @@ def test_daily_data_masked_two_stats(test_data_path):
# first a mean
active = Active(uri, "ta", storage_type=utils.get_storage_type())
active._version = 2
result2 = active.min[:]
result2 = active.min()[:]
assert result2 == 245.0020751953125

# then recycle Active object for something else
Expand Down
14 changes: 7 additions & 7 deletions tests/test_real_https.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_https():

active = Active(test_file_uri, "cl", storage_type="https")
active._version = 1
result = active.min[0:3, 4:6, 7:9]
result = active.min()[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([0.6909787], dtype="float32")

Expand All @@ -26,7 +26,7 @@ def test_https_100years():
test_file_uri = "https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/CMIP/MOHC/UKESM1-1-LL/historical/r1i1p1f2/Amon/pr/gn/latest/pr_Amon_UKESM1-1-LL_historical_r1i1p1f2_gn_195001-201412.nc"
active = Active(test_file_uri, "pr")
active._version = 1
result = active.min[0:3, 4:6, 7:9]
result = active.min()[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([5.4734613e-07], dtype="float32")

Expand All @@ -43,7 +43,7 @@ def test_https_reductionist():
with pytest.raises(activestorage.reductionist.ReductionistError):
active = Active(test_file_uri, "cl")
active._version = 2
result = active.min[0:3, 4:6, 7:9]
result = active.min()[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([0.6909787], dtype="float32")

Expand All @@ -57,7 +57,7 @@ def test_https_implicit_storage():

active = Active(test_file_uri, "cl")
active._version = 1
result = active.min[0:3, 4:6, 7:9]
result = active.min()[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([0.6909787], dtype="float32")

Expand All @@ -73,7 +73,7 @@ def test_https_implicit_storage_file_not_found():
with pytest.raises(FileNotFoundError):
active = Active(test_file_uri, "cl")
active._version = 1
result = active.min[0:3, 4:6, 7:9]
result = active.min()[0:3, 4:6, 7:9]


def test_https_implicit_storage_wrong_url():
Expand All @@ -98,7 +98,7 @@ def test_https_dataset():

active = Active(av, storage_type="https")
active._version = 1
result = active.min[0:3, 4:6, 7:9]
result = active.min()[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([0.6909787], dtype="float32")

Expand All @@ -114,6 +114,6 @@ def test_https_dataset_implicit_storage():

active = Active(av)
active._version = 1
result = active.min[0:3, 4:6, 7:9]
result = active.min()[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([0.6909787], dtype="float32")
8 changes: 4 additions & 4 deletions tests/test_real_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_s3_dataset():
storage_options=storage_options,
active_storage_url=active_storage_url)
active._version = 2
result = active.min[0:3, 4:6, 7:9] # standardized slice
result = active.min()[0:3, 4:6, 7:9] # standardized slice
print("Result is", result)
assert result == 5098.625

Expand All @@ -49,7 +49,7 @@ def test_s3_dataset():
storage_options=storage_options,
active_storage_url=active_storage_url)
active._version = 2
result = active.min[0:3, 4:6, 7:9] # standardized slice
result = active.min()[0:3, 4:6, 7:9] # standardized slice
print("Result is", result)
assert result == 5098.625

Expand All @@ -63,7 +63,7 @@ def test_s3_dataset():
storage_options=storage_options,
active_storage_url=active_storage_url)
active._version = 2
result = active.min[0:3, 4:6, 7:9] # standardized slice
result = active.min()[0:3, 4:6, 7:9] # standardized slice
print("Result is", result)
assert result == 5098.625

Expand All @@ -72,6 +72,6 @@ def test_s3_dataset():
storage_options=storage_options,
active_storage_url=active_storage_url)
active._version = 2
result = active.min[0:3, 4:6, 7:9] # standardized slice
result = active.min()[0:3, 4:6, 7:9] # standardized slice
print("Result is", result)
assert result == 5098.625
134 changes: 134 additions & 0 deletions tests/test_real_s3_with_axes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import os
import numpy as np
import pyfive

from activestorage.active import Active


S3_BUCKET = "bnl"

def build_active_small_file():
"""Run an integration test with real data off S3 but with a small file."""
storage_options = {
'key': "f2d55c6dcfc7618b2c34e00b58df3cef",
'secret': "$/'#M{0{/4rVhp%n^(XeX$q@y#&(NM3W1->~N.Q6VP.5[@bLpi='nt]AfH)>78pT",
'client_kwargs': {'endpoint_url': "https://uor-aces-o.s3-ext.jc.rl.ac.uk"}, # final proxy
}
active_storage_url = "https://reductionist.jasmin.ac.uk/" # Wacasoft new Reductionist
bigger_file = "CMIP6-test.nc" # tas; 15 (time) x 143 x 144

test_file_uri = os.path.join(
S3_BUCKET,
bigger_file
)
print("S3 Test file path:", test_file_uri)
active = Active(test_file_uri, 'tas', storage_type="s3",
storage_options=storage_options,
active_storage_url=active_storage_url)

active._version = 2

return active


def test_small_file_axis_0_1():
"""Fails: activestorage.reductionist.ReductionistError: Reductionist error: HTTP 502: -"""
active = build_active_small_file()
result = active.min(axis=(0, 1))[:]
print("Reductionist final result", result)
assert min(result[0][0]) == 197.69595


def test_small_file_axis_0_1_compare_with_numpy():
"""Fails: activestorage.reductionist.ReductionistError: Reductionist error: HTTP 502: -"""
active = build_active_small_file()
result = active.min(axis=(0, 1))[:]
print("Reductionist final result", result)

# use numpy and local test data
ds = pyfive.File("tests/test_data/CMIP6-test.nc")["tas"]
minarr= np.min(ds[:], axis=(0, 1), keepdims=True)
print(len(minarr)) # 144
print(min(minarr)) # 197.69595
assert np.min(result) == np.min(minarr)
np.testing.assert_array_equal(result, minarr)


def build_active():
"""Run an integration test with real data off S3."""
storage_options = {
'key': "f2d55c6dcfc7618b2c34e00b58df3cef",
'secret': "$/'#M{0{/4rVhp%n^(XeX$q@y#&(NM3W1->~N.Q6VP.5[@bLpi='nt]AfH)>78pT",
'client_kwargs': {'endpoint_url': "https://uor-aces-o.s3-ext.jc.rl.ac.uk"}, # final proxy
}
active_storage_url = "https://reductionist.jasmin.ac.uk/" # Wacasoft new Reductionist
bigger_file = "da193a_25_6hr_t_pt_cordex__198807-198807.nc" # m01s30i111 ## older 3GB 30 chunks

test_file_uri = os.path.join(
S3_BUCKET,
bigger_file
)
print("S3 Test file path:", test_file_uri)
active = Active(test_file_uri, 'm01s30i111', storage_type="s3", # 'm01s06i247_4', storage_type="s3",
storage_options=storage_options,
active_storage_url=active_storage_url)

active._version = 2

return active


## Active loads a 4dim dataset
## Loaded dataset <HDF5 dataset "m01s30i111": shape (120, 85, 324, 432), type "float32">
## default axis arg (when axis=None): 'axis': (0, 1, 2, 3)

def test_no_axis():
"""
Fails: it should pass: 'axis': (0, 1, 2, 3) default
are fine!

activestorage.reductionist.ReductionistError: Reductionist error: HTTP 400: {"error": {"message": "request data is not valid", "caused_by": ["__all__: Validation error: Number of reduction axes must be less than length of shape - to reduce over all axes omit the axis field completely [{}]"]}}
"""
active = build_active()
result = active.min()[:]
assert result == [[[[164.8125]]]]


def test_no_axis_2():
"""
Fails: it should pass: 'axis': (0, 1, 2, 3) default
are fine!

activestorage.reductionist.ReductionistError: Reductionist error: HTTP 400: {"error": {"message": "request data is not valid", "caused_by": ["__all__: Validation error: Number of reduction axes must be less than length of shape - to reduce over all axes omit the axis field completely [{}]"]}}
"""
active = build_active()
result = active.min(axis=())[:]
assert result == [[[[164.8125]]]]


def test_axis_0():
"""Fails: activestorage.reductionist.ReductionistError: Reductionist error: HTTP 502: -"""
active = build_active()
result = active.min(axis=(0, ))[:]
assert result == [[[[164.8125]]]]


def test_axis_0_1():
"""Fails: activestorage.reductionist.ReductionistError: Reductionist error: HTTP 502: -"""
active = build_active()
result = active.min(axis=(0, 1))[:]
assert result == [[[[164.8125]]]]


def test_axis_1():
"""Fails: activestorage.reductionist.ReductionistError: Reductionist error: HTTP 502: -"""
active = build_active()
result = active.min(axis=(1, ))[:]
assert result == [[[[164.8125]]]]


def test_axis_0_1_2():
"""Passes fine."""
active = build_active()
result = active.min(axis=(0, 1, 2))[:]
assert result[0][0][0][0] == 171.05126953125
4 changes: 2 additions & 2 deletions tests/unit/test_active.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def test_activevariable_pyfive_with_attributed_min():
ncvar = "TREFHT"
ds = pyfive.File(uri)[ncvar]
av = Active(ds)
av_slice_min = av.min[3:5]
av_slice_min = av.min()[3:5]
assert av_slice_min == np.array(258.62814, dtype="float32")
# test with Numpy
np_slice_min = np.min(ds[3:5])
Expand All @@ -132,7 +132,7 @@ def test_activevariable_pyfive_with_attributed_mean():
ds = pyfive.File(uri)[ncvar]
av = Active(ds)
av.components = True
av_slice_min = av.mean[3:5]
av_slice_min = av.mean()[3:5]
actual_mean = av_slice_min["sum"] / av_slice_min["n"]
assert actual_mean == np.array(283.39508056640625, dtype="float32")
# test with Numpy
Expand Down
Loading
Loading