Skip to content

Commit d0b06f7

Browse files
committed
improve handling of the new ERA5 format and review default surface variables
1 parent 89d0327 commit d0b06f7

File tree

3 files changed

+61
-46
lines changed

3 files changed

+61
-46
lines changed

TopoPyScale/fetch_era5.py

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,7 @@
4141
'surface_solar_radiation_downwards':'ssrd',
4242
'surface_pressure':'sp',
4343
'total_precipitation':'tp',
44-
'2m_temperature':'t2m',
45-
'toa_incident_solar_radiation':'tisr',
46-
'friction_velocity':'zust',
47-
'instantaneous_moisture_flux':'ie',
48-
'instantaneous_surface_sensible_heat_flux':'ishf'}
44+
'2m_temperature':'t2m'}
4945

5046
var_plev_name_google = {'geopotential':'z',
5147
'temperature':'t',
@@ -282,7 +278,7 @@ def fetch_era5_google_from_zarr(eraDir, startDate, endDate, lonW, latS, lonE, la
282278

283279
def retrieve_era5(product, startDate, endDate, eraDir, latN, latS, lonE, lonW, step,
284280
num_threads=10, surf_plev='surf', plevels=None, realtime=False,
285-
output_format='netcdf', download_format="unarchived", new_CDS_API=True, rm_daily=False, store_as_zarr='ERA5.zarr'):
281+
output_format='netcdf', download_format="unarchived", new_CDS_API=True, rm_daily=False):
286282
""" Sets up era5 surface retrieval.
287283
* Creates list of year/month pairs to iterate through.
288284
* MARS retrievals are most efficient when subset by time.
@@ -305,7 +301,6 @@ def retrieve_era5(product, startDate, endDate, eraDir, latN, latS, lonE, lonW, s
305301
download_format (str): default "unarchived". Can be "zip"
306302
new_CDS_API: flag to handle new formatting of SURF files with the new CDS API (2024).
307303
rm_daily: remove the folder containing all daily ERA5 files. Option to clear space once data have been converted to yearly files.
308-
store_as_zarr (str): name of the Zarr store. None if you want to use the old system with Netcdf
309304
310305
Returns:
311306
Monthly era surface files stored in disk.
@@ -456,10 +451,10 @@ def retrieve_era5(product, startDate, endDate, eraDir, latN, latS, lonE, lonW, s
456451
except:
457452
raise IOError('deletion of daily files issue')
458453

459-
print("===> ERA5 files ready")
454+
print("===> ERA5 netcdf files ready")
460455

461456

462-
def era5_request_surf(dataset, year, month, day, bbox, target, product, time, output_format= "netcdf", download_format="unarchived"):
457+
def era5_request_surf(dataset, year, month, day, bbox, target, product, time, varoi=None, output_format= "netcdf", download_format="unarchived"):
463458
"""CDS surface api call
464459
465460
Args:
@@ -470,23 +465,26 @@ def era5_request_surf(dataset, year, month, day, bbox, target, product, time, ou
470465
target (str): filename
471466
product (str): type of model run. default: reanalysis
472467
time (str or list): hours for which to download data
468+
varoi (list): list of variables of interest to download. Defaults to None, which falls back to the minimum set needed for TPS
473469
output_format (str): default is "netcdf", can be "grib".
474470
download_format (str): default "unarchived". Can be "zip"
475471
476472
Returns:
477473
Store to disk dataset as indicated
478474
479475
"""
480-
481-
varnames = ['geopotential', '2m_dewpoint_temperature', 'surface_thermal_radiation_downwards',
482-
'surface_solar_radiation_downwards','surface_pressure',
483-
'total_precipitation', '2m_temperature', 'toa_incident_solar_radiation',
484-
'friction_velocity', 'instantaneous_moisture_flux', 'instantaneous_surface_sensible_heat_flux']
485-
476+
if varoi is None:
477+
varoi = ['geopotential',
478+
'2m_dewpoint_temperature',
479+
'surface_thermal_radiation_downwards',
480+
'surface_solar_radiation_downwards','surface_pressure',
481+
'total_precipitation',
482+
'2m_temperature']
483+
486484
c = cdsapi.Client()
487485
c.retrieve(
488486
dataset,
489-
{'variable': varnames,
487+
{'variable': varoi,
490488
'product_type': [product],
491489
"area": bbox,
492490
'year': year,
@@ -502,7 +500,7 @@ def era5_request_surf(dataset, year, month, day, bbox, target, product, time, ou
502500
unzip_file(str(target))
503501

504502

505-
def era5_request_plev(dataset, year, month, day, bbox, target, product, time, plevels, output_format= "netcdf", download_format="unarchived"):
503+
def era5_request_plev(dataset, year, month, day, bbox, target, product, time, plevels, varoi=None, output_format= "netcdf", download_format="unarchived"):
506504
"""CDS plevel api call
507505
508506
Args:
@@ -514,23 +512,30 @@ def era5_request_plev(dataset, year, month, day, bbox, target, product, time, pl
514512
product (str): type of model run. default: reanalysis
515513
time (str or list): hours to query
516514
plevels (str or list): pressure levels to query
515+
varoi (list): list of variables of interest to download. Defaults to None, which falls back to the minimum set needed for TPS
517516
output_format (str): default is "netcdf", can be "grib".
518517
download_format (str): default "unarchived". Can be "zip"
519518
520519
Returns:
521520
Store to disk dataset as indicated
522521
523522
"""
523+
if varoi is None:
524+
varoi = ['geopotential',
525+
'temperature',
526+
'u_component_of_wind',
527+
'v_component_of_wind',
528+
'specific_humidity',
529+
'relative_humidity']
530+
531+
524532
c = cdsapi.Client()
525533
c.retrieve(
526534
dataset,
527535
{
528536
'product_type': [product],
529537
"area": bbox,
530-
'variable': [
531-
'geopotential', 'temperature', 'u_component_of_wind',
532-
'v_component_of_wind', 'specific_humidity', 'relative_humidity'
533-
],
538+
'variable': varoi,
534539
'pressure_level': plevels,
535540
'year': year,
536541
'month': '%02d'%(month),
@@ -684,7 +689,7 @@ def unzip_file(file_path):
684689
merged_file_path = os.path.join(os.path.dirname(zip_file_path), os.path.basename(zip_file_path).replace('.zip', '.nc'))
685690
try:
686691
# Combine all `.nc` files
687-
datasets = [xr.open_dataset(nc_file) for nc_file in nc_files]
692+
datasets = [xr.open_dataset(nc_file, engine='netcdf4') for nc_file in nc_files]
688693
merged_ds = xr.merge(datasets) # Adjust dimension as needed
689694
merged_ds.to_netcdf(merged_file_path)
690695
print(f"Merged .nc files into {merged_file_path}.")
@@ -721,12 +726,12 @@ def remap_CDSbeta(file_pattern, file_type='SURF'):
721726
try:
722727
ds = ds.drop_vars('number')
723728
except:
724-
print("variables not found")
729+
print("Coordinate 'number' not found")
725730

726731
try:
727732
ds = ds.drop_vars('expver')
728733
except:
729-
print("variables not found")
734+
print("Coordinate 'expver' not found")
730735

731736
ds.to_netcdf(nc_file+ "_remap", mode='w')
732737
# move remap back to orig name
@@ -740,25 +745,24 @@ def remap_CDSbeta(file_pattern, file_type='SURF'):
740745
ds = xr.open_dataset(nc_file)
741746
ds = ds.rename({ 'valid_time' : 'time'})
742747

743-
try:
744-
#cdo delname,number,ishf,ie,zust,tisr SURF_20240925.nc SURF_clean.nc
745-
#ds2 = ds.swap_dims({'valid_time': 'time'})
746-
ds = ds.drop_vars('ishf')
747-
ds = ds.drop_vars('ie')
748-
ds = ds.drop_vars('zust')
749-
ds = ds.drop_vars('tisr')
750-
except:
751-
print("variables not found")
748+
#try:
749+
# #cdo delname,number,ishf,ie,zust,tisr SURF_20240925.nc SURF_clean.nc
750+
# ds = ds.drop_vars('ishf')
751+
# ds = ds.drop_vars('ie')
752+
# ds = ds.drop_vars('zust')
753+
# ds = ds.drop_vars('tisr')
754+
#except:
755+
# print("variables not found")
752756

753757
try:
754758
ds = ds.drop_vars('number')
755759
except:
756-
print("variables not found")
760+
print("Coordinate 'number' not found")
757761

758762
try:
759763
ds = ds.drop_vars('expver')
760764
except:
761-
print("variables not found")
765+
print("Coordinate 'expver' not found")
762766

763767
ds.to_netcdf(nc_file + "_remap", mode='w')
764768
# move remap back to orig name

TopoPyScale/topoclass.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -315,8 +315,13 @@ def extract_topo_cluster_param(self):
315315
else:
316316
if not os.path.isabs(mask_file):
317317
mask_file = Path(self.config.project.directory, mask_file)
318+
if not mask_file.exists():
319+
raise ValueError(f'File {mask_file} does not exist.')
320+
318321
# read mask TIFF
319-
ds_mask = rio.open_rasterio(mask_file).to_dataset('band').rename({1: 'mask'})
322+
ds_mask = xr.open_dataset(mask_file, engine='rasterio').band_data.isel(band=0).drop_vars('band').to_dataset()
323+
ds_mask = ds_mask.rename({'band_data': 'mask'})
324+
self.toposub.ds_param['mask'] = ds_mask.mask.drop_vars('spatial_ref')
320325

321326
# check if bounds and resolution match
322327
if not ds_mask.rio.bounds() == self.toposub.ds_param.rio.bounds() or not ds_mask.rio.resolution() == self.toposub.ds_param.rio.resolution():
@@ -340,9 +345,12 @@ def extract_topo_cluster_param(self):
340345
split_clustering = True
341346
if not os.path.isabs(groups_file):
342347
groups_file = Path(self.config.project.directory, groups_file)
348+
if not groups_file.exists():
349+
raise ValueError(f'File {groups_file} does not exist.')
343350

344351
# read cluster TIFF
345-
ds_group = rio.open_rasterio(groups_file).to_dataset('band').rename({1: 'group'})
352+
ds_group = xr.open_dataset(groups_file, engine='rasterio').band_data.isel(band=0).drop_vars('band').to_dataset()
353+
ds_group = ds_group.rename({'band_data': 'group'})
346354

347355
# check if bounds and resolution match
348356
if not ds_group.rio.bounds() == self.toposub.ds_param.rio.bounds() or not ds_group.rio.resolution() == self.toposub.ds_param.rio.resolution():
@@ -784,8 +792,7 @@ def get_era5(self):
784792
output_format=output_format,
785793
download_format=download_format,
786794
new_CDS_API=True,
787-
rm_daily=self.config.climate[self.config.project.climate].rm_daily,
788-
store_as_zarr=self.config.climate.era5.zarr_store
795+
rm_daily=self.config.climate[self.config.project.climate].rm_daily
789796
)
790797
# retrieve era5 plevels
791798
fe.retrieve_era5(
@@ -802,8 +809,7 @@ def get_era5(self):
802809
output_format=output_format,
803810
download_format=download_format,
804811
new_CDS_API=True,
805-
rm_daily=self.config.climate[self.config.project.climate].rm_daily,
806-
store_as_zarr=self.config.climate.era5.zarr_store
812+
rm_daily=self.config.climate[self.config.project.climate].rm_daily
807813
)
808814

809815
elif data_repository == 'google_cloud_storage':

doc/docs/03_configurationFile.md

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,15 @@ my_project/
77
├── inputs/
88
├── dem/
99
├── my_dem.tif
10-
└── pts_list.csv (OPTIONAL: to downscale to specific points)
10+
├── my_dem_mask.tif (OPTIONAL: to mask part of the DEM)
11+
├── my_dem_groups.tif (OPTIONAL: to delineate groups of clusters)
12+
└── pts_list.csv (OPTIONAL: to downscale to specific points)
1113
└── climate/
12-
├── PLEV*.nc
13-
└── SURF*.nc
14+
├── daily/
15+
├── ERA5.zarr (OPTIONAL: to downscale with zarr optimization)
16+
└── yearly/
17+
├── PLEV*.nc
18+
└── SURF*.nc
1419
├── outputs/
1520
├── tmp/
1621
└── downscaled/
@@ -116,8 +121,8 @@ sampling:
116121
n_clusters: 50 # number of cluster to segment the DEM
117122
random_seed: 2 # random seed for the K-mean clustering
118123
clustering_features: { 'x': 1, 'y': 1, 'elevation': 1, 'slope': 1, 'aspect_cos': 1, 'aspect_sin': 1, 'svf': 1 } # dictionary of the features of choice to use in clustering, with their relative importance. Relative importance is a multiplier applied after scaling
119-
clustering_mask: clustering/catchment_mask.tif # optional path to tif containing a mask (0/1)
120-
clustering_groups: clustering/groups.tif # optional path to a tif containing cluster groups (int values), e.g. land cover
124+
clustering_mask: inputs/dem/catchment_mask.tif # optional path to tif containing a mask (0/1)
125+
clustering_groups: inputs/dem/groups.tif # optional path to a tif containing cluster groups (int values), e.g. land cover
121126

122127
#.....................................................................................................
123128
toposcale:

0 commit comments

Comments
 (0)