Skip to content

Commit daed4a4

Browse files
authored
Merge pull request pik-piam#66 from irr-github/split_im
Flexibilise Node definitions
2 parents cea7401 + a6ce8cb commit daed4a4

File tree

3 files changed

+194
-8
lines changed

3 files changed

+194
-8
lines changed

config/default_config.yaml

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,5 +316,31 @@ operational_reserve:
316316
epsilon_vres: 0.02
317317
contingency: 300000 # MW
318318

319+
# TODO integrate in workflow
320+
nodes:
321+
split_provinces: False # split (some) provinces (admin l1) using admin l2 (counties/prefectures)
322+
# exclude currently only applies if split_provinces is True, will be fixed with integration to workflow
323+
exclude_provinces: ["Macau", "HongKong"] # list of provinces to exclude from splitting
324+
splits:
325+
# adminl2 groups per adminl1. {adminl1: {custom_name: [adminl2_names],..}}
326+
InnerMongolia:
327+
West:
328+
- "Hulunbuir"
329+
- "Xing'an"
330+
- "Tongliao"
331+
- "Chifeng"
332+
- "Xilin Gol"
333+
East:
334+
- "Alxa"
335+
- "Baotou"
336+
- "Baynnur"
337+
- "Hohhot"
338+
- "Ordos"
339+
- "Ulaan Chab"
340+
- "Wuhai"
341+
319342
fetch_regions:
320-
simplify_tol: 0.5
343+
simplify_tol:
344+
eez: 0.5
345+
land: 0.05
346+

workflow/rules/fetch_data.smk

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# snakemake rules for data fetch operations
22

3+
# TODO rework this, save shapes with all admin levels
4+
# build nodes with another script and save that to DERIVED_DATA
5+
# nodes could be read by snakefile and passed as a param to the relevant rules
36
rule fetch_region_shapes:
47
output:
58
country_shape=DERIVED_COMMON + "/regions/country.geojson",

workflow/scripts/fetch_shapes.py

Lines changed: 164 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import numpy as np
1010
from os import PathLike
1111
from pandas import DataFrame
12+
import zipfile
13+
import io
1214

1315
from constants import (
1416
CRS,
@@ -21,6 +23,8 @@
2123
from _helpers import mock_snakemake, configure_logging
2224

2325
NATURAL_EARTH_RESOLUTION = "10m"
26+
GDAM_LV1 = "NAME_1"
27+
GDAM_LV2 = "NAME_2"
2428

2529
logger = logging.getLogger(__name__)
2630

@@ -37,7 +41,8 @@ def fetch_natural_earth_shape(
3741
3842
Example:
3943
china country: build_natural_earth_shape("admin_0_countries", "ADMIN", "China")
40-
china provinces: build_natural_earth_shape("admin_1_states_provinces", "iso_a2", "CN", region_key = "name_en")
44+
china provinces: build_natural_earth_shape("admin_1_states_provinces",
45+
"iso_a2", "CN", region_key="name_en")
4146
4247
Returns:
4348
gpd.GeoDataFrame: the filtered records
@@ -121,7 +126,7 @@ def fetch_maritime_eez(zone_name: str) -> gpd.GeoDataFrame:
121126

122127
def find_record_id(zone_name: str) -> int:
123128
# get Maritime Gazette record ID for the country
124-
# the eez ID is 70: see https://www.marineregions.org/gazetteer.php?p=webservices&type=rest#/
129+
# eez ID is 70: see https://www.marineregions.org/gazetteer.php?p=webservices&type=rest#/
125130
url = f"https://www.marineregions.org/rest/getGazetteerRecordsByName.json/{zone_name}/?like=true&fuzzy=false&typeID=70&offset=0&count=100"
126131
response = requests.get(url)
127132
if response.status_code != 200:
@@ -142,13 +147,15 @@ def find_record_id(zone_name: str) -> int:
142147
# URL of the WFS service
143148
url = "https://geo.vliz.be/geoserver/wfs"
144149
# WFS request parameters + record ID filter
150+
base_filter_ = "<Filter><PropertyIsEqualTo><PropertyName>mrgid_eez</PropertyName><Literal>"
151+
filter_ = base_filter_ + f"{mgrid}</Literal></PropertyIsEqualTo></Filter>"
145152
params = dict(
146153
service="WFS",
147154
version="1.1.0",
148155
request="GetFeature",
149156
typeName="MarineRegions:eez",
150157
outputFormat="json",
151-
filter=f"<Filter><PropertyIsEqualTo><PropertyName>mrgid_eez</PropertyName><Literal>{mgrid}</Literal></PropertyIsEqualTo></Filter>",
158+
filter=filter_,
152159
)
153160

154161
# Fetch data from WFS using requests
@@ -169,6 +176,143 @@ def find_record_id(zone_name: str) -> int:
169176
return eez.set_crs(epsg=crs)
170177

171178

179+
def fetch_gadm(country_code="CHN", level=2, shp_dir="resources/data/province_shapes"):
    """
    Fetch the GADM shapefile for a given country and administrative level.
    https://gadm.org/download_country.html

    Parameters:
        country_code (str): ISO3 country code (e.g., 'CHN', 'USA').
        level (int): Administrative level (0=country, 1=region, etc.).
        shp_dir (str): Directory the downloaded archive is extracted to.
            Defaults to the previously hard-coded project location so
            existing callers are unaffected.

    Returns:
        geopandas.GeoDataFrame: Loaded shapefile as GeoDataFrame.

    Raises:
        ValueError: if the download fails or the requested admin level is
            not present in the archive.
    """
    # Construct the URL (GADM 4.1 zipped shapefile bundle, all levels)
    url = f"https://geodata.ucdavis.edu/gadm/gadm4.1/shp/gadm41_{country_code}_shp.zip"

    # Download the zip file
    response = requests.get(url)
    if response.status_code != 200:
        raise ValueError(
            f"Failed to download data for {country_code} - Status code: {response.status_code}"
        )

    # Extract the zip file in memory
    with zipfile.ZipFile(io.BytesIO(response.content)) as z:
        # Filter to the desired level shapefile before extracting
        level_filename = f"gadm41_{country_code}_{level}.shp"
        if level_filename not in z.namelist():
            raise ValueError(f"Level {level} shapefile not found for {country_code}.")

        z.extractall(shp_dir)

    return gpd.read_file(f"{shp_dir}/{level_filename}")
212+
213+
214+
def fetch_prefecture_shapes(fixes=None):
    """
    Fetch county-level (admin2) shapefiles for China from GADM.

    Args:
        fixes (dict, Optional): Dictionary mapping old names to new names per
            column, i.e. ``{column: {old_name: new_name, ...}, ...}``.
            Defaults to the standard province renames used by the project
            (e.g. "Nei Mongol" -> "InnerMongolia").

    Returns:
        gpd.GeoDataFrame: columns COUNTRY, NAME_1, NAME_2 and geometry.
    """
    # NOTE: the default is built inside the function to avoid the
    # mutable-default-argument pitfall (the dict was previously shared
    # across calls and could be mutated by callers).
    if fixes is None:
        fixes = {
            GDAM_LV1: {
                "Nei Mongol": "InnerMongolia",
                "Xinjiang Uygur": "Xinjiang",
                "Hong Kong": "HongKong",
                "Ningxia Hui": "Ningxia",
            }
        }

    gdf = fetch_gadm(country_code="CHN", level=2)
    for col, fix_dict in fixes.items():
        # Series.replace maps exact old values to new ones in a single pass,
        # avoiding per-name query-string construction (robust to quotes in names)
        gdf[col] = gdf[col].replace(fix_dict)
    return gdf[["COUNTRY", "NAME_1", "NAME_2", "geometry"]]
236+
237+
238+
def build_nodes(
    prefectures: gpd.GeoDataFrame,
    nodes_cfg: dict,
) -> gpd.GeoSeries:
    """Build the nodes, either directly at provincial (admin1) level or from
    admin level 2 sub-region groupings.

    Args:
        prefectures: all prefecture (admin2) shapes, with GDAM_LV1/GDAM_LV2 columns.
        nodes_cfg: the "nodes" config section (keys: split_provinces,
            exclude_provinces, splits).

    Returns:
        gpd.GeoSeries: node geometries indexed by node name (when splitting,
            a GeoDataFrame dissolved by node is returned by split_provinces).

    Raises:
        ValueError: if the split configuration is invalid.
        KeyError: if an excluded province is not present in the data.
    """
    gdf = prefectures.copy()
    if nodes_cfg.get("split_provinces", False):
        validate_split_cfg(nodes_cfg["splits"], gdf)
        return split_provinces(gdf, nodes_cfg)
    else:
        # BUG FIX: was `provs = provs = gdf.dissolve(...)` (duplicated target)
        provs = gdf.dissolve(GDAM_LV1)
        # BUG FIX: exclude_provinces is already a list; wrapping it in another
        # list made `drop` receive a single unhashable list label and fail.
        # Use .get so a missing key simply means "exclude nothing".
        provs = provs.drop(nodes_cfg.get("exclude_provinces", []))
        return provs.rename_axis("node")["geometry"]
254+
255+
256+
def validate_split_cfg(split_cfg: dict, gdf: gpd.GeoDataFrame):
    """Validate the province split configuration.

    The province (admin level 1) is split by admin level 2
    {subregion: [prefecture names], ...}. The prefecture names must be unique
    and cover all admin2 in the admin1 level.

    Args:
        split_cfg (dict): the configuration for the prefecture split
        gdf (gpd.GeoDataFrame): the geodataframe with prefecture shapes
    Raises:
        ValueError: if an admin1 is unknown, the prefectures are duplicated,
            or they do not cover all admin2 in the admin1 level
    """
    known_admin1 = gdf[GDAM_LV1].unique()
    for admin1, groups in split_cfg.items():
        if admin1 not in known_admin1:
            err_ = f"Invalid admin1 entry {admin1} not found in provinces {known_admin1}"
            raise ValueError(err_)

        # flatten the configured admin2 names across all sub-regions
        admin2 = [name for names in groups.values() for name in names]

        # BUG FIX: check uniqueness non-destructively and BEFORE completeness.
        # The old check popped the list empty (so the error reported nothing)
        # and could never fire, since duplicates already fail the
        # completeness comparison against the unique admin2 values.
        duplicates = sorted({name for name in admin2 if admin2.count(name) > 1})
        if duplicates:
            raise ValueError(f"Duplicated prefecture names in {admin1}: {duplicates}")

        # check completeness against the actual admin2 entries of this admin1
        all_admin2 = gdf.query(f'{GDAM_LV1} == "{admin1}"')[GDAM_LV2].unique().tolist()
        if not sorted(admin2) == sorted(all_admin2):
            raise ValueError(
                f"{admin1} prefectures do not match expected:\ngot {admin2}\nvs\n {all_admin2}"
            )
289+
290+
291+
# TODO consider returning country and province
def split_provinces(
    prefectures: gpd.GeoDataFrame,
    node_config: dict
) -> gpd.GeoSeries:
    """
    Split the configured provinces (admin1) into sub-region nodes.

    Every admin1 listed in ``node_config["splits"]`` is replaced by nodes
    named "<admin1>_<subregion>", each built by merging the admin2 shapes
    assigned to that subregion in the config.

    Args:
        prefectures (gpd.GeoDataFrame): all Chinese prefectures (admin2).
        node_config (dict): the configuration for the node build
    Returns:
        gpd.GeoDataFrame: geometries dissolved by "node" (one row per node).
    """
    result = prefectures.copy()
    for province, groups in node_config["splits"].items():
        # build the inverse mapping: admin2 name -> "<province>_<subregion>"
        admin2_to_node = {}
        for group_name, admin2_names in groups.items():
            target = province + "_" + group_name
            for admin2_name in admin2_names:
                admin2_to_node[admin2_name] = target
        rows = result.query(f"{GDAM_LV1} == '{province}'").index
        result.loc[rows, GDAM_LV1] = result.loc[rows, "NAME_2"].map(admin2_to_node)

    # merge geometries by node
    renamed = result.rename(columns={GDAM_LV1: "node"})
    return renamed[["node", "geometry"]].dissolve(by="node", aggfunc="sum")
314+
315+
172316
def cut_smaller_from_larger(
173317
row: gpd.GeoSeries, gdf: gpd.GeoDataFrame, overlaps: DataFrame
174318
) -> gpd.GeoSeries:
@@ -192,7 +336,7 @@ def cut_smaller_from_larger(
192336
for idx in ovrlap_idx:
193337
geom = gdf.iloc[idx].geometry
194338
if row.geometry.area > geom.area:
195-
row.geometry = row.geometry.difference(geom)
339+
row["geometry"] = row["geometry"].difference(geom)
196340
elif row.geometry.area == geom.area:
197341
raise ValueError(f"Equal area overlap between {row.name} and {idx} - unhandled")
198342
return row
@@ -286,22 +430,35 @@ def eez_by_region(
286430
snakemake = mock_snakemake("fetch_region_shapes")
287431
configure_logging(snakemake, logger=logger)
288432

433+
nodes_config = snakemake.config.get("nodes", {"split_provinces": False})
434+
tol = snakemake.config["fetch_regions"]["simplify_tol"]
435+
289436
logger.info(f"Fetching country shape {COUNTRY_NAME} from cartopy")
290437
fetch_country_shape(snakemake.output.country_shape)
291438
logger.info(f"Country shape saved to {snakemake.output.country_shape}")
292439

293440
logger.info(f"Fetching province shapes for {COUNTRY_ISO} from cartopy")
294441
# TODO it would be better to filter by set regions after making the voronoi polygons
295-
regions = fetch_province_shapes()
442+
if not nodes_config.get("split_provinces", False):
443+
regions = fetch_province_shapes()
444+
else:
445+
logger.info("Splitting provinces into user defined nodes")
446+
prefectures = fetch_prefecture_shapes()
447+
nodes = build_nodes(prefectures, nodes_config)
448+
nodes.simplify(tol["land"]).to_file(snakemake.output.province_shapes.replace(".geojson", "_nodestest.geojson"), driver="GeoJSON")
449+
450+
raise NotImplementedError(
451+
"Province splitting is not implemented accross the whole workflow yet."
452+
)
453+
296454
regions.to_file(snakemake.output.province_shapes, driver="GeoJSON")
297455
regions.to_file(snakemake.output.prov_shpfile)
298456
logger.info(f"Province shapes saved to {snakemake.output.province_shapes}")
299457

300458
logger.info(f"Fetching maritime zones for EEZ prefix {EEZ_PREFIX}")
301459
eez_country = fetch_maritime_eez(EEZ_PREFIX)
302460
logger.info("Breaking by reion")
303-
tol = snakemake.config["fetch_regions"]["simplify_tol"]
304-
eez_by_region(eez_country, regions, prov_key="province", simplify_tol=tol).to_file(
461+
eez_by_region(eez_country, regions, prov_key="province", simplify_tol=tol["eez"]).to_file(
305462
snakemake.output.offshore_shapes, driver="GeoJSON"
306463
)
307464

0 commit comments

Comments
 (0)