Skip to content

Commit ad4fdf8

Browse files
authored
Merge pull request #18 from quantifyearth/mwd-add-occurrence-validation
Add occurrence validation to run script
2 parents afdba4d + 8049518 commit ad4fdf8

File tree

9 files changed

+146
-51
lines changed

9 files changed

+146
-51
lines changed

prepare_layers/convert_crosswalk.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ def convert_crosswalk(
5050
for code in codes:
5151
res.append([hab, code])
5252

53+
# This addition is for the islands correction layer, which will
54+
# be added as an effective lcc_0.tif
55+
res.append(["islands", "0"])
56+
5357
df = pd.DataFrame(res, columns=["code", "value"])
5458
df.to_csv(output_path, index=False)
5559

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import argparse
2+
import os
3+
from pathlib import Path
4+
5+
import yirgacheffe as yg
6+
7+
def remove_nans_from_mask(
8+
input_path: Path,
9+
output_path: Path,
10+
) -> None:
11+
os.makedirs(output_path.parent, exist_ok=True)
12+
with yg.read_raster(input_path) as layer:
13+
converted = layer.nan_to_num()
14+
converted.to_geotiff(output_path)
15+
16+
def main() -> None:
17+
parser = argparse.ArgumentParser(description="Convert NaNs to zeros in mask layers")
18+
parser.add_argument(
19+
'--original',
20+
type=Path,
21+
help="Original raster",
22+
required=True,
23+
dest="original_path",
24+
)
25+
parser.add_argument(
26+
'--output',
27+
type=Path,
28+
help='Destination raster',
29+
required=True,
30+
dest='output_path',
31+
)
32+
args = parser.parse_args()
33+
34+
remove_nans_from_mask(
35+
args.original_path,
36+
args.output_path
37+
)
38+
39+
if __name__ == "__main__":
40+
main()

prepare_species/common.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
"threats",
2929
"category",
3030
"category_weight",
31-
"geometry"
31+
"geometry",
32+
"in_star",
3233
]
3334

3435
# From Muir et al: For each species, a global STAR threat abatement (START) score
@@ -84,6 +85,8 @@ class SpeciesReport:
8485
"keeps_habitats",
8586
"has_geometries",
8687
"keeps_geometries",
88+
"has_category",
89+
"in_star",
8790
"filename",
8891
]
8992

@@ -224,6 +227,16 @@ def tidy_reproject_save(
224227
os.makedirs(output_directory_path, exist_ok=True)
225228
output_path = output_directory_path / f"{grow.id_no}.geojson"
226229
res = gpd.GeoDataFrame(grow.to_frame().transpose(), crs=src_crs, geometry="geometry")
230+
231+
# Ensure proper dtypes for JSON serialization
232+
int_columns = ['id_no', 'assessment_id', 'assessment_year', 'elevation_lower', 'elevation_upper', 'category_weight']
233+
for col in int_columns:
234+
if col in res.columns:
235+
res[col] = res[col].astype('Int64') # Nullable integer type
236+
237+
if 'in_star' in res.columns:
238+
res['in_star'] = res['in_star'].astype(bool)
239+
227240
res_projected = res.to_crs(target_crs)
228241
res_projected.to_file(output_path, driver="GeoJSON")
229242
report.filename = output_path

prepare_species/extract_species_data_psql.py

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@
3030
logging.basicConfig()
3131
logger.setLevel(logging.DEBUG)
3232

33+
# Note that this query returns more species than would be accepted into STAR based on
34+
# threat category, but for model checking we want all AOHs for a given taxon, so the
35+
# pipeline must process them all, and then just include the correct set when processing
36+
# STAR
3337
MAIN_STATEMENT = """
3438
SELECT
3539
assessments.sis_taxon_id as id_no,
@@ -54,7 +58,7 @@
5458
AND taxons.class_name = %s
5559
AND taxons.infra_type is NULL -- no subspecies
5660
AND taxons.metadata->>'taxon_level' = 'Species'
57-
AND red_list_category_lookup.code IN ('NT', 'VU', 'EN', 'CR')
61+
AND red_list_category_lookup.code IN ('DD', 'LC', 'NT', 'VU', 'EN', 'CR')
5862
"""
5963

6064
SYSTEMS_STATEMENT = """
@@ -138,20 +142,10 @@ def process_row(
138142
presence += (4,)
139143
report.possibly_extinct = True # pylint: disable=W0201
140144

141-
cursor.execute(SYSTEMS_STATEMENT, (assessment_id,))
142-
systems_data = cursor.fetchall()
143-
try:
144-
systems = process_systems(systems_data, report)
145-
except ValueError as exc:
146-
logger.debug("Dropping %s: %s", id_no, str(exc))
147-
return report
148-
149-
cursor.execute(THREATS_STATEMENT, (assessment_id,))
150-
raw_threats = cursor.fetchall()
151-
threats = process_threats(raw_threats, report)
152-
if len(threats) == 0:
153-
return report
145+
include_in_star = category in ('NT', 'VU', 'EN', 'CR')
146+
report.has_category = include_in_star
154147

148+
# First checks are the ones that rule out being able to make an AOH at all
155149
cursor.execute(HABITATS_STATEMENT, (assessment_id,))
156150
raw_habitats = cursor.fetchall()
157151
try:
@@ -170,6 +164,37 @@ def process_row(
170164
except ValueError as _exc:
171165
return report
172166

167+
# Second checks are whether it is good for STAR, so from hereon we should
168+
# output a GeoJSON regardless as we should make an AOH for validation with this
169+
cursor.execute(SYSTEMS_STATEMENT, (assessment_id,))
170+
systems_data = cursor.fetchall()
171+
try:
172+
systems = process_systems(systems_data, report)
173+
except ValueError as exc:
174+
logger.debug("Dropping %s: %s", id_no, str(exc))
175+
include_in_star = False
176+
try:
177+
systems = systems_data[0][0]
178+
except IndexError:
179+
systems = []
180+
181+
cursor.execute(THREATS_STATEMENT, (assessment_id,))
182+
raw_threats = cursor.fetchall()
183+
threats = process_threats(raw_threats, report)
184+
if len(threats) == 0:
185+
include_in_star = False
186+
187+
report.in_star = include_in_star
188+
189+
try:
190+
category_weight = CATEGORY_WEIGHTS[category]
191+
except KeyError:
192+
assert include_in_star is False
193+
category_weight = 0
194+
195+
# This is a fix as per the method to include the missing islands layer:
196+
habitats_list = list(habitats) + ["islands"]
197+
173198
gdf = gpd.GeoDataFrame(
174199
[[
175200
id_no,
@@ -179,14 +204,15 @@ def process_row(
179204
systems,
180205
int(elevation_lower) if elevation_lower is not None else None,
181206
int(elevation_upper) if elevation_upper is not None else None,
182-
'|'.join(list(habitats)),
207+
'|'.join(habitats_list),
183208
scientific_name,
184209
family_name,
185210
class_name,
186211
json.dumps(threats),
187212
category,
188-
CATEGORY_WEIGHTS[category],
189-
geometry
213+
category_weight,
214+
geometry,
215+
include_in_star,
190216
]],
191217
columns=COLUMNS,
192218
crs=target_projection or 'epsg:4326'

prepare_species/extract_species_data_redlist.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,8 @@ def process_species(
243243
logger.debug("Dropping %s: %s", id_no, exc)
244244
return report
245245

246+
habitats_list = list(habitats) + ["islands"]
247+
246248
# Create GeoDataFrame with all data
247249
gdf = gpd.GeoDataFrame(
248250
[[
@@ -253,7 +255,7 @@ def process_species(
253255
systems,
254256
elevation_lower,
255257
elevation_upper,
256-
'|'.join(list(habitats)),
258+
'|'.join(habitats_list),
257259
scientific_name,
258260
family_name,
259261
class_name,

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ pymer4
77
pyproj
88
scikit-image
99
redlistapi
10-
yirgacheffe>=1.9
11-
aoh[validation]>=1.0
10+
yirgacheffe>=1.12,<2.0
11+
aoh[validation]>=2.0.1,<3.0
1212

1313
# GDAL should be installed manually to match the version of the library installed on your machine
1414
gdal[numpy]

scripts/run.sh

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -63,32 +63,20 @@ if [ ! -d "${DATADIR}"/habitat_layers ]; then
6363

6464
echo "Processing habitat map..."
6565
aoh-habitat-process --habitat "${DATADIR}"/habitat/raw.tif \
66-
--scale 1000.0 \
66+
--scale 992.292720200000133 \
6767
--projection "ESRI:54009" \
6868
--output "${DATADIR}"/tmp_habitat_layers/current
6969
mv "${DATADIR}"/tmp_habitat_layers "${DATADIR}"/habitat_layers
7070
fi
7171

72-
if [ ! -d "${DATADIR}"/masks ]; then
73-
echo "Processing masks..."
74-
python3 ./prepare_layers/make_masks.py --habitat_layers "${DATADIR}"/habitat_layers/current \
75-
--output_directory "${DATADIR}"/masks
72+
if [ ! -f "${DATADIR}"/habitat_layers/current/lcc_0.tif ]; then
73+
cp "${DATADIR}/Zenodo/MissingLandcover_1km_cover.tif" "${DATADIR}"/habitat_layers/current/lcc_0.tif
7674
fi
7775

78-
# Fetch and prepare the elevation layers
79-
if [[ ! -f "${DATADIR}"/elevation/elevation-max-1k.tif || ! -f "${DATADIR}"/elevation/elevation-min-1k.tif ]]; then
80-
if [ ! -f "${DATADIR}"/elevation/elevation.tif ]; then
81-
echo "Fetching elevation map..."
82-
reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output "${DATADIR}"/elevation/elevation.tif
83-
fi
84-
if [ ! -f "${DATADIR}"/elevation/elevation-max-1k.tif ]; then
85-
echo "Generating elevation max layer..."
86-
gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 "${DATADIR}"/elevation/elevation.tif "${DATADIR}"/elevation/elevation-max-1k.tif
87-
fi
88-
if [ ! -f "${DATADIR}"/elevation/elevation-min-1k.tif ]; then
89-
echo "Generating elevation min layer..."
90-
gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 "${DATADIR}"/elevation/elevation.tif "${DATADIR}"/elevation/elevation-min-1k.tif
91-
fi
76+
if [ ! -f "${DATADIR}"/masks/CGLS100Inland_withGADMIslands.tif ]; then
77+
echo "Processing masks..."
78+
python3 ./prepare_layers/remove_nans_from_mask.py --original "${DATADIR}"/Zenodo/CGLS100Inland_withGADMIslands.tif \
79+
--output "${DATADIR}"/masks/CGLS100Inland_withGADMIslands.tif
9280
fi
9381

9482
# Generate the crosswalk table
@@ -102,7 +90,10 @@ for TAXA in "${TAXALIST[@]}"
10290
do
10391
if [ ! -d "${DATADIR}"/species-info/"${TAXA}"/ ]; then
10492
echo "Extracting species data for ${TAXA}..."
105-
python3 ./prepare_species/extract_species_data_psql.py --class "${TAXA}" --output "${DATADIR}"/species-info/"${TAXA}"/ --projection "ESRI:54009" --excludes "${DATADIR}"/SpeciesList_generalisedRangePolygons.csv
93+
python3 ./prepare_species/extract_species_data_psql.py --class "${TAXA}" \
94+
--output "${DATADIR}"/species-info/"${TAXA}"/ \
95+
--projection "ESRI:54009" \
96+
--excludes "${DATADIR}"/SpeciesList_generalisedRangePolygons.csv
10697
fi
10798
done
10899

@@ -133,6 +124,18 @@ aoh-collate-data --aoh_results "${DATADIR}"/aohs/current/ \
133124
echo "Calculating model validation..."
134125
aoh-validate-prevalence --collated_aoh_data "${DATADIR}"/validation/aohs.csv \
135126
--output "${DATADIR}"/validation/model_validation.csv
127+
for TAXA in "${TAXALIST[@]}"
128+
do
129+
echo "Fetching GBIF data for ${TAXA}..."
130+
aoh-fetch-gbif-data --collated_aoh_data "${DATADIR}"/validation/aohs.csv \
131+
--taxa "${TAXA}" \
132+
--output_dir "${DATADIR}"/validation/occurrences/
133+
echo "Validating occurrences for ${TAXA}..."
134+
aoh-validate-occurrences --gbif_data_path "${DATADIR}"/validation/occurrences/"${TAXA}" \
135+
--species_data "${DATADIR}"/species-info/"${TAXA}"/current/ \
136+
--aoh_results "${DATADIR}"/aohs/current/"${TAXA}"/ \
137+
--output "${DATADIR}"/validation/occurrences/"${TAXA}".csv
138+
done
136139

137140
# Threats
138141
echo "Generating threat task list..."

utils/aoh_generator.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,21 @@ def aoh_generator(
2222
for species_path in species_paths:
2323
res.append([
2424
data_dir / "habitat_layers" / scenario,
25-
data_dir / "elevation" / "elevation-max-1k.tif",
26-
data_dir / "elevation" / "elevation-min-1k.tif",
25+
data_dir / "Zenodo" / "FABDEM_1km_max_patched.tif",
26+
data_dir / "Zenodo" / "FABDEM_1km_min_patched.tif",
2727
data_dir / "crosswalk.csv",
2828
species_path,
29-
data_dir / "masks" / "terrestrial_mask.tif",
29+
data_dir / "masks" / "CGLS100Inland_withGADMIslands.tif",
3030
data_dir / "aohs" / scenario / taxa_dir_path.name,
3131
])
3232

3333
df = pd.DataFrame(res, columns=[
34-
'--habitats',
34+
'--fractional_habitats',
3535
'--elevation-max',
3636
'--elevation-min',
3737
'--crosswalk',
3838
'--speciesdata',
39-
'--area',
39+
'--weights',
4040
'--output'
4141
])
4242
output_dir, _ = os.path.split(output_csv_path)

utils/threats_generator.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python3
22

33
import argparse
4+
import json
45
import os
56
from pathlib import Path
67

@@ -23,11 +24,17 @@ def threats_generator(
2324
taxon_id = species_path.stem
2425
aoh_path = data_dir / "aohs" / source / taxa_dir_path.name / f"{taxon_id}_all.tif"
2526
if aoh_path.exists():
26-
res.append([
27-
species_path,
28-
aoh_path,
29-
data_dir / "threat_rasters" / taxa_dir_path.name,
30-
])
27+
# Check if species should be included in STAR. We don't use geopandas here
28+
# because it will parse the geometry, which can be quite slow, and we just
29+
# want a single bool field
30+
with open(species_path, 'r', encoding="UTF-8") as f:
31+
data = json.load(f)
32+
if data['features'][0]['properties']['in_star']:
33+
res.append([
34+
species_path,
35+
aoh_path,
36+
data_dir / "threat_rasters" / taxa_dir_path.name,
37+
])
3138

3239
df = pd.DataFrame(res, columns=[
3340
'--speciesdata',

0 commit comments

Comments
 (0)