Skip to content

Commit 1cf3b65

Browse files
authored
Merge pull request #41 from quantifyearth/mwd-food-map-random-seed
Add seed to random number generator used for hybrid Jung + GAEZ/HYDE habitat map.
2 parents 4ba1b25 + bf9f36d commit 1cf3b65

File tree

9 files changed

+178
-48
lines changed

9 files changed

+178
-48
lines changed

.mypy.ini

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
[mypy]
2-
ignore_missing_imports = True
2+
ignore_missing_imports = True
3+
no_namespace_packages = True

.pylintrc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
[MAIN]
2+
init-hook='import sys; from pathlib import Path; script_dir = Path(".").resolve(); sys.path.extend([str(script_dir / "prepare_layers"), str(script_dir / "prepare_species")])'
3+
14
[FORMAT]
25
max-line-length=120
36

prepare_layers/make_food_current_map.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pathlib import Path
88
from multiprocessing import Manager, Process, cpu_count
99
from queue import Queue
10-
from typing import List, NamedTuple, Optional, Tuple
10+
from typing import NamedTuple
1111

1212
import numpy as np
1313
import yirgacheffe as yg
@@ -32,8 +32,11 @@ def process_tile(
3232
current: yg.layers.RasterLayer,
3333
pnv: yg.layers.RasterLayer,
3434
tile: TileInfo,
35+
random_seed: int,
3536
) -> np.ndarray:
3637

38+
rng = np.random.default_rng(random_seed)
39+
3740
data = current.read_array(tile.x_position, tile.y_position, tile.width, tile.height)
3841

3942
diffs = [
@@ -61,7 +64,7 @@ def process_tile(
6164
continue
6265
required_points = min(required_points, possible_points)
6366

64-
selected_locations = np.random.choice(
67+
selected_locations = rng.choice(
6568
len(valid_locations[0]),
6669
size=required_points,
6770
replace=False
@@ -90,13 +93,14 @@ def process_tile_concurrently(
9093
with yg.read_raster(current_lvl1_path) as current:
9194
with yg.read_raster(pnv_path) as pnv:
9295
while True:
93-
tile: Optional[TileInfo] = input_queue.get()
94-
if tile is None:
96+
job : tuple[TileInfo, int] | None = input_queue.get()
97+
if job is None:
9598
break
99+
tile, seed = job
96100
if np.isnan(tile.crop_diff) and np.isnan(tile.pasture_diff):
97101
result_queue.put((tile, None))
98102
else:
99-
data = process_tile(current, pnv, tile)
103+
data = process_tile(current, pnv, tile, seed)
100104
result_queue.put((tile, data.tobytes()))
101105

102106
result_queue.put(None)
@@ -105,7 +109,7 @@ def build_tile_list(
105109
current_lvl1_path: Path,
106110
crop_adjustment_path: Path,
107111
pasture_adjustment_path: Path,
108-
) -> List[TileInfo]:
112+
) -> list[TileInfo]:
109113
tiles = []
110114
with yg.read_raster(current_lvl1_path) as current:
111115
current_dimensions = current.window.xsize, current.window.ysize
@@ -151,7 +155,7 @@ def assemble_map(
151155
band = output._dataset.GetRasterBand(1) # pylint: disable=W0212
152156

153157
while True:
154-
result : Optional[Tuple[TileInfo,Optional[bytearray]]] = result_queue.get()
158+
result : tuple[TileInfo,bytearray | None] | None = result_queue.get()
155159
if result is None:
156160
sentinal_count -= 1
157161
if sentinal_count == 0:
@@ -174,14 +178,18 @@ def pipeline_source(
174178
pasture_adjustment_path: Path,
175179
source_queue: Queue,
176180
sentinal_count: int,
181+
random_seed: int,
177182
) -> None:
183+
rng = np.random.default_rng(random_seed)
184+
178185
tiles = build_tile_list(
179186
current_lvl1_path,
180187
crop_adjustment_path,
181188
pasture_adjustment_path,
182189
)
183-
for tile in tiles:
184-
source_queue.put(tile)
190+
seeds = rng.integers(2**63, size=len(tiles))
191+
for tile, seed in zip(tiles, seeds):
192+
source_queue.put((tile, seed))
185193
for _ in range(sentinal_count):
186194
source_queue.put(None)
187195

@@ -190,6 +198,7 @@ def make_food_current_map(
190198
pnv_path: Path,
191199
crop_adjustment_path: Path,
192200
pasture_adjustment_path: Path,
201+
random_seed: int,
193202
output_path: Path,
194203
processes_count: int,
195204
) -> None:
@@ -210,7 +219,7 @@ def make_food_current_map(
210219
pnv_path,
211220
source_queue,
212221
result_queue,
213-
)) for index in range(processes_count)]
222+
)) for _ in range(processes_count)]
214223
for worker_process in workers:
215224
worker_process.start()
216225

@@ -220,6 +229,7 @@ def make_food_current_map(
220229
pasture_adjustment_path,
221230
source_queue,
222231
processes_count,
232+
random_seed,
223233
))
224234
source_worker.start()
225235

@@ -273,6 +283,13 @@ def main() -> None:
273283
help="Path of adjustment for pasture diff",
274284
dest="pasture_adjustment_path",
275285
)
286+
parser.add_argument(
287+
"--seed",
288+
type=int,
289+
required=True,
290+
help="Seed the random number generator",
291+
dest="seed",
292+
)
276293
parser.add_argument(
277294
'--output',
278295
type=Path,
@@ -295,6 +312,7 @@ def main() -> None:
295312
args.pnv_path,
296313
args.crop_adjustment_path,
297314
args.pasture_adjustment_path,
315+
args.seed,
298316
args.output_path,
299317
args.processes_count,
300318
)

prepare_species/common.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
COLUMNS = [
2121
"id_no",
2222
"assessment_id",
23+
"assessment_year",
2324
"season",
2425
"elevation_lower",
2526
"elevation_upper",
@@ -170,7 +171,7 @@ def process_habitats(
170171

171172

172173
def process_geometries(
173-
geometries_data: List[Tuple[int,shapely.Geometry]],
174+
geometries_data: List[Tuple[int,str]],
174175
report: SpeciesReport,
175176
) -> Dict[int,shapely.Geometry]:
176177
if len(geometries_data) == 0:
@@ -226,7 +227,8 @@ def process_and_save(
226227
output_directory_path: Path,
227228
) -> None:
228229

229-
id_no, assessment_id, elevation_lower, elevation_upper, scientific_name, family_name, threat_code = row
230+
id_no, assessment_id, assessment_year, elevation_lower, elevation_upper, scientific_name, \
231+
family_name, threat_code = row
230232

231233
seasons = set(geometries.keys()) | set(habitats.keys())
232234

@@ -237,6 +239,7 @@ def process_and_save(
237239
[[
238240
id_no,
239241
assessment_id,
242+
int(assessment_year),
240243
SEASON_NAME[1],
241244
int(elevation_lower) if elevation_lower is not None else None,
242245
int(elevation_upper) if elevation_upper is not None else None,
@@ -299,6 +302,7 @@ def process_and_save(
299302
[[
300303
id_no,
301304
assessment_id,
305+
int(assessment_year),
302306
SEASON_NAME[2],
303307
int(elevation_lower) if elevation_lower is not None else None,
304308
int(elevation_upper) if elevation_upper is not None else None,
@@ -318,6 +322,7 @@ def process_and_save(
318322
[[
319323
id_no,
320324
assessment_id,
325+
int(assessment_year),
321326
SEASON_NAME[3],
322327
int(elevation_lower) if elevation_lower is not None else None,
323328
int(elevation_upper) if elevation_upper is not None else None,

prepare_species/extract_species_psql.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
SELECT
2323
assessments.sis_taxon_id as id_no,
2424
assessments.id as assessment_id,
25+
DATE_PART('year', assessments.assessment_date) as assessment_year,
2526
(assessment_supplementary_infos.supplementary_fields->>'ElevationLower.limit')::numeric AS elevation_lower,
2627
(assessment_supplementary_infos.supplementary_fields->>'ElevationUpper.limit')::numeric AS elevation_upper,
2728
taxons.scientific_name,
@@ -110,7 +111,16 @@ def process_row(
110111
register(connection)
111112
cursor = connection.cursor()
112113

113-
(id_no, assessment_id, _elevation_lower, _elevation_upper, scientific_name, _family_name, _threat_code) = row
114+
(
115+
id_no,
116+
assessment_id,
117+
_assessment_year,
118+
_elevation_lower,
119+
_elevation_upper,
120+
scientific_name,
121+
_family_name,
122+
_threat_code,
123+
) = row
114124
report = SpeciesReport(id_no, assessment_id, scientific_name)
115125
if id_no in overrides:
116126
report.overriden = True

scripts/generate_food_map.sh

100644100755
Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# downloaded and generated `current_raw.tif` from the original LIFE pipeline (see run.sh)
77

88
set -e
9+
set -x
910

1011
if [ -z "${DATADIR}" ]; then
1112
echo "Please specify $DATADIR"
@@ -67,20 +68,29 @@ if [ ! -d "${DATADIR}"/food/current_layers ]; then
6768
fi
6869

6970
# Combine GAEZ and HYDE data
70-
python3 ./prepare_layers/build_gaez_hyde.py --gaez "${DATADIR}"/food/GLCSv11_02_5m.tif \
71-
--hyde "${DATADIR}"/food/modified_grazing2017AD.asc \
72-
--output "${DATADIR}"/food/
71+
if [ ! -f "${DATADIR}"/food/crop.tif ] || [ ! -f "${DATADIR}"/food/pasture.tif ]; then
72+
python3 ./prepare_layers/build_gaez_hyde.py --gaez "${DATADIR}"/food/GLCSv11_02_5m.tif \
73+
--hyde "${DATADIR}"/food/modified_grazing2017AD.asc \
74+
--output "${DATADIR}"/food/
75+
fi
7376

74-
python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/crop.tif \
75-
--raster_b "${DATADIR}"/food/current_layers/lcc_1401.tif \
76-
--output "${DATADIR}"/food/crop_diff.tif
77+
if [ ! -f "${DATADIR}"/food/crop_diff.tif ]; then
78+
python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/crop.tif \
79+
--raster_b "${DATADIR}"/food/current_layers/lcc_1401.tif \
80+
--output "${DATADIR}"/food/crop_diff.tif
81+
fi
7782

78-
python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/pasture.tif \
79-
--raster_b "${DATADIR}"/food/current_layers/lcc_1402.tif \
80-
--output "${DATADIR}"/food/pasture_diff.tif
83+
if [ ! -f "${DATADIR}"/food/pasture_diff.tif ]; then
84+
python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/pasture.tif \
85+
--raster_b "${DATADIR}"/food/current_layers/lcc_1402.tif \
86+
--output "${DATADIR}"/food/pasture_diff.tif
87+
fi
8188

82-
python3 ./prepare_layers/make_food_current_map.py --current_lvl1 "${DATADIR}"/habitat/current_raw.tif \
83-
--pnv "${DATADIR}"/habitat/pnv_raw.tif \
84-
--crop_diff "${DATADIR}"/food/crop_diff.tif \
85-
--pasture_diff "${DATADIR}"/food/pasture_diff.tif \
86-
--output "${DATADIR}"/food/current_raw.tif
89+
if [ ! -f "${DATADIR}"/food/current_raw.tif ]; then
90+
python3 ./prepare_layers/make_food_current_map.py --current_lvl1 "${DATADIR}"/habitat/current_raw.tif \
91+
--pnv "${DATADIR}"/habitat/pnv_raw.tif \
92+
--crop_diff "${DATADIR}"/food/crop_diff.tif \
93+
--pasture_diff "${DATADIR}"/food/pasture_diff.tif \
94+
--seed 42 \
95+
--output "${DATADIR}"/food/current_raw.tif
96+
fi

0 commit comments

Comments
 (0)