Skip to content

Commit 6328e09

Browse files
ehinman and jzemmels authored
Make ref table function way simpler and more efficient (#209)
* make ref table function way simpler and more efficient
* add more documentation, an example
* add in the deduplication line
* add required packages for gpd.explore in docs and move a requirement to docs section of pyproject.toml
* Update dataretrieval/waterdata/api.py

  Co-authored-by: Joe Zemmels (he/him) <jzemmels@gmail.com>
* update notebook to pip install all required packages and fix small change

---------

Co-authored-by: Joe Zemmels (he/him) <jzemmels@gmail.com>
1 parent 3d96d5c commit 6328e09

File tree

5 files changed

+49
-38
lines changed

5 files changed

+49
-38
lines changed

.github/workflows/sphinx-docs.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ jobs:
1818
shell: bash -l {0}
1919
run: |
2020
python -m pip install --upgrade pip
21-
pip install "docutils<0.22"
2221
pip install .[doc,nldi]
2322
ipython kernel install --name "python3" --user
2423
sudo apt update -y && sudo apt install -y latexmk texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended dvipng pandoc

dataretrieval/waterdata/api.py

Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1417,6 +1417,7 @@ def get_field_measurements(
14171417

14181418
return get_ogc_data(args, output_id, service)
14191419

1420+
14201421
def get_reference_table(
14211422
collection: str,
14221423
limit: Optional[int] = None,
@@ -1441,6 +1442,27 @@ def get_reference_table(
14411442
allowable limit is 50000. It may be beneficial to set this number lower
14421443
if your internet connection is spotty. The default (None) will set the
14431444
limit to the maximum allowable limit for the service.
1445+
1446+
Returns
1447+
-------
1448+
df : ``pandas.DataFrame`` or ``geopandas.GeoDataFrame``
1449+
Formatted data returned from the API query. The primary metadata
1450+
of each reference table will show up in the first column, where
1451+
the name of the column is the singular form of the collection name,
1452+
separated by underscores (e.g. the "medium-codes" reference table
1453+
has a column called "medium_code", which contains all possible
1454+
medium code values).
1455+
md: :obj:`dataretrieval.utils.Metadata`
1456+
A custom metadata object including the URL request and query time.
1457+
1458+
Examples
1459+
--------
1460+
.. code::
1461+
1462+
>>> # Get table of USGS parameter codes
1463+
>>> ref, md = dataretrieval.waterdata.get_reference_table(
1464+
... collection="parameter-codes"
1465+
... )
14441466
"""
14451467
valid_code_services = get_args(METADATA_COLLECTIONS)
14461468
if collection not in valid_code_services:
@@ -1449,29 +1471,19 @@ def get_reference_table(
14491471
f"Valid options are: {valid_code_services}."
14501472
)
14511473

1452-
req = _construct_api_requests(
1453-
service=collection,
1454-
limit=limit,
1455-
skip_geometry=True,
1456-
)
1457-
# Run API request and iterate through pages if needed
1458-
return_list, response = _walk_pages(
1459-
geopd=False, req=req
1460-
)
1461-
1462-
# Give ID column a more meaningful name
1463-
if collection.endswith("s"):
1464-
return_list = return_list.rename(
1465-
columns={"id": f"{collection[:-1].replace('-', '_')}_id"}
1466-
)
1474+
# Give ID column the collection name with underscores
1475+
if collection.endswith("s") and collection != "counties":
1476+
output_id = f"{collection[:-1].replace('-', '_')}"
1477+
elif collection == "counties":
1478+
output_id = "county"
14671479
else:
1468-
return_list = return_list.rename(
1469-
columns={"id": f"{collection.replace('-', '_')}_id"}
1470-
)
1471-
1472-
# Create metadata object from response
1473-
metadata = BaseMetadata(response)
1474-
return return_list, metadata
1480+
output_id = f"{collection.replace('-', '_')}"
1481+
1482+
return get_ogc_data(
1483+
args={},
1484+
output_id=output_id,
1485+
service=collection
1486+
)
14751487

14761488

14771489
def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:

demos/WaterData_demo.ipynb

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -87,20 +87,17 @@
8787
"metadata": {},
8888
"source": [
8989
"## Examples\n",
90-
"Let's get into some examples using the functions listed above. First, we need to load the `waterdata` module and a few other packages and functions to go through the examples. To run the entirety of this notebook, you will need to install `dataretrieval`, `matplotlib`, and `geopandas` packages. `matplotlib` is needed to create the plots, and `geopandas` is needed to create the interactive maps."
91-
]
92-
},
93-
{
94-
"cell_type": "code",
95-
"execution_count": null,
96-
"id": "cd626a14",
97-
"metadata": {},
98-
"outputs": [],
99-
"source": [
100-
"# Install necessary packages to run notebook\n",
90+
"Let's get into some examples using the functions listed above. First, we need to load the `waterdata` module and a few other packages and functions to go through the examples. To run the entirety of this notebook, you will need to install `dataretrieval`, `matplotlib`, and `geopandas` packages (plus dependencies). `matplotlib` is needed to create the plots, and `geopandas` is needed to create the interactive maps.\n",
91+
"\n",
92+
"Note that if you use conda rather than pip, you do not need to install folium and mapclassify separately, as they are included in the conda-forge geopandas install.\n",
93+
"\n",
94+
"```python\n",
10195
"!pip install dataretrieval\n",
10296
"!pip install matplotlib\n",
103-
"!pip install geopandas"
97+
"!pip install geopandas\n",
98+
"!pip install folium\n",
99+
"!pip install mapclassify\n",
100+
"``` "
104101
]
105102
},
106103
{
@@ -156,7 +153,7 @@
156153
"outputs": [],
157154
"source": [
158155
"streamflow_pcodes = pcodes[pcodes['parameter_name'].str.contains('streamflow|discharge', case=False, na=False)]\n",
159-
"display(streamflow_pcodes[['parameter_code_id', 'parameter_name']])"
156+
"display(streamflow_pcodes[['parameter_code', 'parameter_name']])"
160157
]
161158
},
162159
{
@@ -599,7 +596,7 @@
599596
],
600597
"metadata": {
601598
"kernelspec": {
602-
"display_name": "waterdata-demo",
599+
"display_name": "waterdata-demo-pip",
603600
"language": "python",
604601
"name": "python3"
605602
},

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,16 @@ test = [
3939
"flake8",
4040
]
4141
doc = [
42+
"docutils<0.22",
4243
"sphinx",
4344
"sphinx-rtd-theme",
4445
"nbsphinx",
4546
"nbsphinx_link",
4647
"ipython",
4748
"ipykernel",
4849
"matplotlib",
50+
"folium>=0.12",
51+
"mapclassify"
4952
]
5053
nldi = [
5154
'geopandas>=0.10'

tests/waterdata_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ def test_get_time_series_metadata():
248248

249249
def test_get_reference_table():
250250
df, md = get_reference_table("agency-codes")
251-
assert "agency_code_id" in df.columns
251+
assert "agency_code" in df.columns
252252
assert df.shape[0] > 0
253253
assert hasattr(md, 'url')
254254
assert hasattr(md, 'query_time')

0 commit comments

Comments
 (0)