Skip to content

Commit 3025125

Browse files
Use numpydoc on public API of Parcels (#2474)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent f111dfa commit 3025125

File tree

5 files changed

+200
-11
lines changed

5 files changed

+200
-11
lines changed

docs/conf.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
1515
import inspect
1616
import os
1717
import sys
18+
import tomllib
1819
import warnings
20+
from pathlib import Path
21+
22+
PROJECT_ROOT = (Path(__file__).parent / "..").resolve()
1923

2024
# If extensions (or modules to document with autodoc) are in another directory,
2125
# add these directories to sys.path here. If the directory is relative to the
@@ -180,15 +184,20 @@
180184
# ----------------
181185
numpydoc_class_members_toctree = False # https://stackoverflow.com/a/73294408
182186

183-
# full list of numpydoc error codes: https://numpydoc.readthedocs.io/en/latest/validation.html
184-
numpydoc_validation_checks = {
185-
"GL05",
186-
"GL06",
187-
"GL07",
188-
"GL10",
189-
"PR05",
190-
"PR10",
191-
"RT02",
187+
with open(PROJECT_ROOT / "tools/tool-data.toml", "rb") as f:
188+
numpydoc_skip_errors = tomllib.load(f)["numpydoc_skip_errors"]
189+
190+
numpydoc_validation_checks = {"all"} | set(numpydoc_skip_errors)
191+
numpydoc_validation_exclude = { # regex to ignore during docstring check
192+
r"\.__getitem__",
193+
r"\.__contains__",
194+
r"\.__hash__",
195+
r"\.__mul__",
196+
r"\.__sub__",
197+
r"\.__add__",
198+
r"\.__iter__",
199+
r"\.__div__",
200+
r"\.__neg__",
192201
}
193202

194203
# Add any paths that contain custom static files (such as style sheets) here,

pixi.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ gsw = "*"
7575

7676
[feature.docs.dependencies]
7777
parcels = { path = "." }
78-
numpydoc = "!=1.9.0"
78+
numpydoc = "*"
7979
myst-nb = "*"
8080
ipython = "*"
8181
sphinx = "*"
@@ -97,6 +97,12 @@ pre_commit = "*"
9797
[feature.pre-commit.tasks]
9898
lint = "pre-commit run --all-files"
9999

100+
[feature.numpydoc.dependencies]
101+
numpydoc = "*"
102+
103+
[feature.numpydoc.tasks]
104+
numpydoc-lint = "python tools/numpydoc-public-api.py"
105+
100106
[feature.typing.dependencies]
101107
mypy = "*"
102108
lxml = "*" # in CI
@@ -112,6 +118,7 @@ default = { features = [
112118
"notebooks",
113119
"typing",
114120
"pre-commit",
121+
"numpydoc",
115122
], solve-group = "main" }
116123
test = { features = ["test"], solve-group = "main" }
117124
test-minimum = { features = ["test", "minimum"] }

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ select = [
8080
"UP", # pyupgrade
8181
"LOG", # logging
8282
"ICN", # import conventions
83-
"G", # logging-format
83+
# "G", # logging-format
8484
"RUF", # ruff
8585
"ISC001", # single-line-implicit-string-concatenation
8686
"TID", # flake8-tidy-imports

tools/numpydoc-public-api.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#!/usr/bin/env python
2+
"""A script that can be quickly run that explores the public API of Parcels
3+
and validates docstrings along the way according to the numpydoc conventions.
4+
5+
This script is a best attempt, and is meant as a first line of defence (compared
6+
to the sphinx numpydoc integration which is the ground truth - as those are the
7+
docstrings that end up in the documentation).
8+
"""
9+
10+
import functools
11+
import importlib
12+
import logging
13+
import sys
14+
import tomllib
15+
import types
16+
from pathlib import Path
17+
18+
from numpydoc.validate import validate
19+
20+
# Dedicated logger for this script; records are emitted through a stream
# handler as "<LEVEL>: <message>".
logger = logging.getLogger("numpydoc-public-api")
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(fmt="%(levelname)s: %(message)s"))
logger.addHandler(handler)

# Resolved parent of the directory that contains this script.
PROJECT_ROOT = Path(__file__).parent.joinpath("..").resolve()
# Modules whose ``__all__`` is walked to discover the public API.
PUBLIC_MODULES = ["parcels", "parcels.interpolators"]
ROOT_PACKAGE = "parcels"
28+
29+
30+
def is_built_in(type_or_instance: type | object):
31+
if isinstance(type_or_instance, type):
32+
return type_or_instance.__module__ == "builtins"
33+
else:
34+
return type_or_instance.__class__.__module__ == "builtins"
35+
36+
37+
def walk_module(module_str: str, public_api: list[str] | None = None) -> list[str]:
38+
if public_api is None:
39+
public_api = []
40+
41+
module = importlib.import_module(module_str)
42+
try:
43+
all_ = module.__all__
44+
except AttributeError:
45+
print(f"No __all__ variable found in public module {module_str!r}")
46+
return public_api
47+
48+
if module_str not in public_api:
49+
public_api.append(module_str)
50+
for item_str in all_:
51+
item = getattr(module, item_str)
52+
if isinstance(item, types.ModuleType):
53+
walk_module(f"{module_str}.{item_str}", public_api)
54+
if isinstance(item, (types.FunctionType,)):
55+
public_api.append(f"{module_str}.{item_str}")
56+
elif is_built_in(item):
57+
print(f"Found builtin at '{module_str}.{item_str}' of type {type(item)}")
58+
continue
59+
elif isinstance(item, type):
60+
public_api.append(f"{module_str}.{item_str}")
61+
walk_class(module_str, item, public_api)
62+
else:
63+
logger.info(
64+
f"Encountered unexpected public object at '{module_str}.{item_str}' of {item!r} in public API. Don't know how to handle with numpydoc - ignoring."
65+
)
66+
67+
return public_api
68+
69+
70+
def get_public_class_attrs(class_: type) -> set[str]:
    """Return the names in ``dir(class_)`` that do not start with an underscore."""
    return {a for a in dir(class_) if not a.startswith("_")}


def walk_class(module_str: str, class_: type, public_api: list[str]) -> list[str]:
    """Record the public attributes that a class adds on top of its bases.

    Parameters
    ----------
    module_str : str
        Dotted name of the module under which the class is exported.
    class_ : type
        The class to inspect.
    public_api : list of str
        Accumulator that is extended in place with
        ``"<module_str>.<ClassName>.<attr>"`` entries, in sorted order.

    Returns
    -------
    list of str
        The same accumulator that was passed in.
    """
    class_str = class_.__name__

    # Union of the public names visible on every direct base. ``dir`` already
    # includes names inherited further up, so direct bases are sufficient.
    # ``set().union(*...)`` copes with zero, one or many bases, whereas
    # reducing with ``set.add`` fails as soon as there is more than one base.
    inherited = set().union(*(get_public_class_attrs(base) for base in class_.__bases__))

    # Names present on the class but on none of its bases.
    attrs = get_public_class_attrs(class_) - inherited

    # Sorted so the resulting report order is deterministic between runs.
    public_api.extend(f"{module_str}.{class_str}.{attr_str}" for attr_str in sorted(attrs))
    return public_api
84+
85+
86+
def main():
    """Validate the numpydoc docstrings of every object in the public API.

    Command-line flags: ``-v`` raises the log level to INFO and ``-vv`` (or
    more) to DEBUG; the default is WARNING.

    The error codes to ignore are read from ``tools/tool-data.toml`` under
    ``numpydoc_skip_errors``. Every remaining error is printed as
    ``"<item>: <error>"``.

    Raises
    ------
    SystemExit
        Always. The exit status is the number of reported errors, clamped
        to 255, so it is 0 only when no error was reported.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Validate numpydoc docstrings in the public API")
    parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity (can be repeated)")
    args = parser.parse_args()

    # Set logging level based on verbosity: 0=WARNING, 1=INFO, 2+=DEBUG
    log_level = {0: logging.WARNING, 1: logging.INFO}.get(args.verbose, logging.DEBUG)
    logger.setLevel(log_level)

    with open(PROJECT_ROOT / "tools/tool-data.toml", "rb") as f:
        # A set gives constant-time membership tests in the loop below.
        skip_errors = set(tomllib.load(f)["numpydoc_skip_errors"])

    public_api = []
    for module in PUBLIC_MODULES:
        public_api += walk_module(module)
    # A name can be reached through more than one public module; validate it
    # once so its errors are not printed and counted twice.
    public_api = list(dict.fromkeys(public_api))

    errors = 0
    for item in public_api:
        logger.info(f"Processing validating {item}")
        try:
            res = validate(item)
        except (AttributeError, StopIteration) as e:
            logger.warning(f"Could not process {item!r}. Encountered error. {e!r}")
            continue
        if res["type"] in ("module", "float", "int", "dict"):
            continue
        for err in res["errors"]:
            # err[0] is the error code that is matched against the skip list.
            if err[0] not in skip_errors:
                print(f"{item}: {err}")
                errors += 1

    # Exit statuses are truncated to 8 bits on POSIX, so an unclamped count
    # of exactly 256 would be reported to the shell as success.
    sys.exit(min(errors, 255))
124+
125+
126+
if __name__ == "__main__":
127+
main()

tools/tool-data.toml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# full list of numpydoc error codes: https://numpydoc.readthedocs.io/en/latest/validation.html
2+
numpydoc_skip_errors = [
3+
"SA01", # Parcels doesn't require the "See also" section
4+
"SA04", # Missing description for See Also "{reference_name}" reference
5+
"ES01", # We don't require the extended summary for all docstrings
6+
"EX01", # We don't require the "Examples" section for all docstrings
7+
"SS06", # Not possible to make all summaries one line
8+
9+
# To be fixed up
10+
"GL03", # Double line break found; please use only one blank line to separate sections or paragraphs, and do not leave blank lines at the end of docstrings
11+
"GL05", # Tabs found at the start of line "{line_with_tabs}", please use whitespace only
12+
"GL06", # Found unknown section "{section}". Allowed sections are: {allowed_sections}
13+
"GL07", # Sections are in the wrong order. Correct order is: {correct_sections}
14+
"GL08", # The object does not have a docstring
15+
"SS01", # No summary found (a short summary in a single line should be present at the beginning of the docstring)
16+
"SS02", # Summary does not start with a capital letter
17+
"SS03", # Summary does not end with a period
18+
"SS04", # Summary contains heading whitespaces
19+
"SS05", # Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
20+
"PR01", # Parameters {missing_params} not documented
21+
"PR02", # Unknown parameters {unknown_params}
22+
"PR03", # Wrong parameters order. Actual: {actual_params}. Documented: {documented_params}
23+
"SA02", # Missing period at end of description for See Also "{reference_name}" reference
24+
"SA03", # Description should be capitalized for See Also "{reference_name}" reference
25+
26+
#? Might conflict with Ruff rules. Needs more testing... Enable ignore if they conflict
27+
# "GL01", # Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
28+
# "GL02", # Closing quotes should be placed in the line after the last text in the docstring (do not close the quotes in the same line as the text, or leave a blank line between the last text and the quotes)
29+
30+
# TODO consider whether to continue ignoring the following
31+
"GL09", # Deprecation warning should precede extended summary
32+
"GL10", # reST directives {directives} must be followed by two colons
33+
"PR04", # Parameter "{param_name}" has no type
34+
"PR05", # Parameter "{param_name}" type should not finish with "."
35+
"PR06", # Parameter "{param_name}" type should use "{right_type}" instead of "{wrong_type}"
36+
"PR07", # Parameter "{param_name}" has no description
37+
"PR08", # Parameter "{param_name}" description should start with a capital letter
38+
"PR09", # Parameter "{param_name}" description should finish with "."
39+
"PR10", # Parameter "{param_name}" requires a space before the colon separating the parameter name and type
40+
"RT01", # No Returns section found
41+
"RT02", # The first line of the Returns section should contain only the type, unless multiple values are being returned
42+
"RT03", # Return value has no description
43+
"RT04", # Return value description should start with a capital letter
44+
"RT05", # Return value description should finish with "."
45+
"YD01", # No Yields section found
46+
]

0 commit comments

Comments
 (0)