fixed linting, attempted to reformat some docstrings to match numpy style

kaseylove · kaseylove · commit 4bf49a2324bc · 2026-01-27T12:09:57.000-05:00
diff --git a/src/rushd/flow.py b/src/rushd/flow.py
@@ -93,7 +93,7 @@ def load_csv_with_metadata(
     filename_regex: Optional[str] = None,
     *,
     columns: Optional[List[str]] = None,
-    csv_kwargs: Optional[Dict[str, Any]] = {},
+    csv_kwargs: Optional[Dict[str, Any]] = None,
 ) -> pd.DataFrame:
     """
     Load .csv data into DataFrame with associated metadata.
@@ -186,7 +186,7 @@ def load_groups_with_metadata(
     filename_regex: Optional[str] = None,
     *,
     columns: Optional[List[str]] = None,
-    csv_kwargs: Optional[Dict[str, Any]] = {},
+    csv_kwargs: Optional[Dict[str, Any]] = None,
 ) -> pd.DataFrame:
     """
     Load .csv data into DataFrame with associated metadata by group.
@@ -264,7 +264,7 @@ def load_csv(
     filename_regex: Optional[str] = None,
     *,
     columns: Optional[List[str]] = None,
-    csv_kwargs: Optional[Dict[str, Any]] = {},
+    csv_kwargs: Optional[Dict[str, Any]] = None,
 ) -> pd.DataFrame:
     """
     Load .csv data into DataFrame without additional metadata.
diff --git a/src/rushd/io.py b/src/rushd/io.py
@@ -1,14 +1,15 @@
 """
 A submodule implementing common IO handling mechanisms.
 
-## Rationale
+Rationale
+---------
 File and folder management is a common problem when
 handling large datasets. You often want to separate
 out large data from your code. How do you keep track
 of where your data is, especially if moving between
 different computers/clusters?
 
-`rushd.io` adds convenience functions to handle
+``rushd.io`` adds convenience functions to handle
 common cases, as well as writing metadata with
 your output files that identify input files.
 """
@@ -124,6 +125,12 @@ def git_version() -> Optional[str]:
     if there are edits that have not been saved.
     Returns None if the current working directory is
     not contained within a git repository.
+
+    Returns
+    -------
+    str or None
+        Current version control state as a string, or None if the current working directory is not
+        contained within a git repository
     """
     git_log = subprocess.run(
         ["git", "log", "-n1", "--format=format:%H"], check=False, capture_output=True
@@ -158,7 +165,8 @@ def _is_relative_to(path: Path, base_path: Path) -> bool:
 
     Returns
     -------
-    True if `path` can be written as a relative path to `base_path`, False otherwise
+    bool
+        True if `path` can be written as a relative path to `base_path`, False otherwise
     """
     try:
         _ = path.relative_to(base_path)
@@ -188,7 +196,8 @@ def infile(filename: Union[str, Path], tag: Optional[str] = None, should_hash: b
 
     Returns
     -------
-    A Path object that represents the same file as `filename`.
+    Path
+        A Path object that represents the same file as `filename`.
     """
     if not isinstance(filename, Path):
         filename = Path(filename)
@@ -225,6 +234,20 @@ def outfile(filename: Union[str, Path], tag: Optional[str] = None) -> Path:
     Any needed subdirectories will be created if the outfile is relative
     to datadir or rootdir.
 
+    Example
+    -------
+    For output file `out.txt`, writes a YAML file `out.txt.yaml`
+    that encodes the following type of metadata:
+    ::
+
+        type: tracked_outfile
+        name: out.txt
+        date: 2022-01-31
+        git_version: 13a81aa2a7b1035f6b59c2323b0a7c457eb1657e
+        dependencies:
+          - file: some_infile.csv
+            path_type: datadir_relative
+
     Parameters
     ----------
     filename: str or Path
@@ -234,22 +257,8 @@ def outfile(filename: Union[str, Path], tag: Optional[str] = None) -> Path:
 
     Returns
     -------
-    A Path object that represents the same file as `filename`.
-
-    Side-effects
-    ------------
-    For output file `out.txt`, writes a YAML file `out.txt.yaml`
-    that encodes the following type of metadata:
-
-    ```yaml
-    type: tracked_outfile
-    name: out.txt
-    date: 2022-01-31
-    git_version: 13a81aa2a7b1035f6b59c2323b0a7c457eb1657e
-    dependencies:
-      - file: some_infile.csv
-        path_type: datadir_relative
-    ```
+    Path
+        A Path object that represents the same file as `filename`.
     """
     if not isinstance(filename, Path):
         filename = Path(filename)
@@ -326,9 +335,10 @@ def cache_dataframe(cache_path: Union[Path, str]) -> Callable[..., Callable[...,
 
     Returns
     -------
-    A function that generates a dataframe with optional caching.
-    An extra keyword argument, 'invalidate' is added that invalidates
-    the cache if needed
+    Callable[..., Callable[..., pd.DataFrame]]
+        A function that generates a dataframe with optional caching.
+        An extra keyword argument, 'invalidate' is added that invalidates
+        the cache if needed
     """
     if not isinstance(cache_path, Path):
         savepath = Path(cache_path)
diff --git a/src/rushd/qpcr.py b/src/rushd/qpcr.py
@@ -44,7 +44,7 @@ def load_single_csv_with_metadata(
     *,
     well_column: Optional[str] = "well",
     columns: Optional[List[str]] = None,
-    csv_kwargs: Optional[Dict[str, Any]] = {},
+    csv_kwargs: Optional[Dict[str, Any]] = None,
     is_default: Optional[bool] = False,
 ) -> pd.DataFrame:
     """
@@ -92,15 +92,16 @@ def load_single_csv_with_metadata(
     # Check that a single file (not a directory) has been passed
     if data_path.is_dir():
         raise DataPathError(
-            "'data_path' must be a single file. To load multiple files, use 'load_csv_with_metadata'"
+            "'data_path' must be a single file. To load multiple files, use "
+            "'load_csv_with_metadata'"
         )
     file = data_path
 
     # Overwrite args with those relevant for
     if is_default:
         well_column = "Pos"
         columns = ["Cp"]
-        csv_kwargs = dict(sep="\t", header=1)
+        csv_kwargs = {"delimiter": "\t", "header": 1}
 
     # Load the first row so we get the column names
     df_onerow = pd.read_csv(file, nrows=1, **csv_kwargs)
@@ -131,7 +132,7 @@ def load_plates_with_metadata(
     *,
     well_column: Optional[str] = "well",
     columns: Optional[List[str]] = None,
-    csv_kwargs: Optional[Dict[str, Any]] = {},
+    csv_kwargs: Optional[Dict[str, Any]] = None,
     is_default: Optional[bool] = False,
 ) -> pd.DataFrame:
     """
diff --git a/src/rushd/well_mapper.py b/src/rushd/well_mapper.py
@@ -3,11 +3,12 @@
 Rationale
 ---------
 Helper module that parses plate specifications of the form:
-```yaml
-MEF-low: A1-E1
-MEF-bulk: F1-H1, A2-H2, A3-B3
-retroviral: A1-H12
-```
+::
+
+    MEF-low: A1-E1
+    MEF-bulk: F1-H1, A2-H2, A3-B3
+    retroviral: A1-H12
+
 and returns a dictionary that lets you map from well number to a
 plate specification.
 
@@ -22,19 +23,17 @@
 A *well specification* is a string containing a comma-separated list of
 *region specifiers*. A region specifier is one of two forms, a single
 well form:
+::
 
-```
     A1
     B05
-```
 
 or a rectangular region form:
+::
 
-```
     A1-A12
     B05-D8
     B05 - C02
-```
 
 As seen in these examples, the rectangular region form is distinguished
 by the presence of a hyphen between two single-well identifiers. Whitespace
@@ -46,54 +45,49 @@
 
 Within a single specifier, duplicate entries are *ignored*. That is, the following
 specifiers are all equivalent:
+::
 
-```
     A5-B7
     A5,A6,A7,B5,B6,B7
     A5-B7,B6
     A5-B7,B5-B7
-```
 
 A *plate specification* is either a dictionary (if order is not important)
 or a sequence of dictionaries (if order is important). The difference between these
 in a YAML underlying format is:
+::
 
-```yaml
-test: A5-A7
-test2: A5-A9
-```
+    test: A5-A7
+    test2: A5-A9
 
-which yields `{'test': 'A5-A7', 'test2': 'A5-A9'}`
+which yields ``{'test': 'A5-A7', 'test2': 'A5-A9'}``
 and
+::
 
-```yaml
-- test: A5-A7
-- test2: A5-A9
-```
+    - test: A5-A7
+    - test2: A5-A9
 
-which yields `[{'test': 'A5-A7'}, {'test2': 'A5-A9'}]`
+which yields ``[{'test': 'A5-A7'}, {'test2': 'A5-A9'}]``
 
 This module reads either of these formats. It iterates over each of the well specifications,
 building up a dictionary that maps wells to conditions. If multiple well specifications overlap,
 then condition names are merged in the order in which they appear, separated by a separator
 (by default, a period). This allows very concise condition layouts, such as the following:
+::
 
-```yaml
-conditions:
-    MEF: A1-C12
-    293: D1-F12
-    untransformed: A1-D3
-    experimental: A4-D12
-```
+    conditions:
+        MEF: A1-C12
+        293: D1-F12
+        untransformed: A1-D3
+        experimental: A4-D12
 
 will return a well map of the form:
+::
 
-```
-{'A1': 'MEF.untransformed', ..., 'C10: 293.experimental'}
-```
+    {'A1': 'MEF.untransformed', ..., 'D10': '293.experimental'}
 
-Both the non-normalized (e.g. no leading zeros, `A1`) and normalized
-(e.g. with leading zeros, `A01`) forms are returned for mapping.
+Both the non-normalized (e.g. no leading zeros, ``A1``) and normalized
+(e.g. with leading zeros, ``A01``) forms are returned for mapping.
 """
 
 import itertools