Skip to content

Commit 4bf49a2

Browse files
committed
Fixed linting; attempted to reformat some docstrings to match NumPy style
1 parent c3d1ce9 commit 4bf49a2

File tree

4 files changed

+68
-63
lines changed

4 files changed

+68
-63
lines changed

src/rushd/flow.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def load_csv_with_metadata(
9393
filename_regex: Optional[str] = None,
9494
*,
9595
columns: Optional[List[str]] = None,
96-
csv_kwargs: Optional[Dict[str, Any]] = {},
96+
csv_kwargs: Optional[Dict[str, Any]] = None,
9797
) -> pd.DataFrame:
9898
"""
9999
Load .csv data into DataFrame with associated metadata.
@@ -186,7 +186,7 @@ def load_groups_with_metadata(
186186
filename_regex: Optional[str] = None,
187187
*,
188188
columns: Optional[List[str]] = None,
189-
csv_kwargs: Optional[Dict[str, Any]] = {},
189+
csv_kwargs: Optional[Dict[str, Any]] = None,
190190
) -> pd.DataFrame:
191191
"""
192192
Load .csv data into DataFrame with associated metadata by group.
@@ -264,7 +264,7 @@ def load_csv(
264264
filename_regex: Optional[str] = None,
265265
*,
266266
columns: Optional[List[str]] = None,
267-
csv_kwargs: Optional[Dict[str, Any]] = {},
267+
csv_kwargs: Optional[Dict[str, Any]] = None,
268268
) -> pd.DataFrame:
269269
"""
270270
Load .csv data into DataFrame without additional metadata.

src/rushd/io.py

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
"""
22
A submodule implementing common IO handling mechanisms.
33
4-
## Rationale
4+
Rationale
5+
---------
56
File and folder management is a common problem when
67
handling large datasets. You often want to separate
78
out large data from your code. How do you keep track
89
of where your data is, especially if moving between
910
different computers/clusters?
1011
11-
`rushd.io` adds convenience functions to handle
12+
``rushd.io`` adds convenience functions to handle
1213
common cases, as well as writing metadata with
1314
your output files that identify input files.
1415
"""
@@ -124,6 +125,12 @@ def git_version() -> Optional[str]:
124125
if there are edits that have not been saved.
125126
Returns None if the current working directory is
126127
not contained within a git repository.
128+
129+
Returns
130+
-------
131+
str or None
132+
Current version control state as a string, or None if the current working directory is not
133+
contained within a git repository
127134
"""
128135
git_log = subprocess.run(
129136
["git", "log", "-n1", "--format=format:%H"], check=False, capture_output=True
@@ -158,7 +165,8 @@ def _is_relative_to(path: Path, base_path: Path) -> bool:
158165
159166
Returns
160167
-------
161-
True if `path` can be written as a relative path to `base_path`, False otherwise
168+
bool
169+
True if `path` can be written as a relative path to `base_path`, False otherwise
162170
"""
163171
try:
164172
_ = path.relative_to(base_path)
@@ -188,7 +196,8 @@ def infile(filename: Union[str, Path], tag: Optional[str] = None, should_hash: b
188196
189197
Returns
190198
-------
191-
A Path object that represents the same file as `filename`.
199+
Path
200+
A Path object that represents the same file as `filename`.
192201
"""
193202
if not isinstance(filename, Path):
194203
filename = Path(filename)
@@ -225,6 +234,20 @@ def outfile(filename: Union[str, Path], tag: Optional[str] = None) -> Path:
225234
Any needed subdirectories will be created if the outfile is relative
226235
to datadir or rootdir.
227236
237+
Example
238+
-------
239+
For output file `out.txt`, writes a YAML file `out.txt.yaml`
240+
that encodes the following type of metadata:
241+
::
242+
243+
type: tracked_outfile
244+
name: out.txt
245+
date: 2022-01-31
246+
git_version: 13a81aa2a7b1035f6b59c2323b0a7c457eb1657e
247+
dependencies:
248+
- file: some_infile.csv
249+
path_type: datadir_relative
250+
228251
Parameters
229252
----------
230253
filename: str or Path
@@ -234,22 +257,8 @@ def outfile(filename: Union[str, Path], tag: Optional[str] = None) -> Path:
234257
235258
Returns
236259
-------
237-
A Path object that represents the same file as `filename`.
238-
239-
Side-effects
240-
------------
241-
For output file `out.txt`, writes a YAML file `out.txt.yaml`
242-
that encodes the following type of metadata:
243-
244-
```yaml
245-
type: tracked_outfile
246-
name: out.txt
247-
date: 2022-01-31
248-
git_version: 13a81aa2a7b1035f6b59c2323b0a7c457eb1657e
249-
dependencies:
250-
- file: some_infile.csv
251-
path_type: datadir_relative
252-
```
260+
Path
261+
A Path object that represents the same file as `filename`.
253262
"""
254263
if not isinstance(filename, Path):
255264
filename = Path(filename)
@@ -326,9 +335,10 @@ def cache_dataframe(cache_path: Union[Path, str]) -> Callable[..., Callable[...,
326335
327336
Returns
328337
-------
329-
A function that generates a dataframe with optional caching.
330-
An extra keyword argument, 'invalidate' is added that invalidates
331-
the cache if needed
338+
Callable[..., Callable[..., pd.DataFrame]]
339+
A function that generates a dataframe with optional caching.
340+
An extra keyword argument, 'invalidate' is added that invalidates
341+
the cache if needed
332342
"""
333343
if not isinstance(cache_path, Path):
334344
savepath = Path(cache_path)

src/rushd/qpcr.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def load_single_csv_with_metadata(
4444
*,
4545
well_column: Optional[str] = "well",
4646
columns: Optional[List[str]] = None,
47-
csv_kwargs: Optional[Dict[str, Any]] = {},
47+
csv_kwargs: Optional[Dict[str, Any]] = None,
4848
is_default: Optional[bool] = False,
4949
) -> pd.DataFrame:
5050
"""
@@ -92,15 +92,16 @@ def load_single_csv_with_metadata(
9292
# Check that a single file (not a directory) has been passed
9393
if data_path.is_dir():
9494
raise DataPathError(
95-
"'data_path' must be a single file. To load multiple files, use 'load_csv_with_metadata'"
95+
"'data_path' must be a single file. To load multiple files, use"
96+
"'load_csv_with_metadata'"
9697
)
9798
file = data_path
9899

99100
# Overwrite args with those relevant for
100101
if is_default:
101102
well_column = "Pos"
102103
columns = ["Cp"]
103-
csv_kwargs = dict(sep="\t", header=1)
104+
csv_kwargs = {"delimiter": "\t", "header": 1}
104105

105106
# Load the first row so we get the column names
106107
df_onerow = pd.read_csv(file, nrows=1, **csv_kwargs)
@@ -131,7 +132,7 @@ def load_plates_with_metadata(
131132
*,
132133
well_column: Optional[str] = "well",
133134
columns: Optional[List[str]] = None,
134-
csv_kwargs: Optional[Dict[str, Any]] = {},
135+
csv_kwargs: Optional[Dict[str, Any]] = None,
135136
is_default: Optional[bool] = False,
136137
) -> pd.DataFrame:
137138
"""

src/rushd/well_mapper.py

Lines changed: 27 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
Rationale
44
---------
55
Helper module that parses plate specifications of the form:
6-
```yaml
7-
MEF-low: A1-E1
8-
MEF-bulk: F1-H1, A2-H2, A3-B3
9-
retroviral: A1-H12
10-
```
6+
::
7+
8+
MEF-low: A1-E1
9+
MEF-bulk: F1-H1, A2-H2, A3-B3
10+
retroviral: A1-H12
11+
1112
and returns a dictionary that lets you map from well number to a
1213
plate specification.
1314
@@ -22,19 +23,17 @@
2223
A *well specification* is a string containing a comma-separated list of
2324
*region specifiers*. A region specifier is one of two forms, a single
2425
well form:
26+
::
2527
26-
```
2728
A1
2829
B05
29-
```
3030
3131
or a rectangular region form:
32+
::
3233
33-
```
3434
A1-A12
3535
B05-D8
3636
B05 - C02
37-
```
3837
3938
As seen in these examples, the rectangular region form is distinguished
4039
by the presence of a hyphen between two single-well identifiers. Whitespace
@@ -46,54 +45,49 @@
4645
4746
Within a single specifier, duplicate entries are *ignored*. That is, the following
4847
specifiers are all equivalent:
48+
::
4949
50-
```
5150
A5-B7
5251
A5,A6,A7,B5,B6,B7
5352
A5-B7,B6
5453
A5-B7,B5-B7
55-
```
5654
5755
A *plate specification* is either a dictionary (if order is not important)
5856
or a sequence of dictionaries (if order is important). The difference between these
5957
in a YAML underlying format is:
58+
::
6059
61-
```yaml
62-
test: A5-A7
63-
test2: A5-A9
64-
```
60+
test: A5-A7
61+
test2: A5-A9
6562
66-
which yields `{'test': 'A5-A7', 'test2': 'A5-A9'}`
63+
which yields ``{'test': 'A5-A7', 'test2': 'A5-A9'}``
6764
and
65+
::
6866
69-
```yaml
70-
- test: A5-A7
71-
- test2: A5-A9
72-
```
67+
- test: A5-A7
68+
- test2: A5-A9
7369
74-
which yields `[{'test': 'A5-A7'}, {'test2': 'A5-A9'}]`
70+
which yields ``[{'test': 'A5-A7'}, {'test2': 'A5-A9'}]``
7571
7672
This module reads either of these formats. It iterates over each of the well specifications,
7773
building up a dictionary that maps wells to conditions. If multiple well specifications overlap,
7874
then condition names are merged in the order in which they appear, separated by a separator
7975
(by default, a period). This allows very concise condition layouts, such as the following:
76+
::
8077
81-
```yaml
82-
conditions:
83-
MEF: A1-C12
84-
293: D1-F12
85-
untransformed: A1-D3
86-
experimental: A4-D12
87-
```
78+
conditions:
79+
MEF: A1-C12
80+
293: D1-F12
81+
untransformed: A1-D3
82+
experimental: A4-D12
8883
8984
will return a well map of the form:
85+
::
9086
91-
```
92-
{'A1': 'MEF.untransformed', ..., 'C10: 293.experimental'}
93-
```
87+
{'A1': 'MEF.untransformed', ..., 'C10: 293.experimental'}
9488
95-
Both the non-normalized (e.g. no leading zeros, `A1`) and normalized
96-
(e.g. with leading zeros, `A01`) forms are returned for mapping.
89+
Both the non-normalized (e.g. no leading zeros, ``A1``) and normalized
90+
(e.g. with leading zeros, ``A01``) forms are returned for mapping.
9791
"""
9892

9993
import itertools

0 commit comments

Comments
 (0)