11"""
22A submodule implementing common IO handling mechanisms.
33
4- ## Rationale
4+ Rationale
5+ ---------
56File and folder management is a common problem when
67handling large datasets. You often want to separate
78out large data from your code. How do you keep track
89of where your data is, especially if moving between
910different computers/clusters?
1011
11- `rushd.io` adds convenience functions to handle
12+ ``rushd.io`` adds convenience functions to handle
1213common cases, as well as writing metadata with
1314your output files that identify input files.
1415"""
@@ -124,6 +125,12 @@ def git_version() -> Optional[str]:
124125 if there are edits that have not been saved.
125126 Returns None if the current working directory is
126127 not contained within a git repository.
128+
129+ Returns
130+ -------
131+ str or None
132+ Current version control state as a string, or None if the current working directory is not
133+ contained within a git repository.
127134 """
128135 git_log = subprocess .run (
129136 ["git" , "log" , "-n1" , "--format=format:%H" ], check = False , capture_output = True
@@ -158,7 +165,8 @@ def _is_relative_to(path: Path, base_path: Path) -> bool:
158165
159166 Returns
160167 -------
161- True if `path` can be written as a relative path to `base_path`, False otherwise
168+ bool
169+ True if `path` can be written as a relative path to `base_path`, False otherwise
162170 """
163171 try :
164172 _ = path .relative_to (base_path )
@@ -188,7 +196,8 @@ def infile(filename: Union[str, Path], tag: Optional[str] = None, should_hash: b
188196
189197 Returns
190198 -------
191- A Path object that represents the same file as `filename`.
199+ Path
200+ A Path object that represents the same file as `filename`.
192201 """
193202 if not isinstance (filename , Path ):
194203 filename = Path (filename )
@@ -225,6 +234,20 @@ def outfile(filename: Union[str, Path], tag: Optional[str] = None) -> Path:
225234 Any needed subdirectories will be created if the outfile is relative
226235 to datadir or rootdir.
227236
237+ Examples
238+ --------
239+ For output file `out.txt`, writes a YAML file `out.txt.yaml`
240+ that encodes the following type of metadata:
241+ ::
242+
243+ type: tracked_outfile
244+ name: out.txt
245+ date: 2022-01-31
246+ git_version: 13a81aa2a7b1035f6b59c2323b0a7c457eb1657e
247+ dependencies:
248+ - file: some_infile.csv
249+ path_type: datadir_relative
250+
228251 Parameters
229252 ----------
230253 filename: str or Path
@@ -234,22 +257,8 @@ def outfile(filename: Union[str, Path], tag: Optional[str] = None) -> Path:
234257
235258 Returns
236259 -------
237- A Path object that represents the same file as `filename`.
238-
239- Side-effects
240- ------------
241- For output file `out.txt`, writes a YAML file `out.txt.yaml`
242- that encodes the following type of metadata:
243-
244- ```yaml
245- type: tracked_outfile
246- name: out.txt
247- date: 2022-01-31
248- git_version: 13a81aa2a7b1035f6b59c2323b0a7c457eb1657e
249- dependencies:
250- - file: some_infile.csv
251- path_type: datadir_relative
252- ```
260+ Path
261+ A Path object that represents the same file as `filename`.
253262 """
254263 if not isinstance (filename , Path ):
255264 filename = Path (filename )
@@ -326,9 +335,10 @@ def cache_dataframe(cache_path: Union[Path, str]) -> Callable[..., Callable[...,
326335
327336 Returns
328337 -------
329- A function that generates a dataframe with optional caching.
330- An extra keyword argument, 'invalidate' is added that invalidates
331- the cache if needed
338+ Callable[..., Callable[..., pd.DataFrame]]
339+ A function that generates a dataframe with optional caching.
340+ An extra keyword argument, 'invalidate', is added that invalidates
341+ the cache if needed.
332342 """
333343 if not isinstance (cache_path , Path ):
334344 savepath = Path (cache_path )
0 commit comments