Skip to content

Commit 5da48f7

Browse files
committed
removed wip features, see future_features branch instead
1 parent 20209c1 commit 5da48f7

File tree

3 files changed

+0
-215
lines changed

3 files changed

+0
-215
lines changed

src/rushd/ddpcr.py

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -177,52 +177,3 @@ def load_ddpcr(
177177

178178
return data
179179

180-
181-
def calculate_copy_number(
    df: pd.DataFrame,
    exp_channel: str,
    ref_channel: str,
    gates: Dict[str, float],
    *,
    ref_copy_num: float = 2.0,
) -> pd.DataFrame:
    """
    Calculates copy number of an experimental target relative to a
    reference target.

    Adds a column to the DataFrame with this computed value.

    For each channel, the mean number of target copies per droplet is
    estimated from the fraction of negative droplets as
    ``-ln(num_negative / num_total)`` (the zero-class estimate of a Poisson
    mean). The experimental estimate is divided by the reference estimate
    and scaled by the known copy number of the reference.

    Parameters
    ----------
    df: pandas DataFrame
        Data on which to calculate. Must contain columns corresponding
        to the experimental and reference channels.
    exp_channel: str
        Column in df containing measurements for the experimental target.
    ref_channel: str
        Column in df containing measurements for the reference target.
    gates: dict of (str: float) pairs
        Gates specifying threshold for positive droplets, one for each
        experimental and reference channel. A droplet is negative when its
        value is strictly below the gate.
    ref_copy_num: float, default 2.0
        Known copy number of the reference gene. If not specified, defaults
        to 2.0 (diploid).

    Returns
    -------
    The original DataFrame (modified in place) with a new column 'copy_num'
    containing the computed value, which is the same for every row.

    Raises
    ------
    KeyError
        If a channel is not a column of df, or has no entry in gates.
    ValueError
        If df has no rows, if either channel has no negative droplets, or
        if the reference channel has no positive droplets. In each of these
        cases the ratio is undefined or infinite.
    """
    channels = (exp_channel, ref_channel)
    for channel in channels:
        if channel not in df.columns:
            raise KeyError(f"Data is missing the channel column '{channel}'")
        if channel not in gates:
            raise KeyError(f"No gate was provided for channel '{channel}'")

    num_total = len(df)
    if num_total == 0:
        raise ValueError("Cannot calculate copy number: data contains no droplets")

    # Compute copies per droplet: -ln(num_negative / num_total)
    copies = {}
    for channel in channels:
        num_negative = int((df[channel] < gates[channel]).sum())
        if num_negative == 0:
            # -ln(0) is infinite: every droplet is positive, so the
            # negative fraction carries no information about the mean.
            raise ValueError(
                f"Cannot calculate copy number: channel '{channel}' has no negative droplets"
            )
        copies[channel] = -np.log(num_negative / num_total)

    if copies[ref_channel] == 0:
        # All reference droplets are negative, so the ratio would divide by zero.
        raise ValueError(
            f"Cannot calculate copy number: reference channel '{ref_channel}' "
            "has no positive droplets"
        )

    # Normalize copies to the reference gene
    df['copy_num'] = copies[exp_channel] / copies[ref_channel] * ref_copy_num
    return df

src/rushd/flow.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -332,13 +332,6 @@ def load_csv(
332332
return data
333333

334334

335-
# draw & optionally assign gates based on quantile (df, channels, quantile(s), multi_gate) -> (df, gates)
336-
# assign '[channel]_positive', assign multi-gate '[channel1]_[channel2]_gated'
337-
# assign gates only (df, gates, multi_gate) -> (df)
338-
# calculate stats (df, by, channels, stat_list) -> (df_stats)
339-
340-
341-
# Rewrite as 'calculate_titer'
342335
def moi(
343336
data_frame: pd.DataFrame,
344337
color_column_name: str,

src/rushd/qpcr.py

Lines changed: 0 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -225,162 +225,3 @@ def load_plates_with_metadata(
225225
return data
226226

227227

228-
def calculate_standard(
    df: pd.DataFrame,
    amt_col: str,
    cp_col: str,
    ax: "Optional[matplotlib.axes.Axes]" = None,
) -> "Tuple[scipy.stats._stats_py.LinregressResult, float]":
    """
    Calculate a standard curve for qPCR data.

    For the given data, treats the values in 'amt_col' as
    input amounts and values in 'cp_col' as the corresponding
    cycle counts (Cp, aka Ct) from the qPCR output. Computes a linear
    regression of Cp on log10(amount), and returns this fit as well
    as the efficiency, computed as ``(10**(-1/slope) - 1) * 100``.

    If axes are passed, plots the linear fit on the data, annotating
    the R^2 value and efficiency.

    Parameters
    ----------
    df: pandas DataFrame
        Data to use to fit. Rows whose amount is not strictly positive
        are excluded from the fit because they cannot be log-transformed.
    amt_col: str
        Name of column containing input amounts.
    cp_col: str
        Name of the column containing Cp values.
    ax: matplotlib Axes (optional)
        Axes on which to plot the data and fit.

    Returns
    -------
    A tuple of the fit (output of a call to scipy.stats.linregress)
    and the calculated efficiency as a percentage (float).

    Raises
    ------
    ColumnError
        If 'amt_col' or 'cp_col' is not a column of df.
    """
    # The annotations above are strings so that defining the function does
    # not evaluate matplotlib / private scipy names.
    if amt_col not in df.columns:
        raise ColumnError(f"Data is missing the 'amt_col' column {amt_col}")
    if cp_col not in df.columns:
        raise ColumnError(f"Data is missing the 'cp_col' column {cp_col}")

    # Remove non-positive values and log10-transform input amounts
    positive = df[df[amt_col] > 0]
    amounts = positive[amt_col].astype(float)
    log_amounts = np.log10(amounts)
    cycles = positive[cp_col].astype(float)

    # Fit data
    fit = scipy.stats.linregress(log_amounts, cycles)
    efficiency = (10 ** (-1 / fit.slope) - 1) * 100  # percentage

    # Plot result
    if ax is not None:
        # All rows are drawn, including those excluded from the fit; the
        # symlog x-axis below keeps zero amounts displayable.
        ax.scatter(df[amt_col], df[cp_col], label='data', ec='white', lw=0.75)
        xs = np.logspace(log_amounts.min(), log_amounts.max(), 1000)
        ys = fit.slope * np.log10(xs) + fit.intercept
        ax.plot(xs, ys, color='crimson', label='linear\nregression')
        ax.set_xscale('symlog', linthresh=amounts.min())
        pad = 0.01
        ax.legend(loc='upper right', bbox_to_anchor=(1 - pad, 1 - pad))
        # R^2 is the square of the correlation coefficient, not its magnitude.
        r_squared = fit.rvalue ** 2
        ax.annotate(f'$R^2$: {r_squared:0.3f}', (0 + pad * 2, 0.1),
                    xycoords='axes fraction', ha='left', va='bottom', size='medium')
        ax.annotate(f'Efficiency: {efficiency:0.1f}%', (0 + pad * 2, 0 + pad * 2),
                    xycoords='axes fraction', ha='left', va='bottom', size='medium')

    return fit, efficiency
292-
293-
294-
def calculate_input_amount(
    y: "Union[float, List[float]]",
    fit: "Union[scipy.stats._stats_py.LinregressResult, List[float]]",
) -> "Union[float, np.ndarray]":
    """
    Given a cycle count (Cp, aka Ct value) and a linear regression fit,
    compute the amount of input.

    Note that the linear regression fit is expected to have been performed
    on the log10-transform of the input amounts. Units of the returned value
    match those of the non-transformed input amount data.

    The amount is ``10 ** ((y - intercept) / slope)``.

    Parameters
    ----------
    y: float or list of float
        Cycle count(s) (Cp, aka Ct value).
    fit: scipy LinregressResult object or list of two floats
        Linear fit to use. Accepts either the output of a call
        to scipy.stats.linregress (any object exposing 'slope' and
        'intercept' attributes) or a list of the fit values
        [slope, intercept].

    Returns
    -------
    A float of the calculated amount when y is a scalar, otherwise a
    NumPy array with one amount per element of y.

    Raises
    ------
    InputError
        If 'fit' is a sequence with fewer than two elements.
    ZeroDivisionError
        If the slope of the fit is zero, since a flat standard curve
        cannot be inverted.
    """
    # Duck-type on the attributes instead of comparing against a private
    # scipy class, so the check does not depend on scipy's internal layout.
    if hasattr(fit, 'slope') and hasattr(fit, 'intercept'):
        slope, intercept = fit.slope, fit.intercept
    else:
        if len(fit) < 2:
            raise InputError("'fit' is expected to be a list containing [slope, intercept]. Alternatively, pass a scipy LinregressResult object.")
        slope, intercept = fit[0], fit[1]

    if slope == 0:
        raise ZeroDivisionError("The slope of 'fit' is zero; cannot compute the input amount.")

    amount = 10 ** ((np.asarray(y, dtype=float) - intercept) / slope)
    # Scalars in, plain float out; sequences in, array out.
    if np.ndim(amount) == 0:
        return float(amount)
    return amount
324-
325-
# TODO: add 'type' arg for dsDNA, ssDNA, ssRNA
326-
def convert_moles_to_mass(
    moles: Union[float, List[float]],
    length: Union[float, List[float]]
) -> Union[float, List[float]]:
    """
    For a given amount of DNA in moles, use its length
    to calculate its mass.

    Formula from NEB:
    g = mol x (bp x 615.96 g/mol/bp + 36.04 g/mol)
    - mass of dsDNA (g) = moles dsDNA x (molecular weight of dsDNA (g/mol))
    - molecular weight of dsDNA = (number of base pairs of dsDNA x average molecular weight of a base pair) + 36.04 g/mol
    - average molecular weight of a base pair = 615.96 g/mol, excluding the water molecule removed during polymerization
      and assuming deprotonated phosphate hydroxyls
    - the additional 36.04 g/mol accounts for the 2 -OH and 2 -H added back to the ends
    - bases are assumed to be unmodified

    NOTE(review): this docstring previously quoted 615.94 g/mol/bp while the
    code used 615.96. The documentation now matches the code; confirm the
    value against the NEB reference.

    Parameters
    ----------
    moles: float or list of float
        Amount of dsDNA in moles.
    length: float or list of float
        Number of base pairs of the dsDNA (or average length of a heterogeneous sample).

    Returns
    -------
    The calculated mass in grams: a NumPy scalar for scalar inputs, or a
    NumPy array (element-wise, with broadcasting) for list inputs.
    """
    bp_weight = 615.96  # g/mol per base pair
    end_weight = 36.04  # g/mol for the terminal -OH and -H groups
    molecular_weight = np.array(length) * bp_weight + end_weight
    return np.array(moles) * molecular_weight
355-
356-
357-
# TODO: add 'type' arg for dsDNA, ssDNA, ssRNA
358-
def convert_mass_to_moles(
    mass: Union[float, List[float]],
    length: Union[float, List[float]]
) -> Union[float, List[float]]:
    """
    For a given mass of DNA in grams, use its length
    to calculate its amount in moles.

    Formula from NEB:
    mol = g / (bp x 615.96 g/mol/bp + 36.04 g/mol)
    - moles dsDNA = mass of dsDNA (g) / (molecular weight of dsDNA (g/mol))
    - molecular weight of dsDNA = (number of base pairs of dsDNA x average molecular weight of a base pair) + 36.04 g/mol
    - average molecular weight of a base pair = 615.96 g/mol, excluding the water molecule removed during polymerization
      and assuming deprotonated phosphate hydroxyls
    - the additional 36.04 g/mol accounts for the 2 -OH and 2 -H added back to the ends
    - bases are assumed to be unmodified

    NOTE(review): this docstring previously quoted 615.94 g/mol/bp while the
    code used 615.96. The documentation now matches the code; confirm the
    value against the NEB reference.

    Parameters
    ----------
    mass: float or list of float
        Mass of dsDNA in grams.
    length: float or list of float
        Number of base pairs of the dsDNA (or average length of a heterogeneous sample).

    Returns
    -------
    The calculated amount in moles: a NumPy scalar for scalar inputs, or a
    NumPy array (element-wise, with broadcasting) for list inputs.
    """
    bp_weight = 615.96  # g/mol per base pair
    end_weight = 36.04  # g/mol for the terminal -OH and -H groups
    molecular_weight = np.array(length) * bp_weight + end_weight
    return np.array(mass) / molecular_weight

0 commit comments

Comments
 (0)