@@ -225,162 +225,3 @@ def load_plates_with_metadata(
225225 return data
226226
227227
228- def calculate_standard (
229- df : pd .DataFrame ,
230- amt_col : str ,
231- cp_col : str ,
232- ax : Optional [matplotlib .axes ] = None
233- ) -> List [scipy .stats ._stats_py .LinregressResult , float ]:
234- """
235- Calculate a standard curve for qPCR data.
236-
237- For the given data, treats the values in 'amt_col' as
238- input amounts and values in 'cp_col' as the corresponding
239- cycle counts (Cp, aka Ct) from the qPCR output. Computes a linear
240- regression on log10(amount) vs Cp, and returns this fit as well
241- as the efficiency.
242-
243- If axes are passed, plots the linear fit on the data, annotating
244- the R^2 value and efficiency.
245-
246- Parameters
247- ----------
248- df: pandas DataFrame
249- Data to use to fit.
250- amt_col: str
251- Name of column containing input amounts.
252- cp_col: str
253- Name of the column containing Cp values.
254- ax: matplotlib.axes (optional)
255- Axes on which to plot the data and fit.
256-
257- Returns
258- -------
259- A tuple of the fit (output of a call to scipy.stats.linregress)
260- and the calculated efficiency (float).
261- """
262- if amt_col not in df .columns :
263- raise ColumnError (f"Data is missing the 'amt_col' column { amt_col } " )
264- if cp_col not in df .columns :
265- raise ColumnError (f"Data is missing the 'cp_col' column { cp_col } " )
266-
267- # Remove zero values and log10-transform input amounts
268- df_subset = df [df [amt_col ]> 0 ].copy ()
269- df_subset ['log10_' + amt_col ] = df_subset [amt_col ].astype (float ).apply (np .log10 )
270-
271- # Fit data
272- x = df_subset ['log10_' + amt_col ]
273- y = df_subset [cp_col ].astype (float )
274- fit = scipy .stats .linregress (x , y )
275- efficiency = (10 ** (- 1 / fit .slope ) - 1 )* 100 # percentage
276-
277- # Plot result
278- if ax is not None :
279- ax .scatter (df [amt_col ], df [cp_col ], label = 'data' , ec = 'white' , lw = 0.75 )
280- xs = np .logspace (min (df_subset ['log10_' + amt_col ]), max (df_subset ['log10_' + amt_col ]), 1000 )
281- ys = fit .slope * np .log10 (xs ) + fit .intercept
282- ax .plot (xs , ys , color = 'crimson' , label = 'linear\n regression' )
283- ax .set_xscale ('symlog' , linthresh = min (df_subset [amt_col ]))
284- pad = 0.01
285- ax .legend (loc = 'upper right' , bbox_to_anchor = (1 - pad , 1 - pad ))
286- ax .annotate (f'$R^2$: { abs (fit .rvalue ):0.3f} ' , (0 + pad * 2 , 0.1 ), xycoords = 'axes fraction' ,
287- ha = 'left' , va = 'bottom' , size = 'medium' )
288- ax .annotate (f'Efficiency: { efficiency :0.1f} %' , (0 + pad * 2 , 0 + pad * 2 ), xycoords = 'axes fraction' ,
289- ha = 'left' , va = 'bottom' , size = 'medium' )
290-
291- return fit , efficiency
292-
293-
294- def calculate_input_amount (
295- y : float , # TODO: list of float
296- fit : Union [scipy .stats ._stats_py .LinregressResult , List [float ]],
297- ) -> float :
298- """
299- Given a cycle count (Cp, aka Ct value) and a linear regression fit,
300- compute the amount of input.
301-
302- Note that the linear regression fit is expected to have been performed
303- on the log10-transform of the input amounts. Units of the returned value
304- match those of the non-transformed input amount data.
305-
306- Parameters
307- ----------
308- y: float
309- Cycle count (Cp, aka Ct value).
310- fit: scipy LinregressResult object or list of two floats
311- Linear fit to use. Accepts either the output of a call
312- to scipy.stats.linregress or a list of the fit values
313- [slope, intercept].
314-
315- Returns
316- -------
317- A float of the calculated amount.
318- """
319- if type (fit ) is scipy .stats ._stats_py .LinregressResult :
320- return 10 ** ((float (y )- fit .intercept )/ fit .slope )
321- if len (fit ) < 2 :
322- raise InputError ("'fit' is expected to be a list containing [slope, intercept]. Alternatively, pass a scipy LinregressResult object." )
323- return 10 ** ((float (y )- fit [1 ])/ fit [0 ])
324-
325- # TODO: add 'type' arg for dsDNA, ssDNA, ssRNA
326- def convert_moles_to_mass (
327- moles : Union [float , List [float ]],
328- length : Union [float , List [float ]]
329- ) -> Union [float , List [float ]]:
330- """
331- For a given amount of DNA in moles, use its length
332- to calculate its mass.
333-
334- Formula from NEB:
335- g = mol x (bp x 615.94 g/mol/bp + 36.04 g/mol)
336- - mass of dsDNA (g) = moles dsDNA x (molecular weight of dsDNA (g/mol))
337- - molecular weight of dsDNA = (number of base pairs of dsDNA x average molecular weight of a base pair) + 36.04 g/mol
338- - average molecular weight of a base pair = 615.94 g/mol, excluding the water molecule removed during polymerization
339- and assuming deprotonated phosphate hydroxyls
340- - the additional 36.04 g/mol accounts for the 2 -OH and 2 -H added back to the ends
341- - bases are assumed to be unmodified
342-
343- Parameters
344- ----------
345- moles: float or list of float
346- Amount of dsDNA in moles.
347- length: float or list of float
348- Number of base pairs of the dsDNA (or average length of a heterogeneous sample).
349-
350- Returns
351- -------
352- A float or list of floats of the calculated mass in grams.
353- """
354- return np .array (moles ) * (np .array (length ) * 615.96 + 36.04 )
355-
356-
357- # TODO: add 'type' arg for dsDNA, ssDNA, ssRNA
358- def convert_mass_to_moles (
359- mass : Union [float , List [float ]],
360- length : Union [float , List [float ]]
361- ) -> Union [float , List [float ]]:
362- """
363- For a given amount of DNA in moles, use its length
364- to calculate its mass.
365-
366- Formula from NEB:
367- mol = g / (bp x 615.94 g/mol/bp + 36.04 g/mol)
368- - moles dsDNA = mass of dsDNA (g) / (molecular weight of dsDNA (g/mol))
369- - molecular weight of dsDNA = (number of base pairs of dsDNA x average molecular weight of a base pair) + 36.04 g/mol
370- - average molecular weight of a base pair = 615.94 g/mol, excluding the water molecule removed during polymerization
371- and assuming deprotonated phosphate hydroxyls
372- - the additional 36.04 g/mol accounts for the 2 -OH and 2 -H added back to the ends
373- - bases are assumed to be unmodified
374-
375- Parameters
376- ----------
377- mass: float or list of float
378- Mass of dsDNA in grams.
379- length: float or list of float
380- Number of base pairs of the dsDNA (or average length of a heterogeneous sample).
381-
382- Returns
383- -------
384- A float or list of floats of the calculated amount in moles.
385- """
386- return np .array (mass ) / (np .array (length ) * 615.96 + 36.04 )
0 commit comments