#Pandas Cheat Sheet
Mostly collected from Stack Overflow answers.
##Pandas
df refers to Pandas data frame
###Print unique value of a pandas column
print df['column_name'].unique()
df['column_name'].mean()
###Get the datatype of pandas column
df.column_name.dtype
###Drop a particular column Ref:
df = df.drop('column_name', 1)
df.drop([Column Name or list],inplace=True,axis=1) #will delete one or more columns in place. Don't use inplace if you don't want to change the original df
###Select column using column name
####Select single column
df['column name']
df1 = df[['column_name_1', 'column_name_2']]
####Change column name using column index number Ref: See the comments
df.columns.values[x] = 'new column name'
where x is the column number
my_columns = ['a', 'b', 'c']
df.columns = my_columns
df.index.name = 'new_name'
print df.column.values[x]
###Date Time Index
format = '%d/%m/%y %H:%M:%S'
#pd = pandas
df['Date_Time'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format=format)
df.set_index(df['Date_Time'], inplace=True)
print df['2014-05-20']['column_name']
df = df.resample('D', how=np.sum) # D for day
df_30_min = df_15_min.resample('30Min', how=np.sum, label='right', closed='right')
label='right' and closed='right' make sure that values between 1:30 and 2:00 are assigned to 2:00.
###Merge two dataframes Ref:
df3 = df1.join(df2, how='right')

| Merge | Description |
|---|---|
| left | Use keys from left frame only |
| right | Use keys from right frame only |
| outer | Use union of keys from both frames |
| inner | Use intersection of keys from both frames |
cols = df.columns.tolist()
cols = cols[-1:] + cols[:-1] # bring last column to first
df = df[cols]
df['new_column_name'] = (df['column'] == value).astype(int)
def datesep(df):
    """
    Split the index of a dataframe into its day, month and year components.

    :param df: dataframe whose index is passed to pd.DatetimeIndex
    :return: day values, month values, year values (in that order)
    """
    # Build the DatetimeIndex once and reuse it for all three components.
    dt_index = pd.DatetimeIndex(df.index)
    date = dt_index.day
    month = dt_index.month
    year = dt_index.year
    return date, month, year
#Instead of using `x = x + y` use
x +=y
#<x> <operator>= y
df.ix[row,column]
series = df['column_name']['2014-09-01':'2014-09-15']
df_new = pd.DataFrame([series[i] for i in range(len(series))], columns=['column_name'], index=series.index)
##Matplotlib Plots
###Time Series Bar plots with dates in X-axis
fig= plt.figure()
plt.bar(df.index, df['column_name'], width=0.35, color='b')
plt.bar(df_1.index, df_1['column_name'],width=0.35, color='g')
fig.autofmt_xdate(rotation=90)
plt.show()
http://stackoverflow.com/questions/5902371/matplotlib-bar-chart-with-dates
###Using Tex, Latex in matplotlib plots
"""
{x : .1f} x refers to position of string in .format(starts with 0)
and .1 refers to one decimal approximation,
use .2 for two decimal places
use .0f for without decimal places
"""
# Each coefficient is rendered with one decimal place ({i:.1f}); doubled
# braces ({{ }}) produce the literal braces required by \textbf{...}.
plt.text(x=-0.25, y=3000, fontsize=15,
         s=r"\textbf{{$ y = {0:.1f} x^2 + {1:.1f} x + {2:.1f} $}}".format(coeff_stage_area_cal[0],
                                                                          coeff_stage_area_cal[1],
                                                                          coeff_stage_area_cal[2]))
####String formatting involving power
plt.text(x=0.15, y=11, fontsize=15, s=r'$Infiltration = {0:.2f}h^{{{1:.2f}}}$'.format(popt[0], popt[1]))
###String Formatting General Python
| Conversion | Meaning |
|---|---|
| 'd' | Signed integer decimal. |
| 'i' | Signed integer decimal. |
| 'e' | Floating point exponential format (lowercase). |
| 'E' | Floating point exponential format (uppercase). |
| 'f' | Floating point decimal format. |
| 'F' | Floating point decimal format. |
def f2(seq):
    """Return the items of seq as a list with duplicates removed.

    The first occurrence of each item is kept and the original order is
    preserved.
    """
    checked = []
    for e in seq:
        # List membership compares with ==, so unhashable items
        # (e.g. lists) are handled as well.
        if e not in checked:
            checked.append(e)
    return checked
###Formats
- %d is the day number
- %m is the month number
- %b is the month abbreviation
- %y is the last two digits of the year
- %Y is the full (four-digit) year
delta = date_1 - date_2
print delta.days
date_1 and date_2 should be datetime objects, or use date(2009, 9, 24) Ref:
def my_add(a, b):
    total = a + b

This section is for my reference. It will contain sections from tutorials which helped me to grasp concepts. I am starting with Python classes.
Source: https://docs.python.org/2/tutorial/classes.html
class Dog:
    """Example contrasting a class variable with an instance variable."""

    kind = 'canine'         # class variable shared by all instances

    def __init__(self, name):
        """Store name on the new instance."""
        self.name = name    # instance variable unique to each instance
>>> d = Dog('Fido')
>>> e = Dog('Buddy')
>>> d.kind # shared by all dogs
'canine'
>>> e.kind # shared by all dogs
'canine'
>>> d.name # unique to d
'Fido'
>>> e.name # unique to e
'Buddy'
class Dog:
    """Example where each instance owns its own list of tricks."""

    def __init__(self, name):
        """Store name and start with an empty trick list."""
        self.name = name
        self.tricks = []    # creates a new empty list for each dog

    def add_trick(self, trick):
        """Append trick to this dog's own list."""
        self.tricks.append(trick)
>>> d = Dog('Fido')
>>> e = Dog('Buddy')
>>> d.add_trick('roll over')
>>> e.add_trick('play dead')
>>> d.tricks
['roll over']
>>> e.tricks
['play dead']
# Function defined outside the class; it becomes a method once assigned
# to a class attribute below.
def f1(self, x, y):
    """Return the smaller of x and x + y."""
    return min(x, x+y)

class C:
    """Example showing that methods need not be defined textually in the class."""

    f = f1

    def g(self):
        """Return the string 'hello world'."""
        return 'hello world'

    h = g   # second name bound to the same function object as g
`f`, `g` and `h` are all attributes of class `C` that refer to function objects, and consequently they are all methods of instances of `C` --- `h` being exactly equivalent to `g`. Note that this practice usually only serves to confuse the reader of a program.
class Bag:
    """Simple container example: methods calling other methods via self."""

    def __init__(self):
        """Start with an empty item list."""
        self.data = []

    def add(self, x):
        """Append x to the bag."""
        self.data.append(x)

    def addtwice(self, x):
        """Append x to the bag two times by calling add twice."""
        self.add(x)
        self.add(x)