Pandas Advanced Tutorial
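A tutorial on pandas covering index manipulation (reset_index / set_index), groupby aggregation, and applying functions to columns with apply.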
import pandas as pd
import numpy as np
from numpy.random import randn
# Seed NumPy's global random state so the random values below are reproducible.
np.random.seed(42)

# Build a 5x4 frame of standard-normal draws: rows labelled A-E, columns W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list("ABCDE"),
    columns=list("WXYZ"),
)
df

# reset_index() returns a *new* frame with a default 0..n-1 integer index and
# the old labels moved into a regular column. The result is not assigned, so
# df keeps its A-E index.
df.reset_index(drop=False)

# Attach one state code per row as a new "States" column.
newind = ["CA", "NY", "WY", "OR", "CO"]
df = df.assign(States=newind)
df

# Promote the "States" column to the row index, replacing the A-E labels.
df = df.set_index("States")
df
import pandas as pd
# Create a small sales table: two salespeople for each of three companies.
data = {
    "Company": ["GOOG", "GOOG", "MSFT", "MSFT", "FB", "FB"],
    "Person": ["Sam", "Charlie", "Amy", "Vanessa", "Carl", "Sarah"],
    "Sales": [200, 120, 340, 124, 243, 350],
}
df = pd.DataFrame(data)
df

# groupby() on its own only builds a DataFrameGroupBy object; nothing is
# computed until an aggregation method is called on it.
df.groupby("Company")
by_comp = df.groupby("Company")

# "Person" holds strings, so mean/std must be restricted to numeric columns.
# Since pandas 2.0 the default is numeric_only=False, and calling these
# without the flag raises TypeError on the string column.
by_comp.mean(numeric_only=True)
# Same aggregation written as a single chained expression.
df.groupby("Company").mean(numeric_only=True)
by_comp.std(numeric_only=True)

# min/max/count are defined for strings too, so every column is kept
# (min/max of "Person" are lexicographic).
by_comp.min()
by_comp.max()
by_comp.count()

# describe() yields summary statistics per group; transposing puts the
# companies in the columns so a single company can be selected by label.
by_comp.describe()
by_comp.describe().transpose()
by_comp.describe().transpose()["GOOG"]
# Small mixed-type frame used to demonstrate Series.apply.
df = pd.DataFrame(
    {
        "col1": [1, 2, 3, 4],
        "col2": [444, 555, 666, 444],
        "col3": ["abc", "def", "ghi", "xyz"],
    }
)
df.head()


def times2(x):
    """Return ``x`` multiplied by 2."""
    return x * 2


# apply() calls the given function once per element and returns a new Series.
df["col1"].apply(times2)
# Built-in functions work as well: here, the length of each string in col3.
df["col3"].apply(len)
# Plain column-wise reduction for comparison.
df["col1"].sum()