Pandas Tutorial

import pandas as pd
import numpy as np

from numpy.random import randn
np.random.seed(42)

df = pd.DataFrame(randn(5, 4), index="A B C D E".split(), columns='W X Y Z'.split())
df

Reset index

# Reset to default 0,1...n index
df.reset_index()

newind = 'CA NY WY OR CO'.split()

df['States'] = newind
df

df.set_index('States',inplace=True)
df

Groupby

import pandas as pd
# Create dataframe
data = {'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],
       'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],
       'Sales':[200,120,340,124,243,350]}
df = pd.DataFrame(data)

df

df.groupby('Company')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000018A0C6A41D0>

by_comp = df.groupby("Company")

by_comp.mean()

df.groupby('Company').mean()

by_comp.std()

by_comp.min()

by_comp.max()

by_comp.count()

by_comp.describe()

by_comp.describe().transpose()

by_comp.describe().transpose()['GOOG']

Sales  count      2.000000
       mean     160.000000
       std       56.568542
       min      120.000000
       25%      140.000000
       50%      160.000000
       75%      180.000000
       max      200.000000
Name: GOOG, dtype: float64

apply

df = pd.DataFrame({'col1':[1,2,3,4],'col2':[444,555,666,444],'col3':['abc','def','ghi','xyz']})
df.head()

def times2(x):
    return x*2

df['col1'].apply(times2)

0    2
1    4
2    6
3    8
Name: col1, dtype: int64

df['col3'].apply(len)

0    3
1    3
2    3
3    3
Name: col3, dtype: int64

df['col1'].sum()

10

	W	X	Y	Z
A	1.465649	-0.225776	0.067528	-1.424748
B	-0.544383	0.110923	-1.150994	0.375698
C	-0.600639	-0.291694	-0.601707	1.852278
D	-0.013497	-1.057711	0.822545	-1.220844
E	0.208864	-1.959670	-1.328186	0.196861

	index	W	X	Y	Z
0	A	1.465649	-0.225776	0.067528	-1.424748
1	B	-0.544383	0.110923	-1.150994	0.375698
2	C	-0.600639	-0.291694	-0.601707	1.852278
3	D	-0.013497	-1.057711	0.822545	-1.220844
4	E	0.208864	-1.959670	-1.328186	0.196861

	W	X	Y	Z	States
A	1.465649	-0.225776	0.067528	-1.424748	CA
B	-0.544383	0.110923	-1.150994	0.375698	NY
C	-0.600639	-0.291694	-0.601707	1.852278	WY
D	-0.013497	-1.057711	0.822545	-1.220844	OR
E	0.208864	-1.959670	-1.328186	0.196861	CO

	W	X	Y	Z
States
CA	1.465649	-0.225776	0.067528	-1.424748
NY	-0.544383	0.110923	-1.150994	0.375698
WY	-0.600639	-0.291694	-0.601707	1.852278
OR	-0.013497	-1.057711	0.822545	-1.220844
CO	0.208864	-1.959670	-1.328186	0.196861

	Company	Person	Sales
0	GOOG	Sam	200
1	GOOG	Charlie	120
2	MSFT	Amy	340
3	MSFT	Vanessa	124
4	FB	Carl	243
5	FB	Sarah	350

	Sales
	count	mean	std	min	25%	50%	75%	max
Company
FB	2.0	296.5	75.660426	243.0	269.75	296.5	323.25	350.0
GOOG	2.0	160.0	56.568542	120.0	140.00	160.0	180.00	200.0
MSFT	2.0	232.0	152.735065	124.0	178.00	232.0	286.00	340.0

	Company	FB	GOOG	MSFT
Sales	count	2.000000	2.000000	2.000000
	mean	296.500000	160.000000	232.000000
	std	75.660426	56.568542	152.735065
	min	243.000000	120.000000	124.000000
	25%	269.750000	140.000000	178.000000
	50%	296.500000	160.000000	232.000000
	75%	323.250000	180.000000	286.000000
	max	350.000000	200.000000	340.000000

	col1	col2	col3
0	1	444	abc
1	2	555	def
2	3	666	ghi
3	4	444	xyz