Skip to main content

Sham Sui Po, Hong Kong

Seaborn Cheat Sheet 2023

Github Repository

import numpy  as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Datasets

Office Sales

!wget https://raw.githubusercontent.com/ashok-python/data/master/dm_office_sales.csv -P datasets
# Read the office sales CSV from the local datasets/ folder and preview
# its first three rows.
office_sales_df = pd.read_csv('datasets/dm_office_sales.csv')
office_sales_df.head(n=3)
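|   | division | level of education | training level | work experience | salary | sales |
|---|---|---|---|---|---|---|
| 0 | printers | some college | 2 | 6 | 91684 | 372302 |
| 1 | printers | associate's degree | 2 | 10 | 119679 | 495660 |
| 2 | peripherals | high school | 0 | 9 | 82045 | 320453 |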

Student Performance

!wget https://raw.githubusercontent.com/rashida048/Datasets/master/StudentsPerformance.csv -P datasets
# Read the student performance CSV from the local datasets/ folder and
# preview its first three rows.
students_performance_df = pd.read_csv('datasets/StudentsPerformance.csv')
students_performance_df.head(n=3)
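|   | gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score |
|---|---|---|---|---|---|---|---|---|
| 0 | female | group B | bachelor's degree | standard | none | 72 | 72 | 74 |
| 1 | female | group C | some college | standard | completed | 69 | 90 | 88 |
| 2 | female | group B | master's degree | standard | none | 90 | 95 | 93 |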

Toronto Weather

!wget https://github.com/datagy/mediumdata/raw/master/toronto-weather.xlsx -P datasets
# Only the date and the mean temperature are read from the workbook.
cols = ['LOCAL_DATE', 'MEAN_TEMPERATURE']
renames = {'LOCAL_DATE': 'Date', 'MEAN_TEMPERATURE': 'Temperature'}

country_table_df = pd.read_excel(
    'datasets/toronto-weather.xlsx',
    usecols=cols,
    parse_dates=['LOCAL_DATE'],
)
country_table_df = country_table_df.rename(columns=renames)

# Split the parsed timestamp into the two calendar parts used as pivot axes.
country_table_df['Day'] = country_table_df['Date'].dt.day
country_table_df['Month'] = country_table_df['Date'].dt.month

# Keep days 1-28 only.
# NOTE(review): presumably so every month contributes the same set of day
# columns to the pivot - confirm.
country_table_df = country_table_df.loc[country_table_df['Day'] <= 28]

# Reshape to a Month (rows) x Day (columns) grid of temperatures.
country_table_df = country_table_df.pivot_table(
    index='Month',
    columns='Day',
    values='Temperature',
)

# Preview the top-left 3x3 corner of the grid.
country_table_df.iloc[:3, :3]

Credit Card Approval Prediction

!wget https://raw.githubusercontent.com/BrunoHerick/analiseCartaoCredito/main/application_record.csv -P datasets
# Read the credit card application records and preview the first three rows,
# transposed so each column name becomes a row.
credit_approv_df = pd.read_csv('datasets/application_record.csv')
credit_approv_df.head(n=3).T
|   | 0 | 1 | 2 |
|---|---|---|---|
| ID | 5008804 | 5008805 | 5008806 |
| CODE_GENDER | M | M | M |
| FLAG_OWN_CAR | Y | Y | Y |
| FLAG_OWN_REALTY | Y | Y | Y |
| CNT_CHILDREN | 0 | 0 | 0 |
| AMT_INCOME_TOTAL | 427500.0 | 427500.0 | 112500.0 |
| NAME_INCOME_TYPE | Working | Working | Working |
| NAME_EDUCATION_TYPE | Higher education | Higher education | Secondary / secondary special |
| NAME_FAMILY_STATUS | Civil marriage | Civil marriage | Married |
| NAME_HOUSING_TYPE | Rented apartment | Rented apartment | House / apartment |
| DAYS_BIRTH | -12005 | -12005 | -21474 |
| DAYS_EMPLOYED | -4542 | -4542 | -1134 |
| FLAG_MOBIL | 1 | 1 | 1 |
| FLAG_WORK_PHONE | 1 | 1 | 0 |
| FLAG_PHONE | 0 | 0 | 0 |
| FLAG_EMAIL | 0 | 0 | 0 |
| OCCUPATION_TYPE | NaN | NaN | Security staff |
| CNT_FAM_MEMBERS | 2 | 2 | 2 |
| CLASSIFICAO | bom | bom | bom |

Tips

!wget https://raw.githubusercontent.com/plotly/datasets/master/tips.csv -P datasets
# Read the tips CSV and preview the first three rows, transposed so each
# column name becomes a row.
tips_df = pd.read_csv('datasets/tips.csv')
tips_df.head(n=3).T
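|   | 0 | 1 | 2 |
|---|---|---|---|
| total_bill | 16.99 | 10.34 | 21.01 |
| tip | 1.01 | 1.66 | 3.5 |
| sex | Female | Male | Male |
| smoker | No | No | No |
| day | Sun | Sun | Sun |
| time | Dinner | Dinner | Dinner |
| size | 2 | 3 | 3 |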

Scatter Plots

plt.figure(figsize=(10, 7))

sns.set(style='darkgrid')

# Salary against sales: colour encodes the education level and the marker
# shape encodes the division (both categorical columns).
scatter_ax = sns.scatterplot(
    data=office_sales_df,
    x='salary',
    y='sales',
    hue='level of education',
    style='division',
    palette='nipy_spectral',
    s=40,
    alpha=0.6,
)
scatter_ax.set_title('Salary vs Sales')

plt.savefig('assets/Seaborn_Cheat_Sheet_01.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

plt.figure(figsize=(10, 6))

# Salary against sales: colour follows work experience and marker size
# follows the training level.
scatter_ax = sns.scatterplot(
    data=office_sales_df,
    x='salary',
    y='sales',
    hue='work experience',
    size='training level',
    palette='cool',
)
scatter_ax.set_title('Salary vs Sales')

plt.savefig('assets/Seaborn_Cheat_Sheet_02.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

plt.figure(figsize=(10, 6))

# DAYS_BIRTH and DAYS_EMPLOYED are negative in the previewed rows; flip the
# sign with a vectorized negation (instead of a per-element lambda) so the
# plotted values are positive day counts.
credit_approv_df['DAYS_BIRTH_INV'] = -credit_approv_df['DAYS_BIRTH']
credit_approv_df['DAYS_EMPLOYED_INV'] = -credit_approv_df['DAYS_EMPLOYED']

# hue by categorical column (gender), size by continuous column (income)
plot = sns.scatterplot(
    x='DAYS_BIRTH_INV',
    y='DAYS_EMPLOYED_INV',
    data=credit_approv_df,
    hue='CODE_GENDER',
    palette='winter',
    size='AMT_INCOME_TOTAL',
    alpha=0.3,
)

plot.set_title('Age vs Days Employed by Gender and Total Income')
# Restrict the y-axis to 0..17500 days; points outside that window are not shown.
plot.set_ylim(0, 17500)

plt.savefig('assets/Seaborn_Cheat_Sheet_20.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Continuous Distribution Plots

Rug Plot

plt.figure(figsize=(10, 3))
plt.title('Salary Distribution based on Training')

# One tick per salary value, coloured by training level; height=0.75 makes
# the ticks span three quarters of the axes height.
sns.rugplot(
    x='salary',
    hue='training level',
    data=office_sales_df,
    palette='gist_rainbow',
    height=0.75,
)

plt.savefig('assets/Seaborn_Cheat_Sheet_03.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Histogram

plt.figure(figsize=(10, 5))
plt.title('Salary Distribution based on Training')

# 50-bin salary histogram split by training level, with a KDE curve
# overlaid on each group.
sns.histplot(
    x='salary',
    hue='training level',
    data=office_sales_df,
    bins=50,
    kde=True,
    palette='winter',
)

plt.savefig('assets/Seaborn_Cheat_Sheet_04.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Turn the (negative) day offset in DAYS_BIRTH into a whole number of years.
credit_approv_df['AGE_YEARS'] = credit_approv_df['DAYS_BIRTH'].apply(
    lambda days: round(days / (-365))
)

plt.figure(figsize=(10, 5))
plt.title('Age in Years Distribution')

# Step-outline histogram of age, one outline per gender.
sns.histplot(
    x='AGE_YEARS',
    hue='CODE_GENDER',
    data=credit_approv_df,
    bins=45,
    element='step',
    palette='tab20',
)

plt.savefig('assets/Seaborn_Cheat_Sheet_21.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Kernel Density Estimation

plt.figure(figsize=(10, 5))
plt.title('Salary Distribution based on Training')

# Clip the density estimate to the observed salary range so the curves do
# not extend past the smallest and largest salary in the data.
salary_bounds = [
    office_sales_df['salary'].min(),
    office_sales_df['salary'].max(),
]

sns.kdeplot(
    x='salary',
    hue='training level',
    data=office_sales_df,
    clip=salary_bounds,
    fill=True,
    palette='viridis',
)

plt.savefig('assets/Seaborn_Cheat_Sheet_05.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Categorical Plots

Count Plot

plt.figure(figsize=(10, 5))
plt.title('Sales Personal per Division by Training')

# Row counts per division, with one bar per training level.
sns.countplot(
    x='division',
    hue='training level',
    data=office_sales_df,
    palette='seismic',
)

plt.savefig('assets/Seaborn_Cheat_Sheet_06.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Barplot

plt.figure(figsize=(10, 5))
plt.title('Mean Salary based on Education and Division')
sns.set(style='darkgrid')

# Bar height is the mean salary per education level and division; the error
# bars show the standard deviation.
sns.barplot(
    x='level of education',
    y='salary',
    hue='division',
    data=office_sales_df,
    estimator=np.mean,
    errorbar='sd',
    palette='magma',
)

# Anchor the legend just beyond the top-right corner of the axes.
plt.legend(bbox_to_anchor=(1.01, 1.01))
plt.savefig('assets/Seaborn_Cheat_Sheet_07.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Categorical Distribution Plots

Boxplot

plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')

# Horizontal boxes: math score on the x-axis, one row per gender, split by
# test preparation course.
sns.boxplot(
    x='math score',
    y='gender',
    hue='test preparation course',
    data=students_performance_df,
    orient='h',
    palette='cool',
)
plt.savefig('assets/Seaborn_Cheat_Sheet_08.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Number of rows that make up ten percent of the dataset.
ten_percent = round((len(credit_approv_df) / 100 * 10))

# NOTE(review): tail() selects the LAST ten percent of rows in file order,
# not the rows with the lowest income that the title suggests - confirm
# which one is intended.
income_subset = credit_approv_df.tail(ten_percent)
income_column = income_subset['AMT_INCOME_TOTAL']

plt.figure(figsize=(10, 5))
plt.title('Lower 10% Total Income by Family Status and House Ownership')

plot = sns.boxplot(
    x='NAME_FAMILY_STATUS',
    y='AMT_INCOME_TOTAL',
    hue='FLAG_OWN_REALTY',
    data=income_subset,
    orient='v',
    palette='winter',
    linewidth=0.5,
    fliersize=1,
)

# Pad the y-axis by 10000 on both sides of the observed income range.
plot.set_ylim(
    income_column.min() - 10000,
    income_column.max() + 10000,
)

plt.savefig('assets/Seaborn_Cheat_Sheet_22.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Violinplot

plt.figure(figsize=(12, 5))
plt.title('Tips Distribution')

# Single violin built straight from the tip column (no grouping).
tip_values = tips_df['tip']
sns.violinplot(x=tip_values, color='mediumspringgreen')

plt.savefig('assets/Seaborn_Cheat_Sheet_25.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')

# One violin per gender; split=True draws the two prep-course groups as the
# two halves of each violin, and inner='quartile' marks the quartiles.
# NOTE(review): newer seaborn releases replace `bw` with `bw_method` /
# `bw_adjust` - confirm against the installed version.
sns.violinplot(
    x='gender',
    y='math score',
    hue='test preparation course',
    data=students_performance_df,
    orient='v',
    split=True,
    inner='quartile',
    bw=0.3,
    palette='cool',
)
plt.legend(loc='lower right')
plt.savefig('assets/Seaborn_Cheat_Sheet_09.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Swarmplot

plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Parental Education')

# One point per student: math score on the x-axis, one row per gender,
# coloured by the parents' education level.
sns.swarmplot(
    x='math score',
    y='gender',
    hue='parental level of education',
    data=students_performance_df,
    palette='tab10',
)
plt.legend(loc='lower left')
plt.savefig('assets/Seaborn_Cheat_Sheet_10.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Explicit colour list used as the palette for the `time` hue.
colour_palette = ['dodgerblue', 'mediumspringgreen']

plt.figure(figsize=(12, 5))
plt.title('Tips by Day and Time of the Day')

# One point per bill: tip amount per weekday, coloured by time of day.
sns.swarmplot(
    x='day',
    y='tip',
    hue='time',
    data=tips_df,
    palette=colour_palette,
)
plt.legend(loc='upper right')
plt.savefig('assets/Seaborn_Cheat_Sheet_24.webp', bbox_inches='tight')
![Seaborn Cheat Sheet 2023](./Seaborn_Cheat_Sheet_24.webp)

Boxenplot

plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')

# Vertical boxen plot of math score per gender, split by prep course.
sns.boxenplot(
    x='gender',
    y='math score',
    hue='test preparation course',
    data=students_performance_df,
    orient='v',
    palette='seismic',
)
plt.savefig('assets/Seaborn_Cheat_Sheet_11.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Comparison Plots

Jointplot

# Reading score against math score as a scatter joint plot, coloured by
# gender.
sns.jointplot(
    x='reading score',
    y='math score',
    hue='gender',
    data=students_performance_df,
    kind='scatter',
    palette='winter',
    alpha=0.4,
)

plt.savefig('assets/Seaborn_Cheat_Sheet_12.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Filled KDE joint plot of reading score against math score in one colour.
plot = sns.jointplot(
    x='reading score',
    y='math score',
    data=students_performance_df,
    kind='kde',
    fill=True,
    color='dodgerblue',
)

# NOTE(review): the title mentions gender, but no hue is passed to the plot
# above - confirm the intended wording.
plot.fig.suptitle(
    'Math Score vs Reading Score by Gender',
    fontsize=6,
    fontdict={"weight": "normal"},
)

plt.savefig('assets/Seaborn_Cheat_Sheet_13.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Pairplot

# Pair plot of the student performance data, coloured by gender.
sns.pairplot(
    students_performance_df,
    hue='gender',
    palette='terrain',
)

plt.savefig('assets/Seaborn_Cheat_Sheet_14.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Grid Plots

Catplot

# Grid of boxen plots: rows split by gender, columns (and colour) split by
# test preparation course; each panel shows math score per lunch type.
sns.catplot(
    x='lunch',
    y='math score',
    hue='test preparation course',
    row='gender',
    col='test preparation course',
    data=students_performance_df,
    kind='boxen',
    palette='mako',
)

plt.savefig('assets/Seaborn_Cheat_Sheet_15.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Pairgrid

# Pair grid with a different plot type per region: scatter above the
# diagonal, KDE contours below it, histograms on it. Each map_* call's
# return value is chained into the next, as in the step-by-step form.
grid = (
    sns.PairGrid(students_performance_df)
    .map_upper(sns.scatterplot, color='dodgerblue')
    .map_lower(sns.kdeplot, cmap='winter')
    .map_diag(sns.histplot, color='fuchsia')
)

plt.savefig('assets/Seaborn_Cheat_Sheet_16.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Gender-coloured pair grid.
grid = sns.PairGrid(
    students_performance_df,
    hue='gender',
    palette='cool',
)

# Columns passed to the scatter plots for extra marker encodings.
prep_course = students_performance_df["test preparation course"]
ethnicity = students_performance_df["race/ethnicity"]

# Above the diagonal: marker size follows the prep course.
grid = grid.map_upper(sns.scatterplot, size=prep_course, alpha=0.8)
# Below the diagonal: marker size and shape both follow race/ethnicity.
grid = grid.map_lower(sns.scatterplot, size=ethnicity, style=ethnicity)
# On the diagonal: per-variable density curves.
grid = grid.map_diag(sns.kdeplot)

grid = grid.add_legend(title="", adjust_subtitles=True)

plt.savefig('assets/Seaborn_Cheat_Sheet_17.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Matrix Plots

Heatmap

plt.figure(figsize=(20, 8), dpi=200)
plt.title('Average daily temperature of Toronto, Canada in 2020')

# Month x Day temperature grid as an annotated heatmap (annot=True writes
# each cell's value into the cell).
sns.heatmap(
    data=country_table_df,
    cmap='coolwarm',
    annot=True,
    linewidth=0.5,
)

plt.savefig('assets/Seaborn_Cheat_Sheet_18.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Drop the columns excluded from the correlation matrix: the text-valued
# columns plus CNT_CHILDREN and FLAG_MOBIL. (The duplicated
# 'NAME_FAMILY_STATUS' entry of the original list is listed once here.)
credit_approv_df_only_numeric = credit_approv_df.drop(
    columns=[
        'CODE_GENDER',
        'FLAG_OWN_CAR',
        'FLAG_OWN_REALTY',
        'NAME_INCOME_TYPE',
        'NAME_EDUCATION_TYPE',
        'NAME_FAMILY_STATUS',
        'NAME_HOUSING_TYPE',
        'OCCUPATION_TYPE',
        'CLASSIFICAO',
        'CNT_CHILDREN',
        'FLAG_MOBIL',
    ]
)

# Remove rows where every remaining value is missing. This reads the frame
# created above; the name used previously (`credit_approv_df_numeric`) was
# never defined and raised a NameError.
credit_approv_df_dropna = credit_approv_df_only_numeric.dropna(how='all')


plt.figure(figsize=(20, 8), dpi=200)
plt.title('Correlation Heatmap Credit Card Approval Dataset')

# Pairwise correlation of the remaining columns as an annotated heatmap.
sns.heatmap(
    credit_approv_df_dropna.corr(),
    linewidth=0.5,
    cmap='seismic',
    annot=True,
)

plt.savefig('assets/Seaborn_Cheat_Sheet_23.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Clustermap

# Cluster map of the Month x Day temperature grid; col_cluster=False turns
# off clustering of the day columns.
sns.clustermap(
    data=country_table_df,
    col_cluster=False,
    cmap='coolwarm',
    annot=True,
    linewidth=0.5,
)

plt.savefig('assets/Seaborn_Cheat_Sheet_19.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Facet Grid

# Facet grid of tip histograms: columns by time of day, rows by smoker flag,
# colour by weekday. The return value of each call is chained into the next,
# as in the step-by-step form.
plot = (
    sns.FacetGrid(
        tips_df,
        row='smoker',
        col='time',
        hue='day',
        palette='plasma_r',
        sharex=True,
    )
    .map(plt.hist, 'tip')
    .add_legend()
)

plt.savefig('assets/Seaborn_Cheat_Sheet_26.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Facet grid of total bill against tip: columns by weekday, rows by sex,
# colour by time of day. The return value of each call is chained into the
# next, as in the step-by-step form.
plot = (
    sns.FacetGrid(
        tips_df,
        row='sex',
        col='day',
        hue='time',
        palette='plasma',
    )
    .map(plt.scatter, 'total_bill', 'tip')
    .add_legend()
)
plt.savefig('assets/Seaborn_Cheat_Sheet_27.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023