Seaborn Cheat Sheet 2023
- Seaborn Cheat Sheet 2023
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
Datasets
Office Sales
!wget https://raw.githubusercontent.com/ashok-python/data/master/dm_office_sales.csv -P datasets
# Read the office-sales CSV from the local datasets/ folder and preview
# its first three rows.
office_sales_df = pd.read_csv(
    filepath_or_buffer='datasets/dm_office_sales.csv',
)
office_sales_df.head(n=3)
| | division | level of education | training level | work experience | salary | sales |
---|---|---|---|---|---|---|
0 | printers | some college | 2 | 6 | 91684 | 372302 |
1 | printers | associate's degree | 2 | 10 | 119679 | 495660 |
2 | peripherals | high school | 0 | 9 | 82045 | 320453 |
Student Performance
!wget https://raw.githubusercontent.com/rashida048/Datasets/master/StudentsPerformance.csv -P datasets
# Read the student-performance CSV from the local datasets/ folder and
# preview its first three rows.
students_performance_df = pd.read_csv(
    filepath_or_buffer='datasets/StudentsPerformance.csv',
)
students_performance_df.head(n=3)
| | gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score |
---|---|---|---|---|---|---|---|---|
0 | female | group B | bachelor's degree | standard | none | 72 | 72 | 74 |
1 | female | group C | some college | standard | completed | 69 | 90 | 88 |
2 | female | group B | master's degree | standard | none | 90 | 95 | 93 |
Toronto Weather
!wget https://github.com/datagy/mediumdata/raw/master/toronto-weather.xlsx -P datasets
# Build a Month x Day table of temperatures from the Toronto weather workbook.
cols = ['LOCAL_DATE', 'MEAN_TEMPERATURE']
renames = {'LOCAL_DATE': 'Date', 'MEAN_TEMPERATURE': 'Temperature'}

country_table_df = (
    pd.read_excel(
        'datasets/toronto-weather.xlsx',
        usecols=cols,
        parse_dates=['LOCAL_DATE'],
    )
    .rename(columns=renames)
    # Split the parsed date into the two axes of the pivot table.
    .assign(
        Day=lambda frame: frame['Date'].dt.day,
        Month=lambda frame: frame['Date'].dt.month,
    )
    # Keep days 1-28 only -- presumably so every month contributes the same
    # set of day columns (TODO confirm).
    .loc[lambda frame: frame['Day'] <= 28]
    # pivot_table aggregates duplicates with its default aggregation (mean).
    .pivot_table(
        index='Month',
        columns='Day',
        values='Temperature',
    )
)
country_table_df.iloc[:3, :3]
Credit Card Approval Prediction
!wget https://raw.githubusercontent.com/BrunoHerick/analiseCartaoCredito/main/application_record.csv -P datasets
# Read the credit-card application records and preview the first three rows,
# transposed so each column name becomes a row.
credit_approv_df = pd.read_csv(
    filepath_or_buffer='datasets/application_record.csv',
)
credit_approv_df.head(n=3).T
| | 0 | 1 | 2 |
---|---|---|---|
ID | 5008804 | 5008805 | 5008806 |
CODE_GENDER | M | M | M |
FLAG_OWN_CAR | Y | Y | Y |
FLAG_OWN_REALTY | Y | Y | Y |
CNT_CHILDREN | 0 | 0 | 0 |
AMT_INCOME_TOTAL | 427500.0 | 427500.0 | 112500.0 |
NAME_INCOME_TYPE | Working | Working | Working |
NAME_EDUCATION_TYPE | Higher education | Higher education | Secondary / secondary special |
NAME_FAMILY_STATUS | Civil marriage | Civil marriage | Married |
NAME_HOUSING_TYPE | Rented apartment | Rented apartment | House / apartment |
DAYS_BIRTH | -12005 | -12005 | -21474 |
DAYS_EMPLOYED | -4542 | -4542 | -1134 |
FLAG_MOBIL | 1 | 1 | 1 |
FLAG_WORK_PHONE | 1 | 1 | 0 |
FLAG_PHONE | 0 | 0 | 0 |
FLAG_EMAIL | 0 | 0 | 0 |
OCCUPATION_TYPE | NaN | NaN | Security staff |
CNT_FAM_MEMBERS | 2 | 2 | 2 |
CLASSIFICAO | bom | bom | bom |
Tips
!wget https://raw.githubusercontent.com/plotly/datasets/master/tips.csv -P datasets
# Read the tips CSV and preview the first three rows, transposed so each
# column name becomes a row.
tips_df = pd.read_csv(
    filepath_or_buffer='datasets/tips.csv',
)
tips_df.head(n=3).T
| | 0 | 1 | 2 |
---|---|---|---|
total_bill | 16.99 | 10.34 | 21.01 |
tip | 1.01 | 1.66 | 3.5 |
sex | Female | Male | Male |
smoker | No | No | No |
day | Sun | Sun | Sun |
time | Dinner | Dinner | Dinner |
size | 2 | 3 | 3 |
Scatter Plots
# Salary vs. sales scatter where both colour and marker shape are driven by
# categorical columns.
plt.figure(figsize=(10, 7))
sns.set(style='darkgrid')
salary_sales_ax = sns.scatterplot(
    data=office_sales_df,
    x='salary',
    y='sales',
    hue='level of education',  # colour per education level
    style='division',          # marker shape per division
    palette='nipy_spectral',
    s=40,
    alpha=0.6,
)
salary_sales_ax.set_title('Salary vs Sales')
plt.savefig('assets/Seaborn_Cheat_Sheet_01.webp', bbox_inches='tight')
# Salary vs. sales scatter again, this time with numeric columns mapped to
# colour ('work experience') and marker size ('training level').
plt.figure(figsize=(10, 6))
experience_scatter_opts = {
    'data': office_sales_df,
    'x': 'salary',
    'y': 'sales',
    'hue': 'work experience',
    'size': 'training level',
    'palette': 'cool',
}
sns.scatterplot(**experience_scatter_opts).set_title('Salary vs Sales')
plt.savefig('assets/Seaborn_Cheat_Sheet_02.webp', bbox_inches='tight')
# Scatter of days-since-birth against days-employed for the credit dataset.
plt.figure(figsize=(10, 6))

# DAYS_BIRTH / DAYS_EMPLOYED are stored as negative day counts in the sample
# rows; flip the sign so both axes read as positive durations. Unary minus on
# the whole column replaces the former per-row ``apply(lambda)`` with the same
# values.
credit_approv_df['DAYS_BIRTH_INV'] = -credit_approv_df['DAYS_BIRTH']
credit_approv_df['DAYS_EMPLOYED_INV'] = -credit_approv_df['DAYS_EMPLOYED']

# hue by categorical column (gender), size by continuous column (income)
plot = sns.scatterplot(
    x='DAYS_BIRTH_INV',
    y='DAYS_EMPLOYED_INV',
    data=credit_approv_df,
    hue='CODE_GENDER',
    palette='winter',
    size='AMT_INCOME_TOTAL',
    alpha=0.3
)
plot.set_title('Age vs Days Employed by Gender and Total Income')
# Restrict the y-axis to 0..17500 days; points outside that range are not shown.
plot.set_ylim(0, 17500)
plt.savefig('assets/Seaborn_Cheat_Sheet_20.webp', bbox_inches='tight')
Continuous Distribution Plots
Rug Plot
# Rug plot: one tick per salary value, coloured by training level.
plt.figure(figsize=(10, 3))
plt.title('Salary Distribution based on Training')
rug_opts = dict(
    x='salary',
    hue='training level',
    palette='gist_rainbow',
    height=0.75,  # ticks span 75% of the axes height
)
sns.rugplot(data=office_sales_df, **rug_opts)
plt.savefig('assets/Seaborn_Cheat_Sheet_03.webp', bbox_inches='tight')
Histogram
# 50-bin salary histogram split by training level, with a KDE curve overlaid.
plt.figure(figsize=(10, 5))
plt.title('Salary Distribution based on Training')
sns.histplot(
    office_sales_df,
    x='salary',
    hue='training level',
    palette='winter',
    bins=50,
    kde=True,
)
plt.savefig('assets/Seaborn_Cheat_Sheet_04.webp', bbox_inches='tight')
# Derive an age-in-years column from DAYS_BIRTH (negative day counts in the
# sample rows) by dividing by -365 and rounding to the nearest whole year.
# Done column-wise instead of with a per-row ``apply(lambda)``; both Python's
# ``round`` and ``Series.round`` round halves to even, so values are unchanged.
credit_approv_df['AGE_YEARS'] = (
    (credit_approv_df['DAYS_BIRTH'] / -365).round().astype('int64')
)

# Step-style histogram of age, one outline per gender.
plt.figure(figsize=(10, 5))
plt.title('Age in Years Distribution')
sns.histplot(
    data=credit_approv_df,
    x='AGE_YEARS',
    bins=45,
    element='step',
    hue='CODE_GENDER',
    palette='tab20'
)
plt.savefig('assets/Seaborn_Cheat_Sheet_21.webp', bbox_inches='tight')
Kernel Density Estimation
# Filled KDE of salary per training level.
plt.figure(figsize=(10, 5))
plt.title('Salary Distribution based on Training')

# Clip the density estimate to the observed salary range so the curves do not
# extend past the smallest/largest salary in the data.
salary_column = office_sales_df['salary']
salary_bounds = [salary_column.min(), salary_column.max()]

sns.kdeplot(
    data=office_sales_df,
    x='salary',
    hue='training level',
    palette='viridis',
    fill=True,
    clip=salary_bounds,
)
plt.savefig('assets/Seaborn_Cheat_Sheet_05.webp', bbox_inches='tight')
Categorical Plots
Count Plot
# Count of rows (sales staff) per division, with one bar per training level.
plt.figure(figsize=(10, 5))
# Title typo fixed: "Personal" -> "Personnel".
plt.title('Sales Personnel per Division by Training')
sns.countplot(
    data=office_sales_df,
    x='division',
    hue='training level',
    palette='seismic'
)
plt.savefig('assets/Seaborn_Cheat_Sheet_06.webp', bbox_inches='tight')
Barplot
# Mean salary per education level, grouped by division, with standard-deviation
# error bars.
plt.figure(figsize=(10, 5))
plt.title('Mean Salary based on Education and Division')
sns.set(style='darkgrid')
bar_opts = {
    'x': 'level of education',
    'y': 'salary',
    'hue': 'division',
    'palette': 'magma',
    'estimator': np.mean,
    'errorbar': 'sd',
}
sns.barplot(data=office_sales_df, **bar_opts)
# Anchor the legend just beyond the top-right corner of the axes.
plt.legend(bbox_to_anchor=(1.01, 1.01))
plt.savefig('assets/Seaborn_Cheat_Sheet_07.webp', bbox_inches='tight')
Categorical Distribution Plots
Boxplot
# Horizontal box plot of math score per gender, split by test-prep attendance.
plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')
sns.boxplot(
    students_performance_df,
    x='math score',
    y='gender',
    hue='test preparation course',
    orient='h',
    palette='cool',
)
plt.savefig('assets/Seaborn_Cheat_Sheet_08.webp', bbox_inches='tight')
# Box plot of total income by family status and realty ownership, drawn from
# the last 10% of rows in the credit dataset.
# NOTE(review): the title says "Lower 10%", but tail() selects rows by position,
# not by income -- confirm the frame is ordered the way the title implies.
ten_percent = round(len(credit_approv_df) / 100 * 10)
tail_rows = credit_approv_df.tail(ten_percent)
tail_income = tail_rows['AMT_INCOME_TOTAL']

plt.figure(figsize=(10, 5))
plt.title('Lower 10% Total Income by Family Status and House Ownership')
plot = sns.boxplot(
    data=tail_rows,
    x='NAME_FAMILY_STATUS',
    y='AMT_INCOME_TOTAL',
    hue='FLAG_OWN_REALTY',
    orient='v',
    palette='winter',
    linewidth=0.5,
    fliersize=1,
)
# Pad the y-axis by 10000 on either side of the observed income range.
y_low = tail_income.min() - 10000
y_high = tail_income.max() + 10000
plot.set_ylim(y_low, y_high)
plt.savefig('assets/Seaborn_Cheat_Sheet_22.webp', bbox_inches='tight')
Violinplot
# Single violin showing the distribution of the 'tip' column.
plt.figure(figsize=(12, 5))
plt.title('Tips Distribution')
tip_values = tips_df['tip']
sns.violinplot(x=tip_values, color='mediumspringgreen')
plt.savefig('assets/Seaborn_Cheat_Sheet_25.webp', bbox_inches='tight')
# Split violins of math score per gender: each half is one test-prep group,
# with quartile lines drawn inside.
# NOTE(review): newer seaborn releases appear to rename `bw` to `bw_method` --
# confirm against the installed version before upgrading.
plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')
violin_opts = dict(
    x='gender',
    y='math score',
    hue='test preparation course',
    split=True,
    inner='quartile',
    bw=0.3,
    orient='v',
    palette='cool',
)
sns.violinplot(data=students_performance_df, **violin_opts)
plt.legend(loc='lower right')
plt.savefig('assets/Seaborn_Cheat_Sheet_09.webp', bbox_inches='tight')
Swarmplot
# Swarm plot: one point per student, math score on x, gender on y,
# coloured by parental education level.
plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Parental Education')
sns.swarmplot(
    students_performance_df,
    y='gender',
    x='math score',
    palette='tab10',
    hue='parental level of education',
)
plt.legend(loc='lower left')
plt.savefig('assets/Seaborn_Cheat_Sheet_10.webp', bbox_inches='tight')
# Swarm plot of tip amount per weekday, coloured by meal time using an
# explicit two-colour palette.
colour_palette = ['dodgerblue', 'mediumspringgreen']

plt.figure(figsize=(12, 5))
plt.title('Tips by Day and Time of the Day')
tips_swarm_opts = {
    'x': 'day',
    'y': 'tip',
    'hue': 'time',
    'palette': colour_palette,
}
sns.swarmplot(data=tips_df, **tips_swarm_opts)
plt.legend(loc='upper right')
plt.savefig('assets/Seaborn_Cheat_Sheet_24.webp', bbox_inches='tight')
![Seaborn Cheat Sheet 2023](./Seaborn_Cheat_Sheet_24.webp)
Boxenplot
# Vertical boxen (letter-value) plot of math score per gender, split by
# test-prep attendance.
plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')
sns.boxenplot(
    students_performance_df,
    y='math score',
    x='gender',
    orient='v',
    hue='test preparation course',
    palette='seismic',
)
plt.savefig('assets/Seaborn_Cheat_Sheet_11.webp', bbox_inches='tight')
Comparison Plots
Jointplot
# Joint scatter of reading vs. math score with marginal distributions,
# coloured by gender.
joint_scatter_opts = dict(
    x='reading score',
    y='math score',
    hue='gender',
    kind='scatter',
    palette='winter',
    alpha=0.4,
)
sns.jointplot(data=students_performance_df, **joint_scatter_opts)
plt.savefig('assets/Seaborn_Cheat_Sheet_12.webp', bbox_inches='tight')
# Joint KDE of reading vs. math score in a single colour.
# NOTE(review): the figure title says "by Gender", but no `hue` is passed here,
# so the plot is not split by gender -- confirm which one is intended.
plot = sns.jointplot(
    students_performance_df,
    x='reading score',
    y='math score',
    kind='kde',
    color='dodgerblue',
    fill=True,
)
suptitle_opts = {'fontsize': 6, 'fontdict': {"weight": "normal"}}
plot.fig.suptitle('Math Score vs Reading Score by Gender', **suptitle_opts)
plt.savefig('assets/Seaborn_Cheat_Sheet_13.webp', bbox_inches='tight')
Pairplot
# Pairwise relationships between the student-performance columns that
# pairplot selects, coloured by gender.
pairplot_opts = {'hue': 'gender', 'palette': 'terrain'}
sns.pairplot(students_performance_df, **pairplot_opts)
plt.savefig('assets/Seaborn_Cheat_Sheet_14.webp', bbox_inches='tight')
Grid Plots
Catplot
# Grid of boxen plots: math score by lunch type, with one column per
# test-prep group and one row per gender.
sns.catplot(
    students_performance_df,
    kind='boxen',
    x='lunch',
    y='math score',
    row='gender',
    col='test preparation course',
    hue='test preparation course',
    palette='mako',
)
plt.savefig('assets/Seaborn_Cheat_Sheet_15.webp', bbox_inches='tight')
Pairgrid
# PairGrid with a different plot type per region: scatter above the diagonal,
# KDE below it, histograms on it.
grid = (
    sns.PairGrid(students_performance_df)
    .map_upper(sns.scatterplot, color='dodgerblue')
    .map_lower(sns.kdeplot, cmap='winter')
    .map_diag(sns.histplot, color='fuchsia')
)
plt.savefig('assets/Seaborn_Cheat_Sheet_16.webp', bbox_inches='tight')
# PairGrid coloured by gender. Upper triangle sizes points by test-prep group;
# lower triangle sizes and styles points by race/ethnicity; diagonal shows KDEs.
prep_course = students_performance_df["test preparation course"]
ethnicity = students_performance_df["race/ethnicity"]

grid = (
    sns.PairGrid(
        students_performance_df,
        hue='gender',
        palette='cool',
    )
    .map_upper(sns.scatterplot, size=prep_course, alpha=0.8)
    .map_lower(sns.scatterplot, size=ethnicity, style=ethnicity)
    .map_diag(sns.kdeplot)
    .add_legend(title="", adjust_subtitles=True)
)
plt.savefig('assets/Seaborn_Cheat_Sheet_17.webp', bbox_inches='tight')
Matrix Plots
Heatmap
# Annotated heatmap of the Month x Day temperature table.
plt.figure(figsize=(20, 8), dpi=200)
plt.title('Average daily temperature of Toronto, Canada in 2020')
temperature_heatmap_opts = dict(
    cmap='coolwarm',
    annot=True,      # print each cell's value
    linewidth=0.5,   # thin separator between cells
)
sns.heatmap(data=country_table_df, **temperature_heatmap_opts)
plt.savefig('assets/Seaborn_Cheat_Sheet_18.webp', bbox_inches='tight')
# Correlation heatmap for the credit-card approval dataset.

# Drop the text-valued columns (they cannot be correlated) together with
# CNT_CHILDREN and FLAG_MOBIL, which this heatmap also leaves out.
# The duplicated 'NAME_FAMILY_STATUS' entry has been removed.
credit_approv_df_only_numeric = credit_approv_df.drop([
    'CODE_GENDER',
    'FLAG_OWN_CAR',
    'FLAG_OWN_REALTY',
    'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE',
    'NAME_FAMILY_STATUS',
    'NAME_HOUSING_TYPE',
    'OCCUPATION_TYPE',
    'CLASSIFICAO',
    'CNT_CHILDREN',
    'FLAG_MOBIL'
], axis=1)

# Fixed: this previously read from the undefined name
# `credit_approv_df_numeric`, which raised NameError.
# how='all' removes only rows in which every remaining value is missing.
credit_approv_df_dropna = credit_approv_df_only_numeric.dropna(how='all')

plt.figure(figsize=(20, 8), dpi=200)
plt.title('Correlation Heatmap Credit Card Approval Dataset')
sns.heatmap(
    credit_approv_df_dropna.corr(),
    linewidth=0.5,
    cmap='seismic',
    annot=True
)
plt.savefig('assets/Seaborn_Cheat_Sheet_23.webp', bbox_inches='tight')
Clustermap
# Clustered heatmap of the Month x Day temperature table. Column clustering is
# switched off, so only the rows (months) are reordered.
cluster_opts = {
    'cmap': 'coolwarm',
    'annot': True,
    'linewidth': 0.5,
    'col_cluster': False,
}
sns.clustermap(data=country_table_df, **cluster_opts)
plt.savefig('assets/Seaborn_Cheat_Sheet_19.webp', bbox_inches='tight')
Facet Grid
# Facet grid of tip histograms: one column per meal time, one row per smoker
# flag, coloured by weekday, with a shared x-axis.
plot = (
    sns.FacetGrid(
        data=tips_df,
        row='smoker',
        col='time',
        hue='day',
        palette='plasma_r',
        sharex=True,
    )
    .map(plt.hist, 'tip')
    .add_legend()
)
plt.savefig('assets/Seaborn_Cheat_Sheet_26.webp', bbox_inches='tight')
# Facet grid of total-bill vs. tip scatters: one column per weekday, one row
# per sex, coloured by meal time.
plot = (
    sns.FacetGrid(
        data=tips_df,
        row='sex',
        col='day',
        hue='time',
        palette='plasma',
    )
    .map(plt.scatter, 'total_bill', 'tip')
    .add_legend()
)
plt.savefig('assets/Seaborn_Cheat_Sheet_27.webp', bbox_inches='tight')