Skip to main content

Sham Sui Po, Hong Kong

Analyzing Credit Card Customer Churn Behaviour

Problem Statement: A manager at the bank is disturbed with more and more customers leaving their credit card services. They would really appreciate if one could predict for them who is considering leaving the bank so they can proactively go to the customer to provide them better services and reverse the customers' decision in their favour.

Github Repository

Dataset

ClientnumNum Client number. Unique identifier for the customer holding the account
Attrition_Flagchar Internal event (customer activity) variable
Customer_AgeNum Demographic variable - Customer's Age in Years
GenderChar Demographic variable - M=Male, F=Female
Dependent_countNum Demographic variable - Number of people dependents
Education_LevelChar Demographic variable - Educational Qualification of the account holder(example: high school, college graduate, etc.)
Marital_StatusChar Demographic variable - Married, Single, Unknown
Income_CategoryChar Demographic variable - Annual Income Category of the account holder (< 40K, 40K - 60K, 60K - 80K, 80K-120K, > 120K, Unknown)
Card_CategoryChar Product Variable - Type of Card (Blue, Silver, Gold, Platinum)
Months_on_bookNum Months on book (Time of Relationship)
Total_Relationship_CountNum Total no. of products held by the customer
Months_Inactive_12_monNum No. of months inactive in the last 12 months
Contacts_Count_12_monNum No. of Contacts in the last 12 months
Credit_LimitNum Credit Limit on the Credit Card
Total_Revolving_BalNum Total Revolving Balance on the Credit Card
Avg_Open_To_BuyNum Open to Buy Credit Line (Average of last 12 months)
Total_Amt_Chng_Q4_Q1Num Change in Transaction Amount (Q4 over Q1)
Total_Trans_AmtNum Total Transaction Amount (Last 12 months)
Total_Trans_CtNum Total Transaction Count (Last 12 months)
Total_Ct_Chng_Q4_Q1Num Change in Transaction Count (Q4 over Q1)
Avg_Utilization_RatioNum Average Card Utilization Ratio
!wget https://github.com/tassneam/Credit-Card-Customers-Prediction/raw/main/BankChurners.csv -P dataset
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot
import seaborn as sns
cc_df = pd.read_csv('dataset/BankChurners.csv')
cc_df.head(5).transpose()
# 5 rows × 23 columns
01234
CLIENTNUM768805383818770008713982108769911858709106358
Attrition_FlagExisting CustomerExisting CustomerExisting CustomerExisting CustomerExisting Customer
Customer_Age4549514040
GenderMFMFM
Dependent_count35343
Education_LevelHigh SchoolGraduateGraduateHigh SchoolUneducated
Marital_StatusMarriedSingleMarriedUnknownMarried
Income_Category60𝐾−80KLess than 40K80𝐾−120KLess than 40K60𝐾−80K
Card_CategoryBlueBlueBlueBlueBlue
Months_on_book3944363421
Total_Relationship_Count56435
Months_Inactive_12_mon1114
Contacts_Count_12_mon32010
Credit_Limit12691.08256.03418.03313.04716.0
Total_Revolving_Bal777864025170
Avg_Open_To_Buy11914.07392.03418.0796.04716.0
Total_Amt_Chng_Q4_Q11.3351.5412.5941.4052.175
Total_Trans_Amt1144129118871171816
Total_Trans_Ct4233202028
Total_Ct_Chng_Q4_Q11.6253.7142.3332.3332.5
Avg_Utilization_Ratio0.0610.1050.00.760.0
Naive_Bayes_Classifier0.0000930.0000570.0000210.0001340.000022
classificationTrueTrueTrueTrueTrue

Preprocessing

Duplicates

print(cc_df.shape, cc_df['CLIENTNUM'].nunique())
# there are as many ClientIDs as there are rows :thumbsup:
# (2998, 23) 2998
cc_df.drop_duplicates(inplace=True)
cc_df.shape
# nothing is dropped :thumbsup:
# (2998, 23)

Subsetting

cc_df.columns
# Index(['CLIENTNUM', 'Attrition_Flag', 'Customer_Age', 'Gender',
# 'Dependent_count', 'Education_Level', 'Marital_Status',
# 'Income_Category', 'Card_Category', 'Months_on_book',
# 'Total_Relationship_Count', 'Months_Inactive_12_mon',
# 'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal',
# 'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
# 'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio',
# 'Naive_Bayes_Classifier', 'classification'],
# dtype='object')
# drop what you don't need
cc_df_ss = cc_df[['CLIENTNUM', 'Attrition_Flag', 'Customer_Age', 'Gender',
'Dependent_count', 'Education_Level', 'Marital_Status',
'Income_Category', 'Card_Category', 'Months_on_book',
'Total_Relationship_Count', 'Months_Inactive_12_mon',
'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal',
'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio']]

cc_df_ss.shape
# (2998, 21)

Datatypes

# check if int/float/datetime values are not strings
cc_df_ss.dtypes
CLIENTNUMint64
Attrition_Flagobject
Customer_Ageint64
Genderobject
Dependent_countint64
Education_Levelobject
Marital_Statusobject
Income_Categoryobject
Card_Categoryobject
Months_on_bookint64
Total_Relationship_Countint64
Months_Inactive_12_monint64
Contacts_Count_12_monint64
Credit_Limitfloat64
Total_Revolving_Balint64
Avg_Open_To_Buyfloat64
Total_Amt_Chng_Q4_Q1float64
Total_Trans_Amtint64
Total_Trans_Ctint64
Total_Ct_Chng_Q4_Q1float64
Avg_Utilization_Ratiofloat64
dtype: object

Missing Values

# test for missing data
cc_df_ss.isnull().sum()
CLIENTNUM0
Attrition_Flag0
Customer_Age0
Gender0
Dependent_count0
Education_Level0
Marital_Status0
Income_Category0
Card_Category0
Months_on_book0
Total_Relationship_Count0
Months_Inactive_12_mon0
Contacts_Count_12_mon0
Credit_Limit0
Total_Revolving_Bal0
Avg_Open_To_Buy0
Total_Amt_Chng_Q4_Q10
Total_Trans_Amt0
Total_Trans_Ct0
Total_Ct_Chng_Q4_Q10
Avg_Utilization_Ratio0

Data Transformation

Binning

print(
cc_df_ss['Customer_Age'].min(),
cc_df_ss['Customer_Age'].max()
)
# 26 73 => bins=[20,30,40,50,60,70,80]
bins=[20,30,40,50,60,70,80]
labels=['20s','30s','40s','50s','60s','70s','80s']

cc_df_ss['Customer_Age_Bins'] = pd.cut(
cc_df_ss['Customer_Age'],
bins,
labels,
include_lowest=True
)

cc_df_ss.head(5).transpose()
01234
CLIENTNUM768805383818770008713982108769911858709106358
Attrition_FlagExisting CustomerExisting CustomerExisting CustomerExisting CustomerExisting Customer
Customer_Age4549514040
GenderMFMFM
Dependent_count35343
Education_LevelHigh SchoolGraduateGraduateHigh SchoolUneducated
Marital_StatusMarriedSingleMarriedUnknownMarried
Income_Category60𝐾−80KLess than 40K80𝐾−120KLess than 40K60𝐾−80K
Card_CategoryBlueBlueBlueBlueBlue
Months_on_book3944363421
Total_Relationship_Count56435
Months_Inactive_12_mon11141
Contacts_Count_12_mon32010
Credit_Limit12691.08256.03418.03313.04716.0
Total_Revolving_Bal777864025170
Avg_Open_To_Buy11914.07392.03418.0796.04716.0
Total_Amt_Chng_Q4_Q11.3351.5412.5941.4052.175
Total_Trans_Amt1144129118871171816
Total_Trans_Ct4233202028
Total_Ct_Chng_Q4_Q11.6253.7142.3332.3332.5
Avg_Utilization_Ratio0.0610.1050.00.760.0
Customer_Age_Bins(40.0, 50.0](40.0, 50.0](50.0, 60.0](30.0, 40.0](30.0, 40.0]

Data Exploration

Scatterplots

cc_df_ss.columns
# find correlations using a pairplot
# remove unecessary columns
temp = cc_df_ss[['Attrition_Flag', 'Customer_Age', 'Gender',
'Dependent_count', 'Education_Level', 'Marital_Status',
'Income_Category', 'Card_Category', 'Months_on_book',
'Total_Relationship_Count', 'Months_Inactive_12_mon',
'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal',
'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio']]
# remove categorical columns
numeric_data_df = temp._get_numeric_data()
numeric_data_df.head(5).transpose()
01234
Customer_Age45.00049.00051.00040.00040.000
Dependent_count3.0005.0003.0004.0003.000
Months_on_book39.00044.00036.00034.00021.000
Total_Relationship_Count5.0006.0004.0003.0005.000
Months_Inactive_12_mon1.0001.0001.0004.0001.000
Contacts_Count_12_mon3.0002.0000.0001.0000.000
Credit_Limit12691.0008256.0003418.0003313.0004716.000
Total_Revolving_Bal777.000864.0000.0002517.0000.000
Avg_Open_To_Buy11914.0007392.0003418.000796.0004716.000
Total_Amt_Chng_Q4_Q11.3351.5412.5941.4052.175
Total_Trans_Amt1144.0001291.0001887.0001171.000816.000
Total_Trans_Ct42.00033.00020.00020.00028.000
Total_Ct_Chng_Q4_Q11.6253.7142.3332.3332.500
Avg_Utilization_Ratio0.0610.1050.0000.7600.000
pairgrid = sns.PairGrid(
data=numeric_data_df,
diag_sharey=False,
corner=True
)

pairgrid.map_lower(sns.scatterplot)
pairgrid.map_diag(sns.histplot)

plt.savefig('../assets/CC_Customer_Churn_13.webp', bbox_inches='tight')

Credit Card Customer Churn Prediction

# dive deeper into plots with interesting correlations
plt.figure(figsize=(10, 6))

# hue/size by continuous column
sns.scatterplot(
data=cc_df_ss,
x='Credit_Limit',
y='Avg_Utilization_Ratio',
hue='Gender',
palette='winter',
size='Customer_Age'
).set_title('Credit Limit vs Avg Utilization Ratio')

plt.savefig('../assets/CC_Customer_Churn_14.webp', bbox_inches='tight')

Credit Card Customer Churn Prediction

Investigate Subgroups

# compare high to low spender
bins = [
cc_df_ss['Total_Trans_Amt'].min(),
1000,
cc_df_ss['Total_Trans_Amt'].max()
]

labels = ['Group 1', "Group 2"]

cc_df_ss_temp = cc_df_ss.copy()

cc_df_ss_temp['Total_Trans_Amt_Grp'] = pd.cut(
cc_df_ss['Total_Trans_Amt'],
bins=bins,
labels=labels,
include_lowest=True
)

cc_df_ss_temp.head(1).transpose()
0
CLIENTNUM768805383
Attrition_FlagExisting Customer
Customer_Age45
GenderM
Dependent_count3
Education_LevelHigh School
Marital_StatusMarried
Income_Category60𝐾−80K
Card_CategoryBlue
Months_on_book39
Total_Relationship_Count5
Months_Inactive_12_mon1
Contacts_Count_12_mon3
Credit_Limit12691.0
Total_Revolving_Bal777
Avg_Open_To_Buy11914.0
Total_Amt_Chng_Q4_Q11.335
Total_Trans_Amt1144
Total_Trans_Ct42
Total_Ct_Chng_Q4_Q11.625
Avg_Utilization_Ratio0.061
Customer_Age_Bins(40.0, 50.0]
Total_Trans_Amt_GrpGroup 2
cc_df_ss_temp = cc_df_ss_temp.groupby(['Total_Trans_Amt_Grp', 'Attrition_Flag']).agg({
'CLIENTNUM':'nunique',
'Customer_Age':'median',
'Dependent_count':'median',
'Months_on_book':'median',
'Total_Relationship_Count':'median',
'Months_Inactive_12_mon':'median',
'Contacts_Count_12_mon':'median',
'Credit_Limit':'median',
'Total_Revolving_Bal':'median',
'Avg_Open_To_Buy':'median',
'Total_Amt_Chng_Q4_Q1':'median',
'Total_Trans_Amt':'median',
'Total_Trans_Ct':'median',
'Total_Ct_Chng_Q4_Q1':'median',
'Avg_Utilization_Ratio':'median',
})

cc_df_ss_temp.transpose()
Total_Trans_Amt_GrpGroup 1Group 2
Attrition_FlagAttrited CustomerExisting CustomerAttrited CustomerExisting Customer
CLIENTNUM142.000019.00082.002755.000
Customer_Age49.000044.00048.0046.000
Dependent_count2.00002.0002.002.000
Months_on_book36.000036.00036.0036.000
Total_Relationship_Count3.00005.0003.005.000
Months_Inactive_12_mon3.00002.0003.002.000
Contacts_Count_12_mon3.00002.0003.003.000
Credit_Limit4740.50006884.0007618.005550.000
Total_Revolving_Bal0.00001330.0000.001475.000
Avg_Open_To_Buy3854.00005653.0006410.004237.000
Total_Amt_Chng_Q4_Q10.72500.7810.770.761
Total_Trans_Amt810.0000949.0001353.001805.000
Total_Trans_Ct20.000024.00033.0043.000
Total_Ct_Chng_Q4_Q10.46450.9090.460.682
Avg_Utilization_Ratio0.00000.1520.000.210
cc_df_ss_temp.transpose()['Group 1'].loc[
[
'Credit_Limit',
'Avg_Open_To_Buy',
'Total_Trans_Amt'
]
]
Attrition_FlagAttrited CustomerExisting Customer
Credit_Limit4740.56884.0
Avg_Open_To_Buy3854.05653.0
Total_Trans_Amt810.0949.0
cc_df_ss_temp.transpose()['Group 1'].loc[
['Credit_Limit','Avg_Open_To_Buy','Total_Trans_Amt']
].plot.bar(
figsize=(10,5),
subplots=False,
legend=True,
sharey=True,
layout=(1,2),
title='Group 1 (Low Spender)'
)

Credit Card Customer Churn Prediction

cc_df_ss_temp.transpose()['Group 2'].loc[
['Credit_Limit','Avg_Open_To_Buy','Total_Trans_Amt']
].plot.bar(
figsize=(10,5),
subplots=False,
legend=True,
sharey=True,
layout=(1,2),
title='Group 1 (High Spender)'
)

Credit Card Customer Churn Prediction

plot = sns.FacetGrid(
cc_df_ss,
row='Attrition_Flag',
height=5,
aspect=2
)

plot.map_dataframe(
sns.kdeplot,
x='Total_Ct_Chng_Q4_Q1'
)

plt.xlim(0,2)

plt.savefig('../assets/CC_Customer_Churn_15.webp', bbox_inches='tight')

Credit Card Customer Churn Prediction

palette = sns.color_palette('winter', 2)

def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, color='black', fontsize=13,
ha='left', va='center', transform=ax.transAxes)

sns.set_theme(
style='white',
rc={'axes.facecolor': (0, 0, 0, 0), 'axes.linewidth':2}
)

fg = sns.FacetGrid(
cc_df_ss,
palette=palette,
hue='Attrition_Flag',
row='Attrition_Flag',
aspect=5,
height=3
)

fg.map_dataframe(
sns.kdeplot,
x='Total_Ct_Chng_Q4_Q1',
fill=True,
alpha=1
)

fg.map_dataframe(
sns.kdeplot,
x='Total_Ct_Chng_Q4_Q1',
color='white'
)

fg.map(label, 'Attrition_Flag')
fg.fig.subplots_adjust(hspace=-.5)
fg.set_titles('')
fg.set(yticks=[], ylabel='', xlabel='Total Count Change Q4 to Q1')
fg.despine(left=True)

plt.suptitle('Customer Q4 to Q1 Transaction Drop', y=0.98)
plt.xlim(-0.5,1.5)

Credit Card Customer Churn Prediction

Histograms

plt.hist( 
cc_df_ss['Customer_Age'],
bins=7,
histtype='step'
)
plt.title('Customer Age Histogram')
plt.xlabel('Age Group')
plt.ylabel('Count')

Credit Card Customer Churn Prediction

# find churn percentage
cc_df_ss['Attrition_Flag'].value_counts()

# Existing Customer 2774
# Attrited Customer 224
# Name: Attrition_Flag, dtype: int64
percentage = cc_df_ss['Attrition_Flag'].value_counts()['Attrited Customer'] / cc_df_ss.shape[0] * 100
print(f"Attrited Customers: {round(percentage)}%")

# Attrited Customers: 7%
plt.hist(
cc_df_ss['Months_on_book'],
bins=20,
histtype='step'
)

plt.title('Customer Months on book (Time of Relationship)')
plt.xlabel('Months on Book')
plt.ylabel('Count')

Credit Card Customer Churn Prediction

plt.hist(
cc_df_ss['Credit_Limit'],
)

plt.vlines(
x=cc_df_ss['Credit_Limit'].mean(),
ymin=0, ymax=1500, colors='0.75',
linestyles='dashed', label='MEAN'
)

plt.vlines(
x=cc_df_ss['Credit_Limit'].median(),
ymin=0, ymax=1500, colors='0.75',
linestyles='dotted', label='MEDIAN'
)

plt.title('Customer Credit Limit on the Credit Card')
plt.xlabel('Credit Limit')
plt.ylabel('Count')
plt.legend()

Credit Card Customer Churn Prediction

The Mean is more influenced by outliers than the Median function. Use median() when your distribution deviates from a normal distribution.

plt.hist(
cc_df_ss['Total_Trans_Ct'],
bins=75,
histtype='step'
)

plt.vlines(
x=cc_df_ss['Total_Trans_Ct'].mean(),
ymin=0, ymax=200, colors='0.75',
linestyles='dashed', label='MEAN'
)

plt.vlines(
x=cc_df_ss['Total_Trans_Ct'].median(),
ymin=0, ymax=200, colors='0.75',
linestyles='dotted', label='MEDIAN'
)

plt.title('Customer Total Transaction Count (Last 12 months)')
plt.xlabel('Total Transaction Count')
plt.ylabel('Count')
plt.legend()

Credit Card Customer Churn Prediction

plt.hist(
cc_df_ss['Total_Trans_Amt'],
bins=100,
histtype='step'
)

plt.vlines(
x=cc_df_ss['Total_Trans_Amt'].mean(),
ymin=0, ymax=130, colors='0.75',
linestyles='dashed', label='MEAN'
)

plt.vlines(
x=cc_df_ss['Total_Trans_Amt'].median(),
ymin=0, ymax=130, colors='0.75',
linestyles='dotted', label='MEDIAN'
)

plt.title('Customer Total Transaction Amount (Last 12 months)')
plt.xlabel('Total Transaction Amount')
plt.ylabel('Count')
plt.legend()

Credit Card Customer Churn Prediction

Data Transformation

Normalization

def normalize(column):
upper = column.max()
lower = column.min()
norm = (column - lower)/(upper - lower)

return norm
cc_df_ss['Credit_Limit_Norm'] = normalize(cc_df_ss['Credit_Limit'])
plt.hist(
x=cc_df_ss['Credit_Limit_Norm'],
bins=40,
histtype='step'
)

plt.vlines(
x=cc_df_ss['Credit_Limit_Norm'].mean(),
ymin=0, ymax=500, colors='0.75',
linestyles='dashed', label='MEAN'
)

plt.vlines(
x=cc_df_ss['Credit_Limit_Norm'].median(),
ymin=0, ymax=500, colors='0.75',
linestyles='dotted', label='MEDIAN'
)

plt.title('Customer Credit Limit on the Credit Card')
plt.xlabel('Credit Limit (Normalized)')
plt.ylabel('Count')
plt.legend()

Credit Card Customer Churn Prediction

Log Transform

cc_df_ss['Credit_Limit_Log'] = np.log(cc_df_ss['Credit_Limit'])
plt.hist(
x=cc_df_ss['Credit_Limit_Log'],
bins=40,
histtype='step'
)

plt.vlines(
x=cc_df_ss['Credit_Limit_Log'].mean(),
ymin=0, ymax=500, colors='0.75',
linestyles='dashed', label='MEAN'
)

plt.vlines(
x=cc_df_ss['Credit_Limit_Log'].median(),
ymin=0, ymax=500, colors='0.75',
linestyles='dotted', label='MEDIAN'
)

plt.title('Customer Credit Limit on the Credit Card')
plt.xlabel('Credit Limit (Log)')
plt.ylabel('Count')
plt.legend()

Credit Card Customer Churn Prediction

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
fig.suptitle('Before and After Transformation')

sns.histplot(
data=cc_df_ss,
x='Credit_Limit',
bins=50,
hue='Gender',
palette='winter',
kde=True,
ax=axes[0]
)

sns.histplot(
data=cc_df_ss,
x='Credit_Limit_Norm',
bins=50,
hue='Gender',
palette='winter',
kde=True,
ax=axes[1]
)

sns.histplot(
data=cc_df_ss,
x='Credit_Limit_Log',
bins=50,
hue='Gender',
palette='winter',
kde=True,
ax=axes[2]
)

Credit Card Customer Churn Prediction

More Distribution Plot

Box Plot

plt.figure(figsize=(10, 5))
plt.title('Total Transaction Amount by Income and Gender')

plot = sns.boxplot(
data=cc_df_ss,
y='Total_Trans_Amt',
x='Income_Category',
hue='Gender',
palette='winter',
orient='v',
linewidth=0.5,
fliersize=1
)

Credit Card Customer Churn Prediction

Pyramid Chart

# count customers in age bins and classify by gender
cc_gen_age = cc_df_ss.groupby(
['Gender', 'Customer_Age_Bins']
)['CLIENTNUM'].nunique().reset_index()

cc_gen_age.head(5)
GenderCustomer_Age_BinsCLIENTNUM
0F(19.999, 30.0]65
1F(30.0, 40.0]343
2F(40.0, 50.0]328
3F(50.0, 60.0]323
4F(60.0, 70.0]107
women_bins = np.array(-1 * cc_gen_age[cc_gen_age['Gender'] == 'F']['CLIENTNUM'])
men_bins = np.array(cc_gen_age[cc_gen_age['Gender'] == 'M']['CLIENTNUM'])
y = list(range(20, 100, 10))

layout = go.Layout(
title='Customer Age Distribution by Gender',
yaxis=go.layout.YAxis(title='Age'),
xaxis=go.layout.XAxis(
range=[-400, 650],
tickvals=[-300, -150, 0, 150, 300, 600],
ticktext=[300, 150, 0, 150, 300, 600],
title='Customer'),
barmode='overlay',
bargap=0.1)

p_data = [go.Bar(y=y,
x=men_bins,
orientation='h',
name='Men',
hoverinfo='x',
marker=dict(color='dodgerblue')
),
go.Bar(y=y,
x=woman_bins,
orientation='h',
name='Women',
text=-1 * women_bins.astype('int'),
hoverinfo='text',
marker=dict(color='mediumspringgreen')
)]

iplot(dict(data=p_data, layout=layout))

Credit Card Customer Churn Prediction

Bar Chart

plt.figure(figsize=(10, 5))
plt.title('Credit Limit by Age and Gender')
sns.set(style='darkgrid')
sns.barplot(
data=cc_df_ss,
x='Customer_Age_Bins',
y='Credit_Limit',
estimator=np.median,
errorbar='sd',
hue='Gender',
palette='winter'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
plt.savefig('../assets/CC_Customer_Churn_11.webp', bbox_inches='tight')

Credit Card Customer Churn Prediction

Aggregations

cc_df_attr = cc_df_ss.groupby(['Attrition_Flag']).agg({
'CLIENTNUM':'nunique',
'Customer_Age':'median',
'Dependent_count':'median',
'Months_on_book':'median',
'Total_Relationship_Count':'median',
'Months_Inactive_12_mon':'median',
'Contacts_Count_12_mon':'median',
'Credit_Limit':'median',
'Total_Revolving_Bal':'median',
'Avg_Open_To_Buy':'median',
'Total_Amt_Chng_Q4_Q1':'median',
'Total_Trans_Amt':'median',
'Total_Trans_Ct':'median',
'Total_Ct_Chng_Q4_Q1':'median',
'Avg_Utilization_Ratio':'median',
})

cc_df_attr_trans = cc_df_attr.transpose().reset_index()
cc_df_attr_trans
Attrition_FlagindexAttrited CustomerExisting Customer
0CLIENTNUM224.00002774.000
1Customer_Age48.000046.000
2Dependent_count2.00002.000
3Months_on_book36.000036.000
4Total_Relationship_Count3.00005.000
5Months_Inactive_12_mon3.00002.000
6Contacts_Count_12_mon3.00003.000
7Credit_Limit5687.50005553.000
8Total_Revolving_Bal0.00001474.500
9Avg_Open_To_Buy5189.50004245.000
10Total_Amt_Chng_Q4_Q10.73750.762
11Total_Trans_Amt911.00001802.000
12Total_Trans_Ct24.000042.000
13Total_Ct_Chng_Q4_Q10.46200.682
14Avg_Utilization_Ratio0.00000.209
# sort by greatest difference
cc_df_attr_trans['Diff'] = cc_df_attr_trans['Attrited Customer'] / cc_df_attr_trans['Existing Customer'] - 1
cc_df_attr_trans = cc_df_attr_trans.sort_values('Diff')
cc_df_attr_trans
Attrition_FlagindexAttrited CustomerExisting CustomerDiff
8Total_Revolving_Bal0.00001474.500-1.000000
14Avg_Utilization_Ratio0.00000.209-1.000000
0CLIENTNUM224.00002774.000-0.919250
11Total_Trans_Amt911.00001802.000-0.494451
12Total_Trans_Ct24.000042.000-0.428571
4Total_Relationship_Count3.00005.000-0.400000
13Total_Ct_Chng_Q4_Q10.46200.682-0.322581
10Total_Amt_Chng_Q4_Q10.73750.762-0.032152
2Dependent_count2.00002.0000.000000
3Months_on_book36.000036.0000.000000
6Contacts_Count_12_mon3.00003.0000.000000
7Credit_Limit5687.50005553.0000.024221
1Customer_Age48.000046.0000.043478
9Avg_Open_To_Buy5189.50004245.0000.222497
5Months_Inactive_12_mon3.00002.0000.500000
plt.figure(figsize=(10, 5))
plt.title('Relative Difference between Existing and Attrited Customers')
sns.set(style='darkgrid')
sns.barplot(
data=cc_df_attr_trans,
x='Diff',
y='index',
estimator=np.median,
errorbar='sd',
palette='winter',
orient='h'
)

plt.savefig('../assets/CC_Customer_Churn_12.webp', bbox_inches='tight')

Credit Card Customer Churn Prediction