Skip to main content

Sham Sui Po, Hong Kong

Plotly & Seaborn to Explore Text Dataset

Github Repository

Dataset

Women's E-Commerce Clothing Reviews

Content

This dataset includes 23486 rows and 10 feature variables. Each row corresponds to a customer review, and includes the variables:

  • Clothing ID: Integer Categorical variable that refers to the specific piece being reviewed.
  • Age: Positive Integer variable of the reviewer's age.
  • Title: String variable for the title of the review.
  • Review Text: String variable for the review body.
  • Rating: Positive Ordinal Integer variable for the product score granted by the customer from 1 Worst, to 5 Best.
  • Recommended IND: Binary variable stating whether the customer recommends the product, where 1 is recommended, 0 is not recommended.
  • Positive Feedback Count: Positive Integer documenting the number of other customers who found this review positive.
  • Division Name: Categorical name of the product high level division.
  • Department Name: Categorical name of the product department name.
  • Class Name: Categorical name of the product class name.
# Standard library
import json

# Third-party
import cufflinks as cf
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import seaborn as sns
from contractions import contractions
from plotly.offline import iplot
from sklearn.feature_extraction.text import CountVectorizer
from textblob import TextBlob
# Render plotly figures inline in the notebook without a plotly.com account,
# and route pandas .iplot() calls through cufflinks in offline mode.
py.offline.init_notebook_mode(connected=True)
cf.go_offline()
! wget 'https://github.com/ya-stack/Women-s-Ecommerce-Clothing-Reviews/raw/master/Womens%20Clothing%20E-Commerce%20Reviews.csv' -P dataset
reviews_df = pd.read_csv('dataset/Womens_Clothing_E-Commerce_Reviews.csv')
reviews_df.head(5)
Unnamed: 0Clothing IDAgeTitleReview TextRatingRecommended INDPositive Feedback CountDivision NameDepartment NameClass Name
0076733NaNAbsolutely wonderful - silky and sexy and comf...410InitmatesIntimateIntimates
11108034NaNLove this dress! it's sooo pretty. i happene...514GeneralDressesDresses
22107760Some major design flawsI had such high hopes for this dress and reall...300GeneralDressesDresses
33104950My favorite buy!I love, love, love this jumpsuit. it's fun, fl...510General PetiteBottomsPants
4484747Flattering shirtThis shirt is very flattering to all due to th...516GeneralTopsBlouses

Data Cleaning

# Drop columns that carry no signal for this analysis: the CSV's positional
# index, the product identifier, and the frequently-missing review title.
reviews_df_drop = reviews_df.drop(columns=['Unnamed: 0', 'Clothing ID', 'Title'])
reviews_df_drop.head(5)
AgeReview TextRatingRecommended INDPositive Feedback CountDivision NameDepartment NameClass Name
033Absolutely wonderful - silky and sexy and comf...410InitmatesIntimateIntimates
134Love this dress! it's sooo pretty. i happene...514GeneralDressesDresses
260I had such high hopes for this dress and reall...300GeneralDressesDresses
350I love, love, love this jumpsuit. it's fun, fl...510General PetiteBottomsPants
447This shirt is very flattering to all due to th...516GeneralTopsBlouses
# Column totals for the numeric features (sanity check before cleaning).
reviews_df_drop.sum(numeric_only=True)
Age1014561
Rating98548
Recommended IND19314
Positive Feedback Count59559
dtype: int64
# Drop rows missing the review body or the division label: both are required
# downstream (text features and the categorical plots). Re-check the totals.
reviews_df_drop.dropna(subset=['Review Text', 'Division Name'], inplace=True)
reviews_df_drop.sum(numeric_only=True)

Remove Contractions

# Concatenate every review body into one string to eyeball the raw text.
' '.join(reviews_df_drop['Review Text'].tolist())
def expand_contractions(x):
    """Expand English contractions in *x* via the `contractions` mapping.

    Non-string values pass through unchanged. Backslashes are stripped
    first so escaped apostrophes (e.g. "it\\'s") still match the keys.
    """
    if type(x) is not str:
        return x
    text = x.replace('\\', '')
    for contraction, expansion in contractions.items():
        text = text.replace(contraction, expansion)
    return text
# Smoke-test the expander on a raw string with escaped quotes/contractions.
x = "y'all it\'s sooo pretty, i am 5\' 5\'\', about 135"
print(expand_contractions(x))
# you all it is sooo pretty, i am 5' 5'', about 135
# test on a dataframe
# Verify the expander also works element-wise over a pandas StringDtype series.
string_series = pd.Series([
"i\'m glad i did bc it\'s never...",
"I\'m 5\"5\' and ain't 125 lbs."
], dtype="string")

string_df = pd.DataFrame({'strings':string_series.values})
string_df['strings'].apply(lambda x: expand_contractions(x)).head(2)

# 0 I am glad i did because it is never...
# 1 I am 5"5' and is not 125 lbs.
# Name: strings, dtype: object
# Apply the contraction expansion to the full corpus on a copy, keeping the
# cleaned-but-unexpanded frame intact for comparison.
reviews_df_expanded = reviews_df_drop.copy()
reviews_df_expanded['Review Text'] = reviews_df_drop['Review Text'].apply(
lambda x: expand_contractions(x)
)
' '.join(reviews_df_expanded['Review Text'].tolist())

Feature Engineering

# --- Derived per-review features ---

# TextBlob sentiment polarity, a float in [-1, 1].
reviews_df_expanded['Polarity'] = reviews_df_expanded['Review Text'].apply(
    lambda text: TextBlob(text).sentiment.polarity
)

# Character count of each review body.
reviews_df_expanded['Review Length'] = reviews_df_expanded['Review Text'].apply(len)

# Whitespace-delimited token count.
reviews_df_expanded['Word Count'] = reviews_df_expanded['Review Text'].apply(
    lambda text: len(text.split())
)
def get_avg_word_len(x):
    """Return the mean character length of the whitespace-separated words in *x*.

    Returns 0.0 for an empty or all-whitespace string; the original raised
    ZeroDivisionError on such input.
    """
    words = x.split()
    if not words:
        return 0.0
    return sum(len(word) for word in words) / len(words)
# Mean word length per review (characters per token).
reviews_df_expanded['Average Word Length'] = reviews_df_expanded['Review Text'].apply(
lambda x: get_avg_word_len(x)
)
reviews_df_expanded.head(5)
AgeReview TextRatingRecommended INDPositive Feedback CountDivision NameDepartment NameClass NamePolarityReview LengthWord CountAverage Word Length
033Absolutely wonderful - silky and sexy and comf...410InitmatesIntimateIntimates0.6333335385.750000
134Love this dress! it is sooo pretty. i happen...514GeneralDressesDresses0.339583316653.800000
260I had such high hopes for this dress and reall...300GeneralDressesDresses0.073675500984.112245
350I love, love, love this jumpsuit. it is fun, f...510General PetiteBottomsPants0.550000125234.478261
447This shirt is very flattering to all due to th...516GeneralTopsBlouses0.512891192364.361111

Numerical Plots

Sentiment Polarity

# Interactive line plot of polarity per review (cufflinks offline plotly).
reviews_df_expanded['Polarity'].iplot(
title='Sentiment Polarity',
theme='polar',
yTitle='Sentiment [-1,1]',
xTitle='Reviews'
)

Visualizing Text Data

# Distribution of sentiment polarity across all reviews.
reviews_df_expanded['Polarity'].iplot(
kind='hist',
title='Sentiment Polarity',
theme='polar',
xTitle='Sentiment [-1,1]',
yTitle='Frequency'
)

Visualizing Text Data

Ratings vs. Age

# Distribution of the 1-5 star ratings.
reviews_df_expanded['Rating'].iplot(
kind='hist',
theme='polar',
title='Rating Histogram',
xTitle='Rating',
yTitle='Frequency'
)

Visualizing Text Data

# Distribution of reviewer ages.
reviews_df_expanded['Age'].iplot(
kind='hist',
theme='polar',
title='Age Histogram',
xTitle='Age',
yTitle='Frequency'
)

Visualizing Text Data

# Age vs. polarity scatter with marginal histograms, colored by rating.
sns.jointplot(
data=reviews_df_expanded,
x='Age',
y='Polarity',
kind='scatter',
hue='Rating',
palette='winter',
alpha=0.4
)

# Export the figure for the article assets.
plt.savefig('../assets/E-Commerce_Clothing_Reviews_05.webp', bbox_inches='tight')

Visualizing Text Data

Word Count vs Average Word Length

# Distribution of review lengths in words.
reviews_df_expanded['Word Count'].iplot(
kind='hist',
theme='polar',
title='Word Count',
xTitle='Word Count',
yTitle='Frequency'
)

Visualizing Text Data

# Distribution of mean word length per review.
reviews_df_expanded['Average Word Length'].iplot(
kind='hist',
theme='polar',
title='Average Word Length',
xTitle='Average Word Length',
yTitle='Frequency'
)

Visualizing Text Data

# Word count vs. average word length, colored by rating.
sns.jointplot(
data=reviews_df_expanded,
x='Word Count',
y='Average Word Length',
kind='scatter',
hue='Rating',
palette='winter',
alpha=0.4
)

# Export the figure for the article assets.
plt.savefig('../assets/E-Commerce_Clothing_Reviews_08.webp', bbox_inches='tight')

Visualizing Text Data

Categorical Plots

Department, Division and Class

# Review counts per category level for the three product hierarchies.
departments = reviews_df_expanded['Department Name'].value_counts()
divisions = reviews_df_expanded['Division Name'].value_counts()
classes = reviews_df_expanded['Class Name'].value_counts()
Class NamesCountDivision NameCountDepartment NameCount
Dresses6145Tops10048General13365
Knits4626Dresses6145General Petite7837
Blouses2983Bottoms3662Initmates1426
Sweaters1380Intimate1653
Pants1350Jackets1002
Jeans1104Trend118
Fine gauge1059
Skirts903
Jackets683
Lounge669
Swim332
Outerwear319
Shorts304
Sleep214
Legwear158
Intimates147
Layering132
Trend118
Casual bottoms1
Chemises1
Name: Class Name, dtype: int64
# Review volume per department.
reviews_df_expanded['Department Name'].value_counts().iplot(
kind='bar',
theme='polar',
title='Reviews by Department',
xTitle='Departments',
yTitle='Review Count'
)

Visualizing Text Data

# Review volume per division.
reviews_df_expanded['Division Name'].value_counts().iplot(
kind='bar',
theme='polar',
title='Reviews by Division',
xTitle='Division',
yTitle='Review Count'
)

Visualizing Text Data

# Review volume per product class.
reviews_df_expanded['Class Name'].value_counts().iplot(
kind='bar',
theme='polar',
title='Reviews by Class',
xTitle='Class',
yTitle='Review Count'
)

Visualizing Text Data

Unigram, Bigram and Trigram

Get Top Words Function

# pre-processing example - get word frequency from string
# Fit a CountVectorizer on a one-document corpus; vocabulary_ maps each
# token to its column index in the count matrix.
x = ['i would have, you would have ordered it online because it is petite. i bought a petite and am 5\'8\".']
vec = CountVectorizer().fit(x)
vec.vocabulary_.items()

# vocabulary items and position
# dict_items([('would', 10), ('have', 4), ('you', 11), ('ordered', 8), ('it', 6), ('online', 7), ('because', 2), ('is', 5), ('petite', 9), ('bought', 3), ('and', 1), ('am', 0)])
# Transform the corpus to counts, then sum over documents -> 1 x vocab matrix.
words = vec.transform(x)
words_sum = words.sum(axis=0)
words_sum

# frequency of words (correlates to position index above)
# matrix([[1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1]])
# Pair each vocabulary term with its corpus count, most frequent first.
word_frequency = sorted(
    ((term, words_sum[0, position]) for term, position in vec.vocabulary_.items()),
    key=lambda pair: pair[1],
    reverse=True,
)

word_frequency
# [('would', 2),
# ('have', 2),
# ('it', 2),
# ('petite', 2),
# ('you', 1),
# ('ordered', 1),
# ('online', 1),
# ('because', 1),
# ('is', 1),
# ('bought', 1),
# ('and', 1),
# ('am', 1)]
def get_ngrams_by_frequency(nrange, data, n):
    """Return the *n* most frequent n-grams in *data* as (ngram, count) pairs.

    Parameters
    ----------
    nrange : tuple (min_n, max_n) forwarded to CountVectorizer's ngram_range.
    data   : iterable of str documents.
    n      : number of top pairs to return.
    """
    vec = CountVectorizer(ngram_range=nrange).fit(data)
    # Sum counts over all documents -> 1 x vocab_size matrix.
    # (The original also evaluated vec.vocabulary_.items() here and
    # discarded the result - dead code, removed.)
    words_sum = vec.transform(data).sum(axis=0)

    word_frequency = [
        (word, words_sum[0, idx]) for word, idx in vec.vocabulary_.items()
    ]

    # Sort key renamed: the original lambda shadowed the `data` parameter.
    top_words = sorted(word_frequency, key=lambda pair: pair[1], reverse=True)

    return top_words[:n]
# Demo: the 5 most frequent unigrams of the example sentence above.
get_ngrams_by_frequency((1, 1), x, 5)
# [('would', 2), ('have', 2), ('it', 2), ('petite', 2), ('you', 1)]

Unigrams Feature Extraction

# Top 20 unigrams over the whole corpus (stop words included).
top_20_unigrams = get_ngrams_by_frequency(
nrange=(1, 1),
data=reviews_df_expanded['Review Text'],
n=20
)

top_20_unigrams_df = pd.DataFrame(top_20_unigrams, columns=['Unigram', 'Frequency'])
top_20_unigrams_df = top_20_unigrams_df.set_index('Unigram')
top_20_unigrams_df
UnigramFrequency
the76159
it49162
and48993
is37696
this25758
to24592
in20722
not18781
but16549
on15325
for13994
of13472
was12923
with12797
so12017
am11625
my11027
dress10561
that9560
love8945
# Bar chart of the 20 most frequent unigrams.
top_20_unigrams_df.iplot(
kind='bar',
theme='polar',
title='Top 20 Unigrams',
yTitle='Frequency',
xTitle='Unigram'
)

Visualizing Text Data

Bigrams Feature Extraction

# Top 20 bigrams over the whole corpus (stop words included).
top_20_bigrams = get_ngrams_by_frequency(
nrange=(2, 2),
data=reviews_df_expanded['Review Text'],
n=20
)

top_20_bigrams_df = pd.DataFrame(top_20_bigrams, columns=['Bigram', 'Frequency'])
top_20_bigrams_df = top_20_bigrams_df.set_index('Bigram')
top_20_bigrams_df
BigramFrequency
it is12528
in the7169
and the5611
this dress4755
on the4337
of the3933
and it3723
it was3287
does not3228
this top2937
this is2772
is very2725
the fabric2653
love the2641
did not2486
love this2331
to wear2266
but it2196
the dress2178
do not2118
# Bar chart of the 20 most frequent bigrams.
top_20_bigrams_df.iplot(
kind='bar',
theme='polar',
title='Top 20 Bigrams',
yTitle='Frequency',
xTitle='Bigram'
)

Visualizing Text Data

Trigrams Feature Extraction

# Top 20 trigrams over the whole corpus (stop words included).
top_20_trigrams = get_ngrams_by_frequency(
nrange=(3, 3),
data=reviews_df_expanded['Review Text'],
n=20
)

top_20_trigrams_df = pd.DataFrame(top_20_trigrams, columns=['Trigram', 'Frequency'])
top_20_trigrams_df = top_20_trigrams_df.set_index('Trigram')
top_20_trigrams_df
TrigramFrequency
do not does1759
not does not1758
true to size1316
the fabric is1301
and it is1124
this dress is1123
it is very976
but it is921
it is not910
the material is896
if you are733
in the store728
on the model722
the fit is696
this top is672
the color is630
love this dress603
it is so585
tried it on574
and it was550
# Bar chart of the 20 most frequent trigrams.
top_20_trigrams_df.iplot(
kind='bar',
theme='polar',
title='Top 20 Trigrams',
yTitle='Frequency',
xTitle='Trigram'
)

Visualizing Text Data

Removing Stop Words

def get_ngrams_no_stop_by_frequency(nrange, data, n):
    """Return the *n* most frequent n-grams in *data*, excluding English
    stop words.

    Identical to get_ngrams_by_frequency except for the built-in
    stop_words='english' filter.
    """
    vec = CountVectorizer(ngram_range=nrange, stop_words='english').fit(data)
    # Sum counts over all documents -> 1 x vocab_size matrix.
    # (Dead vec.vocabulary_.items() statement from the original removed.)
    words_sum = vec.transform(data).sum(axis=0)

    word_frequency = [
        (word, words_sum[0, idx]) for word, idx in vec.vocabulary_.items()
    ]

    # Sort key renamed: the original lambda shadowed the `data` parameter.
    top_words = sorted(word_frequency, key=lambda pair: pair[1], reverse=True)

    return top_words[:n]
# Top 20 unigrams after removing English stop words, plus bar chart.
top_20_unigrams_no_stop = get_ngrams_no_stop_by_frequency(
nrange=(1, 1),
data=reviews_df_expanded['Review Text'],
n=20
)

top_20_unigrams_no_stop_df = pd.DataFrame(top_20_unigrams_no_stop, columns=['Unigram', 'Frequency'])
top_20_unigrams_no_stop_df = top_20_unigrams_no_stop_df.set_index('Unigram')

top_20_unigrams_no_stop_df.iplot(
kind='bar',
theme='polar',
title='Top 20 Unigrams',
yTitle='Frequency',
xTitle='Unigram'
)

Visualizing Text Data

# Top 20 bigrams after removing English stop words, plus bar chart.
top_20_bigrams_no_stop = get_ngrams_no_stop_by_frequency(
nrange=(2, 2),
data=reviews_df_expanded['Review Text'],
n=20
)

top_20_bigrams_no_stop_df = pd.DataFrame(top_20_bigrams_no_stop, columns=['Bigram', 'Frequency'])
top_20_bigrams_no_stop_df = top_20_bigrams_no_stop_df.set_index('Bigram')

top_20_bigrams_no_stop_df.iplot(
kind='bar',
theme='polar',
title='Top 20 Bigrams',
yTitle='Frequency',
xTitle='Bigram'
)

Visualizing Text Data

# Top 20 trigrams after removing English stop words, plus bar chart.
top_20_trigrams_no_stop = get_ngrams_no_stop_by_frequency(
    nrange=(3, 3),
    data=reviews_df_expanded['Review Text'],
    n=20
)

# BUG FIX: the original labeled the column/index 'Bigram' (copy-paste from
# the bigram cell); these are trigrams.
top_20_trigrams_no_stop_df = pd.DataFrame(top_20_trigrams_no_stop, columns=['Trigram', 'Frequency'])
top_20_trigrams_no_stop_df = top_20_trigrams_no_stop_df.set_index('Trigram')

top_20_trigrams_no_stop_df.iplot(
    kind='bar',
    theme='polar',
    title='Top 20 Trigrams',
    yTitle='Frequency',
    xTitle='Trigrams'
)

Visualizing Text Data

Parts-of-Speech (PoS-Tags)

# Fetch the NLTK resources required for PoS tagging.
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')
nltk.download('tagsets')
# BUG FIX: str(Series) returns the *truncated repr* of the Series - index
# numbers, '...', 'Name:', 'dtype: object' and all - so the original tagged
# the repr, not the reviews (the pasted output even shows '22628'/'dtype'/
# 'object' being tagged and only 111 rows). Tag the joined review texts.
blob = TextBlob(' '.join(reviews_df_expanded['Review Text'].tolist()))
pos_df = pd.DataFrame(blob.tags, columns=['Words', 'PoS'])
pos_df
WordsPoS
00CD
1AbsolutelyRB
2wonderfulJJ
3silkyNN
4andCC
...
106TextNNP
107LengthNNP
10822628CD
109dtypeNN
110objectNN
111 rows × 2 columns
# Print the Penn Treebank tag set with definitions and example tokens.
nltk.help.upenn_tagset()
$: dollar
$ -$ --$ A$ C$ HK$ M$ NZ$ S$ U.S.$ US$
'': closing quotation mark
' ''
(: opening parenthesis
( [ {
): closing parenthesis
) ] }
,: comma
,
--: dash
--
.: sentence terminator
. ! ?
:: colon or ellipsis
: ; ...
CC: conjunction, coordinating
& 'n and both but either et for less minus neither nor or plus so
therefore times v. versus vs. whether yet
CD: numeral, cardinal
mid-1890 nine-thirty forty-two one-tenth ten million 0.5 one forty-
seven 1987 twenty '79 zero two 78-degrees eighty-four IX '60s .025
fifteen 271,124 dozen quintillion DM2,000 ...
DT: determiner
all an another any both del each either every half la many much nary
neither no some such that the them these this those
EX: existential there
there
FW: foreign word
gemeinschaft hund ich jeux habeas Haementeria Herr K'ang-si vous
lutihaw alai je jour objets salutaris fille quibusdam pas trop Monte
terram fiche oui corporis ...
IN: preposition or conjunction, subordinating
astride among uppon whether out inside pro despite on by throughout
below within for towards near behind atop around if like until below
next into if beside ...
JJ: adjective or numeral, ordinal
third ill-mannered pre-war regrettable oiled calamitous first separable
ectoplasmic battery-powered participatory fourth still-to-be-named
multilingual multi-disciplinary ...
JJR: adjective, comparative
bleaker braver breezier briefer brighter brisker broader bumper busier
calmer cheaper choosier cleaner clearer closer colder commoner costlier
cozier creamier crunchier cuter ...
JJS: adjective, superlative
calmest cheapest choicest classiest cleanest clearest closest commonest
corniest costliest crassest creepiest crudest cutest darkest deadliest
dearest deepest densest dinkiest ...
LS: list item marker
A A. B B. C C. D E F First G H I J K One SP-44001 SP-44002 SP-44005
SP-44007 Second Third Three Two * a b c d first five four one six three
two
MD: modal auxiliary
can cannot could couldn't dare may might must need ought shall should
shouldn't will would
NN: noun, common, singular or mass
common-carrier cabbage knuckle-duster Casino afghan shed thermostat
investment slide humour falloff slick wind hyena override subhumanity
machinist ...
NNP: noun, proper, singular
Motown Venneboerger Czestochwa Ranzer Conchita Trumplane Christos
Oceanside Escobar Kreisler Sawyer Cougar Yvette Ervin ODI Darryl CTCA
Shannon A.K.C. Meltex Liverpool ...
NNPS: noun, proper, plural
Americans Americas Amharas Amityvilles Amusements Anarcho-Syndicalists
Andalusians Andes Andruses Angels Animals Anthony Antilles Antiques
Apache Apaches Apocrypha ...
NNS: noun, common, plural
undergraduates scotches bric-a-brac products bodyguards facets coasts
divestitures storehouses designs clubs fragrances averages
subjectivists apprehensions muses factory-jobs ...
PDT: pre-determiner
all both half many quite such sure this
POS: genitive marker
' 's
PRP: pronoun, personal
hers herself him himself hisself it itself me myself one oneself ours
ourselves ownself self she thee theirs them themselves they thou thy us
PRP$: pronoun, possessive
her his mine my our ours their thy your
RB: adverb
occasionally unabatingly maddeningly adventurously professedly
stirringly prominently technologically magisterially predominately
swiftly fiscally pitilessly ...
RBR: adverb, comparative
further gloomier grander graver greater grimmer harder harsher
healthier heavier higher however larger later leaner lengthier less-
perfectly lesser lonelier longer louder lower more ...
RBS: adverb, superlative
best biggest bluntest earliest farthest first furthest hardest
heartiest highest largest least less most nearest second tightest worst
RP: particle
aboard about across along apart around aside at away back before behind
by crop down ever fast for forth from go high i.e. in into just later
low more off on open out over per pie raising start teeth that through
under unto up up-pp upon whole with you
SYM: symbol
% & ' '' ''. ) ). * + ,. < = > @ A[fj] U.S U.S.S.R * ** ***
TO: "to" as preposition or infinitive marker
to
UH: interjection
Goodbye Goody Gosh Wow Jeepers Jee-sus Hubba Hey Kee-reist Oops amen
huh howdy uh dammit whammo shucks heck anyways whodunnit honey golly
man baby diddle hush sonuvabitch ...
VB: verb, base form
ask assemble assess assign assume atone attention avoid bake balkanize
bank begin behold believe bend benefit bevel beware bless boil bomb
boost brace break bring broil brush build ...
VBD: verb, past tense
dipped pleaded swiped regummed soaked tidied convened halted registered
cushioned exacted snubbed strode aimed adopted belied figgered
speculated wore appreciated contemplated ...
VBG: verb, present participle or gerund
telegraphing stirring focusing angering judging stalling lactating
hankerin' alleging veering capping approaching traveling besieging
encrypting interrupting erasing wincing ...
VBN: verb, past participle
multihulled dilapidated aerosolized chaired languished panelized used
experimented flourished imitated reunifed factored condensed sheared
unsettled primed dubbed desired ...
VBP: verb, present tense, not 3rd person singular
predominate wrap resort sue twist spill cure lengthen brush terminate
appear tend stray glisten obtain comprise detest tease attract
emphasize mold postpone sever return wag ...
VBZ: verb, present tense, 3rd person singular
bases reconstructs marks mixes displeases seals carps weaves snatches
slumps stretches authorizes smolders pictures emerges stockpiles
seduces fizzes uses bolsters slaps speaks pleads ...
WDT: WH-determiner
that what whatever which whichever
WP: WH-pronoun
that what whatever whatsoever which who whom whosoever
WP$: WH-pronoun, possessive
whose
WRB: Wh-adverb
how however whence whenever where whereby whereever wherein whereof why
``: opening quotation mark
` ``
# Collapse to tag frequencies. NOTE: rebinds pos_df from a DataFrame to a
# Series; the original two-column frame is no longer reachable afterwards.
pos_df = pos_df['PoS'].value_counts()
pos_df
NN23
DT15
JJ13
CD11
PRP8
RB6
VBZ5
VBP5
IN5
CC4
NNP4
VBD4
TO3
NNS2
VB2
PDT1
Name: PoS, dtype: int64
# Bar chart of part-of-speech tag frequencies.
pos_df.iplot(
kind='bar',
theme='polar',
title='Parts-of-Speech',
yTitle='Count',
xTitle='Tags'
)

Visualizing Text Data

Bivariant Analysis

# Pairwise scatter/KDE of all numeric features, colored by rating.
sns.pairplot(
data=reviews_df_expanded,
hue='Rating',
palette='winter'
)

Visualizing Text Data

# Default figure size for the remaining matplotlib-backed plots.
plt.rcParams['figure.figsize'] = [10,5]

# Strip plot of polarity per department, colored by rating.
sns.catplot(
data=reviews_df_expanded,
x='Department Name',
y='Polarity',
hue='Rating',
palette='winter',
height=5,
aspect=2
)

Visualizing Text Data

# Box plot of polarity per department, split by rating.
sns.catplot(
kind='box',
data=reviews_df_expanded,
x='Department Name',
y='Polarity',
hue='Rating',
palette='winter',
height=5,
aspect=2
)

Visualizing Text Data

# Box plot of review length per department, split by rating.
sns.catplot(
kind='box',
data=reviews_df_expanded,
x='Department Name',
y='Review Length',
hue='Rating',
palette='winter',
height=5,
aspect=2
)

Visualizing Text Data

Recommendation Sentiment Polarity Distribution

# Polarity values of reviews whose product was recommended (IND == 1).
recommended_ps = reviews_df_expanded[reviews_df_expanded['Recommended IND']==1]['Polarity']
recommended_ps.head(5)
Polarity
00.633333
10.339583
30.550000
40.512891
60.133750
# Polarity values of reviews whose product was NOT recommended (IND == 0).
not_recommended_ps = reviews_df_expanded[reviews_df_expanded['Recommended IND']==0]['Polarity']
not_recommended_ps.head(5)
Polarity
20.073675
50.178750
10-0.097149
22-0.045595
250.261508
# Overlaid polarity histograms for recommended vs. not-recommended reviews.
# Relies on `import plotly.graph_objs as go`, which the original file never
# imported (added to the import block at the top of the file).
trace_pos = go.Histogram(x=recommended_ps, name='Recommended', opacity=0.8)
trace_neg = go.Histogram(x=not_recommended_ps, name='Not Recommended', opacity=0.8)

# Renamed from `data` to avoid clobbering such a generic module-level name.
traces = [trace_pos, trace_neg]
layout = go.Layout(barmode='overlay', title='Recommendation Sentiment Polarity Distribution')

fig = go.Figure(data=traces, layout=layout)
fig.data[0].marker.color = ('mediumspringgreen')
fig.data[1].marker.color = ('dodgerblue')
iplot(fig)

Visualizing Text Data

Recommendation Rating Distribution

# Same overlay technique for the 1-5 star Rating distribution.
recommended_rating_ps = reviews_df_expanded[reviews_df_expanded['Recommended IND']==1]['Rating']
not_recommended_rating_ps = reviews_df_expanded[reviews_df_expanded['Recommended IND']==0]['Rating']
trace_pos_rating = go.Histogram(x=recommended_rating_ps, name='Recommended', opacity=0.3)
trace_neg_rating = go.Histogram(x=not_recommended_rating_ps, name='Not Recommended', opacity=0.3)

data2 = [trace_pos_rating, trace_neg_rating]
layout2 = go.Layout(barmode='overlay', title='Recommendation Rating Distribution')

fig=go.Figure(data=data2, layout=layout2)
fig.data[0].marker.color = ('mediumspringgreen')
fig.data[1].marker.color = ('dodgerblue')
iplot(fig)

Visualizing Text Data

Joint Plot

# KDE joint plot: positive-feedback count vs. polarity, split by recommendation.
sns.jointplot(
data=reviews_df_expanded,
y='Positive Feedback Count',
x='Polarity',
hue='Recommended IND',
palette='winter',
kind='kde',
fill=True
)

Visualizing Text Data