import numpy as np
from numpy import isnan
import pandas as pd
from sklearn.impute import KNNImputer
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import plotly.graph_objs as go
import plotly.express as px
from scipy.stats import shapiro
from scipy.stats import chi2_contingency
from scipy.stats import chi2
import scipy.stats as stats
from numpy import median
from numpy import std
from IPython.display import Image
import warnings
# Silence every warning category for the remainder of the session.
warnings.filterwarnings('ignore')
# Load the marketing data from a CSV file resolved relative to the working directory.
dataset = pd.read_csv('marketing_data.csv')
# Preview the first rows of the frame (presumably rendered as notebook cell output).
dataset.head()
ID
Year_Birth
Education
Marital_Status
Income
Kidhome
Teenhome
Dt_Customer
Recency
MntWines
…
NumStorePurchases
NumWebVisitsMonth
AcceptedCmp3
AcceptedCmp4
AcceptedCmp5
AcceptedCmp1
AcceptedCmp2
Response
Complain
Country
0
1826
1970
Graduation
Divorced
84,835.00
0
0
6/16/14
0
189
…
6
1
0
0
0
0
0
1
0
SP
1
1
1961
Graduation
Single
57,091.00
0
0
6/15/14
0
464
…
7
5
0
0
0
0
1
1
0
CA
2
10476
1958
Graduation
Married
67,267.00
0
1
5/13/14
0
134
…
5
2
0
0
0
0
0
0
0
US
3
1386
1967
Graduation
Together
32,474.00
1
1
05-11-2014
0
10
…
2
7
0
0
0
0
0
0
0
AUS
4
5371
1989
Graduation
Single
21,474.00
1
0
04-08-2014
0
6
…
2
7
1
0
0
0
0
1
0
SP
# The income header is expected to carry surrounding spaces (' Income ');
# normalise it in place so the column can be addressed as 'Income'.
dataset.rename(columns={' Income ': 'Income'}, inplace=True)

# Income values are text with comma thousands separators (e.g. "84,835.00"):
# drop the commas first, then store the column as float.
income_text = dataset['Income'].str.replace(',', '')
dataset['Income'] = income_text.astype(float)