1 분 소요

Hierar

Cust_Spend_Data.csv 파일을 통해서 고객의 의류소비, 음료소비, 음식소비 대이터를 통해서 비슷한 고객으로 그루핑 하자

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
df = pd.read_csv('Cust_Spend_Data.csv')
df.head()
Cust_ID Name Avg_Mthly_Spend No_Of_Visits Apparel_Items FnV_Items Staples_Items
0 1 A 10000 2 1 1 0
1 2 B 7000 3 0 10 9
2 3 C 7000 7 1 3 4
3 4 D 6500 5 1 1 4
4 5 E 6000 6 0 12 3
X = df.iloc[:,2:]
import scipy.cluster.hierarchy as sch
sch.dendrogram(sch.linkage(X,method='ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distance')
plt.show()

png

from sklearn.cluster import AgglomerativeClustering
# 덴드로그램에서 가장 적절한 기준으로 그룹을 묶는다면 3개의 그룹이 적당할거 같다.
hc = AgglomerativeClustering(n_clusters=3)
y_pred = hc.fit_predict(X)
df['Gruop'] = y_pred
df
Cust_ID Name Avg_Mthly_Spend No_Of_Visits Apparel_Items FnV_Items Staples_Items Gruop
0 1 A 10000 2 1 1 0 2
1 2 B 7000 3 0 10 9 1
2 3 C 7000 7 1 3 4 1
3 4 D 6500 5 1 1 4 1
4 5 E 6000 6 0 12 3 1
5 6 F 4000 3 0 1 8 0
6 7 G 2500 5 0 11 2 0
7 8 H 2500 3 0 1 1 0
8 9 I 2000 2 0 2 2 0
9 10 J 1000 4 0 1 7 0
sb.pairplot(data=X)
plt.show()

png

df.corr()
Cust_ID Avg_Mthly_Spend No_Of_Visits Apparel_Items FnV_Items Staples_Items Gruop
Cust_ID 1.000000 -0.972438 -0.129550 -0.645777 -0.183822 -0.012012 -0.892269
Avg_Mthly_Spend -0.972438 1.000000 0.102229 0.715422 0.081890 -0.107431 0.960886
No_Of_Visits -0.129550 0.102229 1.000000 0.270666 0.376214 0.042796 0.093495
Apparel_Items -0.645777 0.715422 0.270666 1.000000 -0.387325 -0.301169 0.723747
FnV_Items -0.183822 0.081890 0.376214 -0.387325 1.000000 0.093025 0.074517
Staples_Items -0.012012 -0.107431 0.042796 -0.301169 0.093025 1.000000 -0.208063
Gruop -0.892269 0.960886 0.093495 0.723747 0.074517 -0.208063 1.000000

댓글남기기