예제
# Read 'Salary_Data.csv' (path relative to the working directory) into the DataFrame df.
df = pd.read_csv('Salary_Data.csv')
| | YearsExperience | Salary |
---|---|---|
0 | 1.1 | 39343.0 |
1 | 1.3 | 46205.0 |
2 | 1.5 | 37731.0 |
3 | 2.0 | 43525.0 |
4 | 2.2 | 39891.0 |
5 | 2.9 | 56642.0 |
6 | 3.0 | 60150.0 |
7 | 3.2 | 54445.0 |
8 | 3.2 | 64445.0 |
9 | 3.7 | 57189.0 |
10 | 3.9 | 63218.0 |
11 | 4.0 | 55794.0 |
12 | 4.0 | 56957.0 |
13 | 4.1 | 57081.0 |
14 | 4.5 | 61111.0 |
15 | 4.9 | 67938.0 |
16 | 5.1 | 66029.0 |
17 | 5.3 | 83088.0 |
18 | 5.9 | 81363.0 |
19 | 6.0 | 93940.0 |
20 | 6.8 | 91738.0 |
21 | 7.1 | 98273.0 |
22 | 7.9 | 101302.0 |
23 | 8.2 | 113812.0 |
24 | 8.7 | 109431.0 |
25 | 9.0 | 105582.0 |
26 | 9.5 | 116969.0 |
27 | 9.6 | 112635.0 |
28 | 10.3 | 122391.0 |
29 | 10.5 | 121872.0 |
# Compute the pairwise correlation between the columns of df; the result is not stored.
df.corr()
| | YearsExperience | Salary |
---|---|---|
YearsExperience | 1.000000 | 0.978242 |
Salary | 0.978242 | 1.000000 |
예제
# Read 'customer_data.csv' into df, replacing whatever df was bound to before.
df = pd.read_csv('customer_data.csv')
| | label | id | fea_1 | fea_2 | fea_3 | fea_4 | fea_5 | fea_6 | fea_7 | fea_8 | fea_9 | fea_10 | fea_11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 54982665 | 5 | 1245.5 | 3 | 77000.0 | 2 | 15 | 5 | 109 | 5 | 151300 | 244.948974 |
1 | 0 | 59004779 | 4 | 1277.0 | 1 | 113000.0 | 2 | 8 | -1 | 100 | 3 | 341759 | 207.173840 |
2 | 0 | 58990862 | 7 | 1298.0 | 1 | 110000.0 | 2 | 11 | -1 | 101 | 5 | 72001 | 1.000000 |
3 | 1 | 58995168 | 7 | 1335.5 | 1 | 151000.0 | 2 | 11 | 5 | 110 | 3 | 60084 | 1.000000 |
4 | 0 | 54987320 | 7 | NaN | 2 | 59000.0 | 2 | 11 | 5 | 108 | 4 | 450081 | 197.403141 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1120 | 0 | 58988196 | 5 | 1289.0 | 1 | 173000.0 | 2 | 15 | 5 | 112 | 3 | 350702 | 200.000000 |
1121 | 0 | 58987926 | 5 | NaN | 2 | 50000.0 | 2 | 15 | 5 | 108 | 4 | 450000 | 169.000000 |
1122 | 0 | 58995381 | 7 | 1220.0 | 3 | 76000.0 | 2 | 11 | 2 | 90 | 5 | 71002 | 1.000000 |
1123 | 0 | 58998054 | 4 | 1250.0 | 3 | 137000.0 | 2 | 8 | 5 | 90 | 5 | 72000 | 1.000000 |
1124 | 0 | 54989781 | 4 | 1415.0 | 3 | 93000.0 | 2 | 8 | 5 | 113 | 4 | 151300 | 273.861279 |
# Compute the pairwise column correlation matrix of df and keep it in c for later ranking.
c = df.corr()
| | label | id | fea_1 | fea_2 | fea_3 | fea_4 | fea_5 | fea_6 | fea_7 | fea_8 | fea_9 | fea_10 | fea_11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
label | 1.000000e+00 | -0.024619 | 0.052071 | -0.078808 | 0.055658 | -0.131459 | 0.017293 | 0.028907 | -0.012271 | -0.021251 | 5.590642e-17 | 0.000881 | 0.002444 |
id | -2.461878e-02 | 1.000000 | -0.094888 | 0.037017 | -0.011086 | -0.015092 | -0.046856 | -0.066836 | -0.027143 | 0.025433 | -1.304576e-02 | 0.023954 | 0.029668 |
fea_1 | 5.207118e-02 | -0.094888 | 1.000000 | -0.012046 | 0.004391 | -0.105110 | -0.020977 | 0.346629 | -0.030144 | 0.049951 | -5.500809e-02 | 0.108505 | 0.095053 |
fea_2 | -7.880778e-02 | 0.037017 | -0.012046 | 1.000000 | -0.246883 | 0.284455 | 0.003515 | 0.001176 | 0.003682 | -0.022591 | 9.718013e-02 | -0.029273 | 0.059464 |
fea_3 | 5.565787e-02 | -0.011086 | 0.004391 | -0.246883 | 1.000000 | -0.119909 | -0.017062 | 0.161899 | 0.253059 | 0.025249 | 1.285708e-01 | -0.117266 | 0.066774 |
fea_4 | -1.314593e-01 | -0.015092 | -0.105110 | 0.284455 | -0.119909 | 1.000000 | 0.035862 | -0.137654 | -0.046294 | -0.105451 | 1.000482e-01 | 0.066553 | 0.039042 |
fea_5 | 1.729286e-02 | -0.046856 | -0.020977 | 0.003515 | -0.017062 | 0.035862 | 1.000000 | 0.055280 | -0.017898 | 0.003813 | -2.165627e-02 | 0.105127 | -0.101328 |
fea_6 | 2.890698e-02 | -0.066836 | 0.346629 | 0.001176 | 0.161899 | -0.137654 | 0.055280 | 1.000000 | -0.010971 | 0.028963 | 6.183006e-02 | 0.216362 | 0.128467 |
fea_7 | -1.227143e-02 | -0.027143 | -0.030144 | 0.003682 | 0.253059 | -0.046294 | -0.017898 | -0.010971 | 1.000000 | 0.078722 | -1.652961e-02 | -0.245453 | 0.021797 |
fea_8 | -2.125122e-02 | 0.025433 | 0.049951 | -0.022591 | 0.025249 | -0.105451 | 0.003813 | 0.028963 | 0.078722 | 1.000000 | -1.009981e-01 | 0.095880 | 0.133031 |
fea_9 | 5.590642e-17 | -0.013046 | -0.055008 | 0.097180 | 0.128571 | 0.100048 | -0.021656 | 0.061830 | -0.016530 | -0.100998 | 1.000000e+00 | -0.038176 | -0.048769 |
fea_10 | 8.808212e-04 | 0.023954 | 0.108505 | -0.029273 | -0.117266 | 0.066553 | 0.105127 | 0.216362 | -0.245453 | 0.095880 | -3.817559e-02 | 1.000000 | 0.271108 |
fea_11 | 2.444255e-03 | 0.029668 | 0.095053 | 0.059464 | 0.066774 | 0.039042 | -0.101328 | 0.128467 | 0.021797 | 0.133031 | -4.876862e-02 | 0.271108 | 1.000000 |
# Flatten the correlation matrix c into a Series keyed by (column, column) pairs.
uc = c.unstack()
uc.sort_values(kind='quicksort', ascending=False) # sort using quicksort, descending order
# NOTE(review): the name `sorted` shadows Python's built-in sorted() from here on;
# consider renaming it once all later uses of this variable are known.
sorted = uc.sort_values(kind='quicksort', ascending=False) # sort using quicksort, descending order
# Take the 20 largest correlation values (each column's correlation with itself, 1.0, ranks first).
sorted.head(20)
label   label     1.000000
id      id        1.000000
fea_10  fea_10    1.000000
fea_9   fea_9     1.000000
fea_8   fea_8     1.000000
fea_7   fea_7     1.000000
fea_6   fea_6     1.000000
fea_4   fea_4     1.000000
fea_3   fea_3     1.000000
fea_2   fea_2     1.000000
fea_1   fea_1     1.000000
fea_5   fea_5     1.000000
fea_11  fea_11    1.000000
fea_6   fea_1     0.346629
fea_1   fea_6     0.346629
fea_2   fea_4     0.284455
fea_4   fea_2     0.284455
fea_11  fea_10    0.271108
fea_10  fea_11    0.271108
fea_7   fea_3     0.253059
dtype: float64
예제
# Scatter plot of column fea_6 (x axis) against column fea_1 (y axis) of df.
plt.scatter(df['fea_6'],df['fea_1'])
사인파형 곡선 만들기
# Exercise: draw a chart that shows the sine waveform.
#   x : whole degrees 0..359 (np.arange stops before 360)
#   y : np.sin() of the angle, with np.cos() plotted alongside for comparison
deg = np.arange(360)
rad = np.radians(deg)  # the trig functions below expect radians
sinv, cosv = np.sin(rad), np.cos(rad)

# Axis titles are set first; they do not depend on the plotted data.
plt.xlabel('Degree')
plt.ylabel('Trig.values')

# Small markers (s=2): blue for sine, red for cosine.
plt.scatter(deg, sinv, s=2, c='b', label='Sine Value')
plt.scatter(deg, cosv, s=2, c='r', label='Cosine Value')

plt.legend()
plt.show()
# Draw 100 random samples from a normal distribution with mean 5 and standard deviation 1.
arr = np.random.normal(loc=5, scale=1, size=100)
# Histogram: shows how many samples fall into each bin.
plt.hist(arr)
plt.show()