from sklearn.datasets import make_classification
# Example: scatter plot of a synthetic classification dataset
# Build a synthetic classification dataset: x is the feature matrix, y holds
# the target labels; random_state fixes the random seed.
x, y = make_classification(
    n_samples=100,
    n_features=5,
    n_informative=2,
    random_state=7,
)
x.shape  # expected: (100, 5) -- 100 samples, 5 features

import matplotlib.pyplot as plt

# Scatter column 0 of x against column 1 of x, marker size 2, drawn in red.
first_feature = x[:, 0]
second_feature = x[:, 1]
plt.scatter(first_feature, second_feature, c='r', s=2)
plt.show()
# Example: line chart of salary against years of experience
import pandas as pd

# Read the salary dataset; the relative path is resolved against the current
# working directory.
df = pd.read_csv('Salary_Data.csv')
df  # bare expression; presumably a notebook leftover that displayed the frame
# The string literal below is never used by the program; it records what the
# frame looked like when displayed.
'''
YearsExperience Salary
0 1.1 39343.0
1 1.3 46205.0
2 1.5 37731.0
3 2.0 43525.0
4 2.2 39891.0
5 2.9 56642.0
6 3.0 60150.0
7 3.2 54445.0
8 3.2 64445.0
9 3.7 57189.0
10 3.9 63218.0
11 4.0 55794.0
12 4.0 56957.0
13 4.1 57081.0
14 4.5 61111.0
15 4.9 67938.0
16 5.1 66029.0
17 5.3 83088.0
18 5.9 81363.0
19 6.0 93940.0
20 6.8 91738.0
21 7.1 98273.0
22 7.9 101302.0
23 8.2 113812.0
24 8.7 109431.0
25 9.0 105582.0
26 9.5 116969.0
27 9.6 112635.0
28 10.3 122391.0
29 10.5 121872.0
'''
# Line chart: first column on the x-axis, second column on the y-axis.
experience = df.iloc[:, 0]
salary = df.iloc[:, 1]
plt.plot(experience, salary)
plt.title('Salary Data')
plt.xlabel('YearsExperience')
plt.ylabel('Salary')
plt.show()
# Example: scatter plot of labelled points coloured by class
# Load the labelled sample points; the code below reads the columns
# 'x', 'y' and 'label' from this file.
df = pd.read_csv('sample_classification.csv')

# Scatter the points, colouring each one by its value in the 'label' column.
point_x = df['x']
point_y = df['y']
point_label = df['label']
plt.scatter(point_x, point_y, c=point_label)

# NOTE(review): the axis labels contain Hangul; the active matplotlib font
# may need to be a Hangul-capable one for them to render -- confirm.
plt.xlabel('X 좌표')
plt.ylabel('Y 좌표')
plt.title('sample_classification')
plt.show()