"""Perceptron demo on two features of the Iris data set.

The script plots the sepal measurements of every species, trains a
hand-written perceptron on the first 100 rows (label +1 for 'Iris-setosa',
-1 otherwise), prints the learned parameters and draws the decision line.
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import Perceptron  # kept from the original file; unused below


class Model:
    """Perceptron for linearly separable, two-class data labelled +1 / -1.

    Attributes:
        w: Weight vector. ``None`` until ``fit`` is called for the first
            time, then an array with one entry per feature.
        b: Bias term.
        l_rate: Learning rate applied to every update.
        max_iter: Optional cap on the number of passes over the training
            set; ``None`` means "run until no sample is misclassified".
        converged: ``True`` when the last ``fit`` call ended with a full
            pass that produced no update.
    """

    def __init__(self, l_rate=0.1, max_iter=None):
        """Create an untrained model.

        Args:
            l_rate: Learning rate (step size of each update).
            max_iter: Maximum number of passes over the data, or ``None``
                for no limit.
        """
        # The weights are allocated in fit(), once the number of features is
        # known, rather than being sized from a module-level ``data`` array
        # that has to exist before the model is constructed.
        self.w = None
        self.b = 0
        self.l_rate = l_rate
        self.max_iter = max_iter
        self.converged = False

    def sign(self, x, w, b):
        """Return the raw decision value ``dot(x, w) + b``.

        Despite the name, the result is not reduced to +1 / -1; callers
        inspect its sign themselves.
        """
        return np.dot(x, w) + b

    def fit(self, x_train, y_train):
        """Train with per-sample perceptron updates.

        Every sample with ``y * (w . x + b) <= 0`` counts as misclassified
        and triggers ``w += l_rate * y * x`` and ``b += l_rate * y``. Passes
        over the data repeat until one pass causes no update, or until
        ``max_iter`` passes have been made when a cap is set.

        Args:
            x_train: 2-D array-like of shape (n_samples, n_features).
            y_train: 1-D array-like of labels (+1 / -1), one per sample.

        Returns:
            The string ``'perception model!'``.

        Raises:
            ValueError: If ``x_train`` is not 2-D or the lengths differ.
        """
        # Force a numeric dtype so object arrays (e.g. sliced from a mixed
        # DataFrame) do not fall back to slow element-wise Python arithmetic.
        x_train = np.asarray(x_train, dtype=np.float64)
        y_train = np.asarray(y_train)
        if x_train.ndim != 2:
            raise ValueError(
                "x_train must be 2-D with shape (n_samples, n_features)"
            )
        if len(x_train) != len(y_train):
            raise ValueError("x_train and y_train must have the same length")

        if self.w is None:
            # Same starting point as before: all weights set to one.
            self.w = np.ones(x_train.shape[1], dtype=np.float32)

        self.converged = False
        epochs_done = 0
        while not self.converged:
            if self.max_iter is not None and epochs_done >= self.max_iter:
                # Stop instead of looping forever on non-separable data.
                break
            epochs_done += 1

            wrong_count = 0
            for x, y in zip(x_train, y_train):
                if y * self.sign(x, self.w, self.b) <= 0:
                    self.w = self.w + self.l_rate * (y * x)
                    self.b = self.b + self.l_rate * y
                    wrong_count += 1
            self.converged = wrong_count == 0
        return 'perception model!'

    def score(self, x_test=None, y_test=None):
        """Return the fraction of samples classified correctly.

        A sample counts as correct when ``y * (w . x + b) > 0``, the
        complement of the misclassification test used in ``fit``.

        Args:
            x_test: 2-D array-like of shape (n_samples, n_features).
            y_test: 1-D array-like of labels (+1 / -1).

        Returns:
            Accuracy as a float in [0, 1], or ``None`` when called without
            data (the behaviour of the original stub).

        Raises:
            ValueError: If the model is not fitted or the data is empty.
        """
        if x_test is None or y_test is None:
            return None
        if self.w is None:
            raise ValueError("model has not been fitted yet")
        x_test = np.asarray(x_test, dtype=np.float64)
        y_test = np.asarray(y_test)
        if len(y_test) == 0:
            raise ValueError("cannot score an empty data set")
        margins = y_test * self.sign(x_test, self.w, self.b)
        return float(np.mean(margins > 0))


# Load the data set (columns 1-5 of the CSV file).
df = pd.read_csv('Iris.csv', usecols=[1, 2, 3, 4, 5])

# Scatter plot of the raw data, to judge linear separability by eye.
plt.figure(figsize=(8, 5))
# One scatter series per species so each gets its own colour and legend entry.
for species, group in df.groupby('Species'):
    plt.scatter(group['SepalLength'], group['SepalWidth'], alpha=0.8, label=species)
plt.xlabel('SepalLength')
plt.ylabel('SepalWidth')
plt.title('Scattered distribution of length and width of iris sepals.')
plt.legend()
plt.show()

# Training data: first 100 rows, first two feature columns plus the label.
data = np.array(df.iloc[:100, [0, 1, -1]])
# ``data`` has object dtype because it mixes numbers and strings; cast the
# features to float so the later arithmetic is numeric.
X, y = data[:, :-1].astype(np.float64), data[:, -1]
y = np.array([1 if i == 'Iris-setosa' else -1 for i in y])

# Train the perceptron.
perceptron = Model()
perceptron.fit(X, y)
# Final parameters.
print(perceptron.w, perceptron.b)

# Decision boundary w0 * x + w1 * y + b = 0, solved for y.
x_points = np.linspace(4, 7, 10)
y_ = -(perceptron.w[0] * x_points + perceptron.b) / perceptron.w[1]
plt.plot(x_points, y_)
plt.scatter(df[:50]['SepalLength'], df[:50]['SepalWidth'], label='Iris-setosa')
plt.scatter(df[50:100]['SepalLength'], df[50:100]['SepalWidth'], label='Iris-versicolor')
plt.xlabel('SepalLength')
plt.ylabel('SepalWidth')
plt.title('Training results of Perceptron.')
plt.legend()
plt.show()