import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the dataset and encode the target: malignant ('M') -> 1, benign ('B') -> 0.
data = pd.read_csv('data.csv')
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})
# Shuffle the rows so that the positional split below is a random split.
data = data.sample(frac=1).reset_index(drop=True)

# Pick the label by name rather than by position: the column encoded above is
# 'diagnosis', so reading the target from "the last column" is only correct
# when 'diagnosis' happens to be last, and otherwise leaks the label into X.
labels = data['diagnosis'].values

# Features: every column except the first one (skipped by the original slice,
# presumably a record id) and the label itself. Columns that are entirely
# empty (e.g. produced by a trailing comma in the CSV header) carry no
# information and would turn every statistic below into NaN, so drop them.
feature_columns = [col for col in data.columns[1:] if col != 'diagnosis']
features = data[feature_columns].dropna(axis=1, how='all').values.astype(float)

# Split into training (first 70% of the shuffled rows) and test sets.
train_size = int(0.7 * len(data))
X_train = features[:train_size]
y_train = labels[:train_size]
X_test = features[train_size:]
y_test = labels[train_size:]

# Standardize features using statistics from the training set only.
X_mean = X_train.mean(axis=0)
X_std = X_train.std(axis=0)
# A constant feature has zero std; divide by 1 instead so it maps to 0
# rather than producing NaN/inf.
X_std[X_std == 0] = 1.0
X_train = (X_train - X_mean) / X_std
X_test = (X_test - X_mean) / X_std

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), computed stably.

    Args:
        z: A scalar or array-like of real values.

    Returns:
        The element-wise sigmoid of ``z``; a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it can never overflow, unlike
    # exp(-z) for large negative z.
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook formula; for z < 0 use the equivalent
    # form exp(z) / (1 + exp(z)).
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple unwraps 0-d results to a scalar and
    # leaves real arrays untouched.
    return result[()]

def cost(theta, X, y):
    """Return the mean binary cross-entropy of the logistic model on (X, y).

    Args:
        theta: Parameter vector.
        X: Design matrix, used as ``X.dot(theta)``.
        y: Targets aligned with the rows of ``X``.

    Returns:
        The negated average log-likelihood of the targets under the model.
    """
    # Small offset added inside both logarithms so log(0) is never evaluated.
    eps = 1e-5
    probabilities = sigmoid(X.dot(theta))
    log_likelihood = (
        y * np.log(probabilities + eps)
        + (1 - y) * np.log(1 - probabilities + eps)
    )
    return (-1 / len(y)) * np.sum(log_likelihood)

def gradient_descent(X, y, theta, learning_rate, iterations):
    """Fit logistic-regression parameters with batch gradient descent.

    Args:
        X: Design matrix, one row per sample (used as ``X.dot(theta)``).
        y: Targets aligned with the rows of ``X``.
        theta: Initial parameter vector. It is copied, so the caller's array
            is not modified.
        learning_rate: Step size applied to every gradient update.
        iterations: Number of update steps to perform.

    Returns:
        A tuple ``(theta, cost_history)``: the fitted parameters as a new
        float array, and a list holding the cost after each update.
    """
    # Work on a float copy: the in-place update below would otherwise mutate
    # the caller's array, and would raise a casting error for an
    # integer-typed initial theta.
    theta = np.array(theta, dtype=float)
    m = len(y)
    cost_history = []
    for _ in range(iterations):
        residual = sigmoid(X.dot(theta)) - y
        gradient = X.T.dot(residual) / m
        theta -= learning_rate * gradient
        cost_history.append(cost(theta, X, y))
    return theta, cost_history

def predict_proba(X, theta):
    """Return the model's sigmoid output for each row of ``X``.

    Args:
        X: Design matrix, used as ``X.dot(theta)``.
        theta: Parameter vector.
    """
    linear_scores = X.dot(theta)
    return sigmoid(linear_scores)

def predict(X, theta, threshold=0.5):
    """Return a boolean decision for each row of ``X``.

    Args:
        X: Design matrix passed on to ``predict_proba``.
        theta: Parameter vector.
        threshold: Cut-off; a row is flagged True when its predicted
            probability is greater than or equal to this value.
    """
    probabilities = predict_proba(X, theta)
    return probabilities >= threshold

# Prepend a column of ones to each design matrix so theta[0] acts as the bias term.
X_train = np.hstack([np.ones((len(X_train), 1)), X_train])
X_test = np.hstack([np.ones((len(X_test), 1)), X_test])

# Initialize the parameters to zero, one per column (bias included).
theta = np.zeros(X_train.shape[1])

# Train the model: learning rate 0.01 for 1000 gradient-descent iterations.
theta, cost_history = gradient_descent(
    X_train, y_train, theta, learning_rate=0.01, iterations=1000
)

# Plot the cost history to monitor how training progressed.
ax = plt.gca()
ax.plot(cost_history)
ax.set_xlabel('Iteration')
ax.set_ylabel('Cost')
ax.set_title('Cost Function History')
plt.show()

# Predict class labels for the held-out test set.
y_pred = predict(X_test, theta)

# Compute the performance metrics by hand from confusion-matrix counts.
predicted_positive = y_pred == 1
actual_positive = y_test == 1
true_positives = np.sum(predicted_positive & actual_positive)
n_predicted_positive = np.sum(predicted_positive)
n_actual_positive = np.sum(actual_positive)

accuracy = np.mean(y_pred == y_test)
# A zero denominator means the metric is undefined (no predicted positives,
# no actual positives, or precision + recall == 0). Report 0.0 in that case
# instead of dividing by zero and propagating NaN.
precision = true_positives / n_predicted_positive if n_predicted_positive else 0.0
recall = true_positives / n_actual_positive if n_actual_positive else 0.0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)