import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Default dataset: a tab-separated file read as two columns (label, message).
_SMS_DATA_URL = (
    "https://raw.githubusercontent.com/justmarkham/"
    "pycon-2016-tutorial/master/data/sms.tsv"
)


def spam_classifier(
    *,
    data_url: str = _SMS_DATA_URL,
    test_size: float = 0.2,
    random_state: int = 42,
) -> None:
    """Train a bag-of-words Naive Bayes classifier and print its test metrics.

    The data is read as a header-less, tab-separated table whose two columns
    are named ``label`` and ``message``. Labels are integer-encoded with
    ``LabelEncoder``, messages are turned into token counts with
    ``CountVectorizer``, and a ``MultinomialNB`` model is fitted on the
    training partition. Accuracy and a classification report for the
    held-out partition are printed to stdout.

    Args:
        data_url: Path or URL passed to ``pandas.read_csv``.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed for the train/test shuffle, for reproducibility.

    Returns:
        None. Results are printed, not returned.
    """
    df = pd.read_csv(
        data_url,
        sep='\t',
        header=None,
        names=['label', 'message'],
    )

    messages = df['message']
    y = LabelEncoder().fit_transform(df['label'])

    # Split the RAW text before vectorizing. Fitting the vectorizer on the
    # full corpus would let test-set vocabulary leak into the features (and
    # into the model's smoothing), making the reported scores optimistic.
    msg_train, msg_test, y_train, y_test = train_test_split(
        messages, y, test_size=test_size, random_state=random_state
    )

    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(msg_train)  # learn vocabulary on train only
    X_test = vectorizer.transform(msg_test)        # unseen test tokens are ignored

    model = MultinomialNB()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print("📨 垃圾邮件分类器")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))