import numpy as np
import matplotlib.pyplot as plt
from pykalman import KalmanFilter
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import pandas as pd
import math


# Candidate input CSV files; they all sit in the same directory, so the
# common prefix is spelled out once and the file names are appended to it.
_data_dir = 'D:\\小论文\\小论文2\\experiment_content\\dealt_data\\'
file_path1 = _data_dir + 'panama_power.csv'
file_path2 = _data_dir + 'tetouan_region1_power.csv'
file_path3 = _data_dir + 'tetouan_region2_power.csv'
file_path4 = _data_dir + 'tetouan_region3_power.csv'


# Read the dataset used for this run (file_path3) and keep only the column
# at positional index 3, i.e. the fourth column of the CSV.
# NOTE(review): the previous comment on this step said "second column",
# which does not match index 3 - confirm which column holds the power data.
df = pd.read_csv(file_path3).iloc[:, 3]

# Turn the selected column into a flat float32 NumPy array of observations.
power = df.values.astype(np.float32)
observed_values = power.ravel()


# Settings for a Kalman filter with a scalar state and a scalar observation.
_kf_settings = dict(
    # State transition: the state is carried over unchanged between steps.
    transition_matrices=[1],
    # Observation model: the observation is the state itself (plus noise).
    observation_matrices=[1],
    # Start the state estimate at the first observed value.
    initial_state_mean=observed_values[0],
    # Uncertainty of that initial estimate.
    initial_state_covariance=0.5,
    # Observation-noise covariance (larger means noisier measurements).
    observation_covariance=0.01,
    # Process-noise covariance (uncertainty of the state dynamics).
    transition_covariance=0.03,
)
kf = KalmanFilter(**_kf_settings)

# Run the filtering pass (kf.filter, not kf.smooth) over the observations;
# only the first returned element (the state means) is used, flattened to
# 1-D so it lines up with observed_values.
filtered_state_means, _ = kf.filter(observed_values)
filtered_values = filtered_state_means.flatten()

# Measure how closely the filtered series tracks the raw observations.
# NOTE(review): both series come from the same data in this script, so the
# "Test" labels printed below do not refer to a held-out test set.

# Root mean squared error.
mse = mean_squared_error(y_true=observed_values, y_pred=filtered_values)
rmse = math.sqrt(mse)
print('Test RMSE: %.3f' % rmse)

# Coefficient of determination (R^2).
R2 = r2_score(y_true=observed_values, y_pred=filtered_values)
print(f"R-squared (R^2): {R2:.3f}")

# Mean absolute error.
mae = mean_absolute_error(y_true=observed_values, y_pred=filtered_values)
print('Test MAE: %.3f' % mae)


# Draw the raw observations and the filtered series on a single set of axes
# so the two can be compared directly.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(observed_values, label='Noisy Observations', alpha=0.5)
ax.plot(filtered_values, label='Kalman Filtered', linewidth=2)
ax.set_title('Kalman Filter for Time Series Denoising')
ax.legend()
plt.show()