function information_gain_ratio = calculate_information_gain_ratio(X, y)
    % CALCULATE_INFORMATION_GAIN_RATIO  Gain ratio of every feature column.
    %
    %   ratio = calculate_information_gain_ratio(X, y)
    %
    %   Inputs:
    %     X - sample-by-feature matrix; each column is treated as a discrete
    %         feature whose distinct values define the partition of the samples.
    %     y - vector of class labels, one per row of X.
    %
    %   Output:
    %     information_gain_ratio - 1-by-size(X,2) row vector holding
    %         (H(D) - H(D|A)) / SplitInfo(A) for each feature A. Features whose
    %         split information is zero (a single distinct value, or no
    %         matchable value at all) get a ratio of 0.
    %
    %   Errors:
    %     calculate_information_gain_ratio:sizeMismatch when the number of
    %     labels differs from the number of rows of X.
    %
    %   Note: samples whose feature value never compares equal to a value
    %   returned by unique (e.g. NaN) fall into no subset and therefore
    %   contribute neither to the conditional entropy nor to the split
    %   information of that feature.

    num_samples = size(X, 1);
    num_features = size(X, 2);

    % A mask shorter than y would silently index only the head of y while the
    % probabilities are normalised by the full label count, so reject it.
    if numel(y) ~= num_samples
        error('calculate_information_gain_ratio:sizeMismatch', ...
              'X has %d rows but y has %d labels.', num_samples, numel(y));
    end
    y = y(:);

    % Entropy of the whole data set.
    entropyD = calculate_entropy(y);

    information_gain = zeros(1, num_features);
    split_information = zeros(1, num_features);

    for i = 1:num_features
        feature_column = X(:, i);
        feature_values = unique(feature_column);  % distinct values of feature i
        entropyDA = 0;  % conditional entropy H(D | feature i)
        splitA = 0;     % split information of feature i

        for j = 1:numel(feature_values)
            % Samples whose feature i equals the j-th distinct value.
            subset_indices = feature_column == feature_values(j);
            subset_prob = nnz(subset_indices) / num_samples;

            % Empty subsets (possible for NaN values) carry no weight and
            % would produce 0 * log2(0) = NaN, so skip them.
            if subset_prob > 0
                subset_entropy = calculate_entropy(y(subset_indices));
                entropyDA = entropyDA + subset_prob * subset_entropy;
                splitA = splitA - subset_prob * log2(subset_prob);
            end
        end

        information_gain(i) = entropyD - entropyDA;
        split_information(i) = splitA;
    end

    % Divide only where the split information is positive. Substituting a tiny
    % divisor instead would blow a non-zero gain up to an enormous ratio for a
    % feature that does not actually split the data.
    information_gain_ratio = zeros(1, num_features);
    has_split = split_information > 0;
    information_gain_ratio(has_split) = ...
        information_gain(has_split) ./ split_information(has_split);
end

function entropy = calculate_entropy(labels)
    % CALCULATE_ENTROPY  Shannon entropy (in bits) of a set of class labels.
    %
    %   entropy = calculate_entropy(labels)
    %
    %   Input:
    %     labels - class labels as a numeric/logical vector, cell array of
    %              character vectors, string array, or categorical array.
    %              A char matrix is interpreted as one label per row.
    %
    %   Output:
    %     entropy - -sum(p .* log2(p)) over the classes that actually occur;
    %               0 for an empty (or all-missing) label set.
    %
    %   Only observed classes are counted, so no zero-probability terms arise
    %   and a pure label set yields exactly 0.

    % Treat each row of a char matrix as one label.
    if ischar(labels)
        labels = cellstr(labels);
    end
    labels = labels(:);

    % Drop missing labels (NaN, '', <undefined>, <missing>) so they do not
    % form classes of their own.
    % NOTE(review): intended to match how tabulate treated missing values -
    % confirm against the callers' data.
    labels = labels(~ismissing(labels));

    if isempty(labels)
        entropy = 0;
        return;
    end

    % Map every label to the index of its class, then count per class.
    [~, ~, class_index] = unique(labels);
    class_counts = accumarray(class_index(:), 1);
    class_probabilities = class_counts / sum(class_counts);

    entropy = -sum(class_probabilities .* log2(class_probabilities));
end
