# PCA demo: generate random data, standardize it, run PCA, keep enough
# principal components to explain a target share of the variance, and plot
# the first two retained components.

library(ggplot2)

# Configuration ----

# Minimum cumulative share of variance the retained components must explain.
variance_threshold <- 0.95

# Sample data creation ----

set.seed(0)
# 100 samples with 3 features, drawn from Uniform(0, 1).
data <- matrix(runif(300), nrow = 100, ncol = 3)

# Standardize the data ----

data_scaled <- scale(data)

# Perform PCA ----

# `data_scaled` is already centered and scaled, so `center`/`scale.` are
# effectively no-ops here; they are kept so that `pca$center` and `pca$scale`
# remain populated on the result object.
pca <- prcomp(data_scaled, center = TRUE, scale. = TRUE)

# Determine the number of components to retain ----

# Share of total variance explained by each component, and the running total.
explained_variance <- pca$sdev^2 / sum(pca$sdev^2)
cumulative_variance <- cumsum(explained_variance)

# First component index at which the running total reaches the threshold.
num_components <- which(cumulative_variance >= variance_threshold)[1L]

# Create a data frame with the selected principal components ----

# `drop = FALSE` keeps a matrix even when a single component is retained, and
# `seq_len()` avoids the `1:0` pitfall of the colon operator.
principal_components <- as.data.frame(
  pca$x[, seq_len(num_components), drop = FALSE]
)
colnames(principal_components) <- paste0("PC", seq_len(num_components))

# Plot the principal components ----

# A two-dimensional scatter plot needs at least two retained components.
if (num_components >= 2L) {
  pca_plot <- ggplot(principal_components, aes(x = PC1, y = PC2)) +
    geom_point() +
    ggtitle("PCA with 95% Variance Retained") +
    xlab("Principal Component 1") +
    ylab("Principal Component 2")
  # Print explicitly so the plot is also drawn when the script is source()d.
  print(pca_plot)
} else {
  message(
    "Only ", num_components,
    " component retained; skipping the PC1 vs PC2 scatter plot."
  )
}

# Explained variance and number of components ----

print(explained_variance)
print(paste("Number of components selected:", num_components))