# Install and Load Required Libraries
install.packages(c("tuber", "httpuv", "quanteda", "quanteda.textstats",
                   "ggplot2", "dplyr", "wordcloud"))

# Load libraries
library(tuber)
library(httpuv)
library(quanteda)
library(quanteda.textstats)   # textstat_frequency() moved here in quanteda >= 3
library(ggplot2)
library(dplyr)
library(wordcloud)            # also attaches RColorBrewer for brewer.pal()

# Authenticate with YouTube Data API v3
# yt_oauth() takes OAuth client credentials (not an API key). Create them in
# the Google Cloud Console and never commit real credentials to source control.
yt_oauth(app_id = "YOUR_CLIENT_ID", app_secret = "YOUR_CLIENT_SECRET")

# Fetch Channel Stats (CNN Channel)
cnn_channel_id <- "UCupvZG-5ko_eiXAupbDfxWw"
cnn_stats <- get_channel_stats(cnn_channel_id)
print(cnn_stats)

# Fetch Videos from CNN's YouTube Channel
cnn_videos <- yt_search(term = "news", channel_id = cnn_channel_id, max_results = 5)
print(cnn_videos)

# Fetch Video Details for the First Result
video_id <- cnn_videos$video_id[1]
video_details <- get_video_details(video_id)
print(video_details)

# Fetch Comments from the First Video
# get_comment_threads() takes the video id through its `filter` argument.
comments <- get_comment_threads(filter = c(video_id = video_id), max_results = 50)
comment_text <- as.character(comments$textDisplay)
print(comment_text)

# Analyze Video Stats
# get_video_details() returns only the snippet by default, so pull the
# view/like/comment counts separately with get_stats().
video_stats <- do.call(rbind, lapply(cnn_videos$video_id, function(id) {
  details <- get_video_details(id)
  stats   <- get_stats(id)
  data.frame(
    Title    = details$items[[1]]$snippet$title,
    Views    = as.numeric(stats$viewCount),
    Likes    = as.numeric(stats$likeCount),
    Comments = as.numeric(stats$commentCount),
    stringsAsFactors = FALSE
  )
}))

# Print Video Stats
print(video_stats)

# Average Stats
avg_views    <- mean(video_stats$Views, na.rm = TRUE)
avg_likes    <- mean(video_stats$Likes, na.rm = TRUE)
avg_comments <- mean(video_stats$Comments, na.rm = TRUE)
cat("Average Views:", avg_views, "\n")
cat("Average Likes:", avg_likes, "\n")
cat("Average Comments:", avg_comments, "\n")

# Create a Visualization of Video Stats
ggplot(video_stats, aes(x = Views, y = Likes)) +
  geom_point(color = "blue", size = 3) +
  geom_text(aes(label = Title), hjust = 1.1, vjust = 1.1, size = 3) +
  labs(title = "CNN YouTube Video Stats: Views vs Likes",
       x = "Views", y = "Likes") +
  theme_minimal()

# Text Analysis of Comments with quanteda
# dfm() no longer takes remove_punct/remove directly (quanteda >= 3), so
# tokenize first and drop punctuation and stopwords at the token stage.
comment_corpus <- corpus(comment_text)
comment_tokens <- tokens(comment_corpus, remove_punct = TRUE) %>%
  tokens_remove(stopwords("en"))
comment_dfm <- dfm(comment_tokens)
top_words <- topfeatures(comment_dfm, 10)
print(top_words)

# Generate Word Cloud
set.seed(123)
text <- tolower(unlist(comment_text))
word_freq <- table(unlist(strsplit(text, "\\W+")))
word_freq_df <- as.data.frame(word_freq, stringsAsFactors = FALSE)
colnames(word_freq_df) <- c("Word", "Freq")
word_freq_df <- word_freq_df %>% filter(!Word %in% stopwords("en"))

# Create Word Cloud
wordcloud(words = word_freq_df$Word, freq = word_freq_df$Freq,
          max.words = 50, colors = brewer.pal(8, "Dark2"))

# Word Frequency Bar Plot
textstat_frequency(comment_dfm, n = 10) %>%
  ggplot(aes(x = reorder(feature, frequency), y = frequency)) +
  geom_col(fill = "darkblue") +
  coord_flip() +
  labs(title = "Most Common Words in CNN YouTube Comments",
       x = "Words", y = "Frequency") +
  theme_minimal()
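
# Optional: Fetch the Full Comment Thread
# get_comment_threads() returns at most one page of results. A minimal sketch
# using tuber's get_all_comments(), which pages through every top-level comment
# and reply -- note that on a high-traffic CNN video this can make many API
# calls and consume substantial quota.
all_comments <- get_all_comments(video_id = video_id)
cat("Total comments retrieved:", nrow(all_comments), "\n")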
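
# Optional: Dictionary-Based Sentiment of Comments
# A minimal sketch using the Lexicoder Sentiment Dictionary bundled with
# quanteda (data_dictionary_LSD2015). Counting positive vs. negative words is
# only a rough proxy for sentiment, not a validated classifier.
sentiment_dfm <- tokens_lookup(comment_tokens,
                               dictionary = data_dictionary_LSD2015[1:2]) %>%
  dfm()
sentiment_totals <- data.frame(
  Sentiment = c("Negative", "Positive"),
  Count     = colSums(sentiment_dfm)[c("negative", "positive")]
)
ggplot(sentiment_totals, aes(x = Sentiment, y = Count, fill = Sentiment)) +
  geom_col() +
  labs(title = "Positive vs. Negative Words in CNN YouTube Comments",
       x = "Sentiment", y = "Word Count") +
  theme_minimal()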