## Trending YouTube Videos

This notebook retrieves up to 200 trending videos for each of the US (US), the UK (GB) and Canada (CA) using the YouTube Data API v3. The channel details are also fetched for each video. The output is written to a CSV file with the following columns:


Format: [regionTrending, trendingRank, timeFetched, videoId, videoTitle, videoCategoryId, videoPublishTime, videoDuration,  videoTags, videoViews, videoLikes, videoDislikes, videoCommentCount, videoDescription, vieoLicenced, channelName, channelId, channelDescription, channelPublishedAt, channelViewCount, channelSubsCount, channelVideoCount]

In [1]:
import os

# The YouTube Data API v3 key is read from the YOUTUBE_API_KEY environment
# variable so that it never has to be stored in the notebook itself.
# Set it before starting the kernel, e.g.  export YOUTUBE_API_KEY=<your key>
# NOTE(review): the key that was previously hard-coded on this line has been
# exposed to anyone who could read the notebook and should be revoked.
api_key = os.environ.get("YOUTUBE_API_KEY", "")
if not api_key:
    print("YOUTUBE_API_KEY is not set. Set it before running the cells below.")

In [2]:
from googleapiclient.discovery import build


import googleapiclient.errors
import datetime    #To timestamp the videos
import csv


In [3]:
youtube = build('youtube', 'v3', developerKey=api_key)
# One naive UTC timestamp, taken once, so that every row (and the output file
# names built from it later) carry the same fetch time.
UTCnow = datetime.datetime.utcnow()
resultsPerPage = 50    # number of videos requested per page


def fetchTrendingVideos(client, regionCode, timeFetched, pageSize=50, maxPages=4):
    """Fetch the "mostPopular" video chart of one region and annotate each entry.

    Args:
        client: YouTube API client as returned by ``build('youtube', 'v3', ...)``.
        regionCode: region code passed to ``videos.list`` (e.g. "US").
        timeFetched: value stored under the ``'timeFetched'`` key of every video.
        pageSize: ``maxResults`` requested per page.
        maxPages: maximum number of pages requested; with the defaults this is
            4 pages x 50 results = at most 200 videos.

    Returns:
        List of video resource dicts in chart order. Each dict additionally gets
        the keys ``'timeFetched'``, ``'region'`` and ``'trendingRank'``
        (1-based position in the returned list).
    """
    videos = []
    pageToken = ''    # empty token requests the first page
    for _ in range(maxPages):
        response = client.videos().list(
            part="snippet,contentDetails,statistics",
            chart="mostPopular",
            regionCode=regionCode,
            maxResults=pageSize,
            pageToken=pageToken,
        ).execute()
        videos.extend(response['items'])
        # The last page carries no 'nextPageToken'; stop early in that case.
        if 'nextPageToken' not in response:
            break
        pageToken = response['nextPageToken']

    # Adding timestamp, trendingRank and region name to each entry
    for rank, video in enumerate(videos, start=1):
        video['timeFetched'] = timeFetched
        video['region'] = regionCode
        video['trendingRank'] = rank
    return videos


# US
trendingListUS = fetchTrendingVideos(youtube, "US", UTCnow, resultsPerPage)

# GB - Great Britain
trendingListGB = fetchTrendingVideos(youtube, "GB", UTCnow, resultsPerPage)

# CA - Canada
trendingListCA = fetchTrendingVideos(youtube, "CA", UTCnow, resultsPerPage)

In [4]:
# Merge the three per-region lists into one, in US, GB, CA order.
# The video dicts themselves are shared with the per-region lists, not copied.
trendingListFormatted = []
trendingList = trendingListUS + trendingListGB + trendingListCA



In [5]:
# Fetching channel details in order
#
# channels.list accepts a comma-separated list of channel IDs, so the unique
# channels are looked up in batches instead of issuing one request per video.
channelIdsPerRequest = 50

# Unique channel IDs in first-seen order (the same channel can trend several
# times and in several regions).
uniqueChannelIds = list(dict.fromkeys(
    video['snippet']['channelId'] for video in trendingList
))

channelInfoById = {}
for batchStart in range(0, len(uniqueChannelIds), channelIdsPerRequest):
    idBatch = uniqueChannelIds[batchStart:batchStart + channelIdsPerRequest]
    reqChannel = youtube.channels().list(
        part="snippet,statistics",
        id=",".join(idBatch),
        maxResults=channelIdsPerRequest,
    )
    channelRes = reqChannel.execute()
    # The response is keyed by the channel's own 'id' rather than by position,
    # so the result does not depend on the order in which channels come back.
    for channelInfo in channelRes.get('items', []):
        channelInfoById[channelInfo['id']] = channelInfo

# channelList keeps its original shape: one single-element list per video.
channelList = []
for video in trendingList:
    channelId = video['snippet']['channelId']
    channelInfo = channelInfoById.get(channelId)
    if channelInfo is None:
        # The channel was not returned by the API. Use empty values (the same
        # convention used for missing video fields) instead of failing here.
        print(f"No channel details returned for channel {channelId}")
        channelInfo = {
            'id': channelId,
            'snippet': {'title': '', 'description': '', 'publishedAt': ''},
            'statistics': {'viewCount': '', 'subscriberCount': '', 'videoCount': ''},
        }
    channelList.append([channelInfo])
    video["channelInfoDump"] = channelInfo




In [6]:
# Backup of everything collected so far: one line per video, each line being
# the str() of that video's dict.

dumpStamp = UTCnow.strftime("%d-%b-%Y-%H-%M-%S")
fileName = f"dataDump{dumpStamp}.txt"

print(f"File name of Data dump: {fileName}")
with open(fileName, 'w', encoding='utf-8') as dumpFile:
    dumpFile.writelines(str(entry) + '\n' for entry in trendingList)


File name of Data dump: dataDump04-Jul-2020-15-50-29.txt


In [7]:
# Filling in fields that are missing from a response with '' so that the row
# builder can index every key unconditionally.
# Every check is made on the video being processed; the earlier version tested
# trendingList[0] for 'title', 'categoryId' and 'description', so gaps in any
# other video were never filled.

for video in trendingList:
    snippet = video['snippet']
    for key in ('tags', 'title', 'categoryId', 'description'):
        snippet.setdefault(key, '')

    statistics = video['statistics']
    for key in ('likeCount', 'viewCount', 'dislikeCount', 'commentCount'):
        statistics.setdefault(key, '')

    video['contentDetails'].setdefault('licensedContent', '')

    # Channel fields read by the row builder get the same treatment.
    # NOTE(review): e.g. 'subscriberCount' may be absent for channels that hide
    # their subscriber count -- confirm against the channels.list reference.
    channelInfo = video.get('channelInfoDump')
    if channelInfo is not None:
        channelInfo.setdefault('snippet', {}).setdefault('description', '')
        channelStats = channelInfo.setdefault('statistics', {})
        for key in ('viewCount', 'subscriberCount', 'videoCount'):
            channelStats.setdefault(key, '')

# Column names of the CSV output, in row order.
# ('vieoLicenced' is kept spelled as-is so existing consumers of the CSV keep working.)
headerLine = [
    'regionTrending', 'trendingRank', 'timeFetched', 'videoId', 'videoTitle',
    'videoCategoryId', 'videoPublishTime', 'videoDuration', 'videoTags',
    'videoViews', 'videoLikes', 'videoDislikes', 'videoCommentCount',
    'videoDescription', 'vieoLicenced', 'channelName', 'channelId',
    'channelDescription', 'channelPublishedAt', 'channelViewCount',
    'channelSubsCount', 'channelVideoCount',
]

# print(trendingList[0]['region'], trendingList[0]['trendingRank'], trendingList[0]['timeFetched'], trendingList[0]['id'], trendingList[0]['snippet']['title'], trendingList[0]['snippet']['categoryId'], trendingList[0]['snippet']['publishedAt'], trendingList[0]['contentDetails']['duration'], trendingList[0]['snippet']['tags'])

# print(trendingList[0]['region'], trendingList[0]['trendingRank'], trendingList[0]['timeFetched'], trendingList[0]['id'], trendingList[0]['snippet']['title'], trendingList[0]['snippet']['categoryId'], trendingList[0]['snippet']['publishedAt'], trendingList[0]['contentDetails']['duration'], trendingList[0]['snippet']['tags'], trendingList[0]['statistics']['viewCount'], trendingList[0]['statistics']['likeCount'], trendingList[0]['statistics']['dislikeCount'], trendingList[0]['statistics']['commentCount'], trendingList[0]['snippet']['description'], trendingList[0]['contentDetails']['licensedContent'], trendingList[0]['channelInfoDump']['snippet']['title'], trendingList[0]['channelInfoDump']['id'], trendingList[0]['channelInfoDump']['snippet']['description'], trendingList[0]['channelInfoDump']['snippet']['publishedAt'], trendingList[0]['channelInfoDump']['statistics']['viewCount'], trendingList[0]['channelInfoDump']['statistics']['subscriberCount'], trendingList[0]['channelInfoDump']['statistics']['videoCount'])
# channelName, channelDescription, channelPublishedAt, channelViewCount, channelSubsCount, channelVideoCount



In [8]:
# Flatten every video into one row whose columns follow headerLine;
# the header itself is the first row.

def escapeLineBreaks(text):
    """Replace line feeds and carriage returns with the literal text \\n and \\r."""
    return text.replace("\n", "\\n").replace("\r", "\\r")


trendingFinalList = [headerLine]
for video in trendingList:
    # Descriptions are kept on a single line in the output.
    videoDescription = escapeLineBreaks(video['snippet']['description'])
    channelDescription = escapeLineBreaks(video['channelInfoDump']['snippet']['description'])

    snippet = video['snippet']
    details = video['contentDetails']
    stats = video['statistics']
    channel = video['channelInfoDump']
    channelStats = channel['statistics']

    trendingFinalList.append([
        video['region'],
        video['trendingRank'],
        video['timeFetched'],
        video['id'],
        snippet['title'],
        snippet['categoryId'],
        snippet['publishedAt'],
        details['duration'],
        snippet['tags'],
        stats['viewCount'],
        stats['likeCount'],
        stats['dislikeCount'],
        stats['commentCount'],
        videoDescription,
        details['licensedContent'],
        channel['snippet']['title'],
        channel['id'],
        channelDescription,
        channel['snippet']['publishedAt'],
        channelStats['viewCount'],
        channelStats['subscriberCount'],
        channelStats['videoCount'],
    ])

In [9]:
# Save the table (header row + one row per video) as a CSV file named after
# the fetch timestamp.

csvStamp = UTCnow.strftime("%d-%b-%Y-%H-%M-%S")
fileName = f"csvOut{csvStamp}.csv"

print(f"File name of CSV file: {fileName}")


# newline='' leaves line-ending handling to the csv writer.
with open(fileName, mode='w', encoding='utf-8', newline='') as result_file:
    csvWriter = csv.writer(result_file, dialect='excel')
    for row in trendingFinalList:
        csvWriter.writerow(row)

File name of CSV file: csvOut04-Jul-2020-15-50-29.csv
