# %%
import pandas as pd
import os
from datetime import datetime
from datetime import date

# %%
# Collect every scraper export in scrapedData/ (any file whose name contains
# "csvOut") and stack them into one DataFrame.
SCRAPED_DATA_DIR = 'scrapedData'

allFiles = os.listdir(SCRAPED_DATA_DIR)

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the combined frame reproducible from run to run.
csvFileNames = sorted(fileName for fileName in allFiles if "csvOut" in fileName)

# os.path.join uses the separator of the host OS. The previous hard-coded
# "scrapedData\\" prefix only resolved on Windows.
# ignore_index=True gives the combined frame one unique 0..N-1 index instead of
# repeating each file's own 0..k labels.
combined_csv = pd.concat(
    [pd.read_csv(os.path.join(SCRAPED_DATA_DIR, fileName)) for fileName in csvFileNames],
    ignore_index=True,
)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
regionTrendingtrendingRanktimeFetchedvideoIdvideoTitlevideoCategoryIdvideoPublishTimevideoDurationvideoTagsvideoViewsvideoLikesvideoDislikesvideoCommentCountvideoDescriptionvieoLicencedchannelNamechannelIdchannelDescriptionchannelPublishedAtchannelViewCountchannelSubsCountchannelVideoCount
0US12020-07-01 15:09:45.453481h0U2QUGKbSEKanye West – Wash Us In The Blood feat. Travis...222020-06-30T14:00:11ZPT3M42S['kanye', 'kanye west', 'ye', 'yeezus', 'yeezy...4214043.0269209.013107.022665.0Stream/Download “Wash Us In The Blood” ft. Tra...FalseKanye WestUCs6eXM7s8Vl5WcECcRHc2qQNaN2006-01-10T22:52:29Z4098600262000009
1US22020-07-01 15:09:45.453481LiB65FQnm6wCash vs Flight 1v1 Basketball! Shave Beard or ...242020-06-30T17:53:01ZPT25M42S['cash vs flight 1v1 basketball', 'flight reac...1497970.0103096.01083.010384.0Get Your Energy Like Me Using GG! \\nI receive ...TrueCashNastyUCvyTdLw8SkVmUcHYXSDEGwAI make videos. I make you laugh. I be happy.2013-06-22T01:48:44Z75838595636300002132
2US32020-07-01 15:09:45.453481T8pi91qWnRwMoneybagg Yo – Said Sum (Official Music Video)102020-06-30T16:59:58ZPT2M59SNaN689435.051327.0916.01915.0Moneybagg Yo's new track 'Said Sum' out now: h...FalseMoneyBagg YoUCrdPrDuDCbG8xayk5QkRLQANaN2016-10-06T03:39:25Z344603686130000084
3US42020-07-01 15:09:45.453481rAxNSpO78fEI Surprised Brent Rivera With A Custom iPad Pr...242020-06-30T16:59:43ZPT13M59SNaN2863644.0212914.03235.064229.0This was Insane, I Can't Believe We Did This F...TrueZHCUClQubH2NeMmGLTLgNdLBwXgThanks for subscribing! Its my mission to make...2013-08-07T03:22:54Z91447488813700000272
4US52020-07-01 15:09:45.453481mrF-KjnDTxcGenoa 1-3 Juventus | Dybala, CR7 & Douglas Cos...172020-06-30T22:30:01ZPT4M14S['Dybala', 'CR7', 'Douglas Costa', 'Genoa', 'D...4259625.0106818.01644.04661.0Juventus re-open their four point lead at the ...TrueSerie AUCBJeMCIeLQos7wacox4hmLQWelcome to the Official Serie A channel. Over ...2012-10-30T13:54:30Z1406914395466000018934
\n", "
" ], "text/plain": [ " regionTrending trendingRank timeFetched videoId \\\n", "0 US 1 2020-07-01 15:09:45.453481 h0U2QUGKbSE \n", "1 US 2 2020-07-01 15:09:45.453481 LiB65FQnm6w \n", "2 US 3 2020-07-01 15:09:45.453481 T8pi91qWnRw \n", "3 US 4 2020-07-01 15:09:45.453481 rAxNSpO78fE \n", "4 US 5 2020-07-01 15:09:45.453481 mrF-KjnDTxc \n", "\n", " videoTitle videoCategoryId \\\n", "0 Kanye West – Wash Us In The Blood feat. Travis... 22 \n", "1 Cash vs Flight 1v1 Basketball! Shave Beard or ... 24 \n", "2 Moneybagg Yo – Said Sum (Official Music Video) 10 \n", "3 I Surprised Brent Rivera With A Custom iPad Pr... 24 \n", "4 Genoa 1-3 Juventus | Dybala, CR7 & Douglas Cos... 17 \n", "\n", " videoPublishTime videoDuration \\\n", "0 2020-06-30T14:00:11Z PT3M42S \n", "1 2020-06-30T17:53:01Z PT25M42S \n", "2 2020-06-30T16:59:58Z PT2M59S \n", "3 2020-06-30T16:59:43Z PT13M59S \n", "4 2020-06-30T22:30:01Z PT4M14S \n", "\n", " videoTags videoViews videoLikes \\\n", "0 ['kanye', 'kanye west', 'ye', 'yeezus', 'yeezy... 4214043.0 269209.0 \n", "1 ['cash vs flight 1v1 basketball', 'flight reac... 1497970.0 103096.0 \n", "2 NaN 689435.0 51327.0 \n", "3 NaN 2863644.0 212914.0 \n", "4 ['Dybala', 'CR7', 'Douglas Costa', 'Genoa', 'D... 4259625.0 106818.0 \n", "\n", " videoDislikes videoCommentCount \\\n", "0 13107.0 22665.0 \n", "1 1083.0 10384.0 \n", "2 916.0 1915.0 \n", "3 3235.0 64229.0 \n", "4 1644.0 4661.0 \n", "\n", " videoDescription vieoLicenced \\\n", "0 Stream/Download “Wash Us In The Blood” ft. Tra... False \n", "1 Get Your Energy Like Me Using GG! \\nI receive ... True \n", "2 Moneybagg Yo's new track 'Said Sum' out now: h... False \n", "3 This was Insane, I Can't Believe We Did This F... True \n", "4 Juventus re-open their four point lead at the ... 
# %%
# Remove the column cap so the preview renders every column of the wide frame
# instead of eliding the middle ones.
pd.set_option("display.max_columns", None)

# First five rows of the combined scraper exports.
combined_csv.head()
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
regionTrendingtrendingRanktimeFetchedvideoIdvideoTitlevideoCategoryIdvideoPublishTimevideoDurationvideoTagsvideoViewsvideoLikesvideoDislikesvideoCommentCountvideoDescriptionvideoLicensedchannelNamechannelIdchannelDescriptionchannelPublishedAtchannelViewCountchannelSubsCountchannelVideoCountthumbnail_linkcomments_disabledratings_disabledvideo_error_or_removedpublishedDateCorrectFormattrendingDateCorrectFormatdayDifferencepublishedZTimepublishedZTimeFloatpublishedDayOfWeeknewOrOldData
0US12020-07-01 15:09:45.453481h0U2QUGKbSEKanye West – Wash Us In The Blood feat. Travis...222020-06-30T14:00:11ZPT3M42S['kanye', 'kanye west', 'ye', 'yeezus', 'yeezy...4214043.0269209.013107.022665.0Stream/Download “Wash Us In The Blood” ft. Tra...FalseKanye WestUCs6eXM7s8Vl5WcECcRHc2qQNaN2006-01-10T22:52:29Z4098600262000009notAvailablenotAvailablenotAvailablenotAvailable30-06-2001-07-20114:00:1114.0030563new
1US22020-07-01 15:09:45.453481LiB65FQnm6wCash vs Flight 1v1 Basketball! Shave Beard or ...242020-06-30T17:53:01ZPT25M42S['cash vs flight 1v1 basketball', 'flight reac...1497970.0103096.01083.010384.0Get Your Energy Like Me Using GG! \\nI receive ...TrueCashNastyUCvyTdLw8SkVmUcHYXSDEGwAI make videos. I make you laugh. I be happy.2013-06-22T01:48:44Z75838595636300002132notAvailablenotAvailablenotAvailablenotAvailable30-06-2001-07-20117:53:0117.8836113new
2US32020-07-01 15:09:45.453481T8pi91qWnRwMoneybagg Yo – Said Sum (Official Music Video)102020-06-30T16:59:58ZPT2M59SNaN689435.051327.0916.01915.0Moneybagg Yo's new track 'Said Sum' out now: h...FalseMoneyBagg YoUCrdPrDuDCbG8xayk5QkRLQANaN2016-10-06T03:39:25Z344603686130000084notAvailablenotAvailablenotAvailablenotAvailable30-06-2001-07-20116:59:5816.9994443new
3US42020-07-01 15:09:45.453481rAxNSpO78fEI Surprised Brent Rivera With A Custom iPad Pr...242020-06-30T16:59:43ZPT13M59SNaN2863644.0212914.03235.064229.0This was Insane, I Can't Believe We Did This F...TrueZHCUClQubH2NeMmGLTLgNdLBwXgThanks for subscribing! Its my mission to make...2013-08-07T03:22:54Z91447488813700000272notAvailablenotAvailablenotAvailablenotAvailable30-06-2001-07-20116:59:4316.9952783new
4US52020-07-01 15:09:45.453481mrF-KjnDTxcGenoa 1-3 Juventus | Dybala, CR7 & Douglas Cos...172020-06-30T22:30:01ZPT4M14S['Dybala', 'CR7', 'Douglas Costa', 'Genoa', 'D...4259625.0106818.01644.04661.0Juventus re-open their four point lead at the ...TrueSerie AUCBJeMCIeLQos7wacox4hmLQWelcome to the Official Serie A channel. Over ...2012-10-30T13:54:30Z1406914395466000018934notAvailablenotAvailablenotAvailablenotAvailable30-06-2001-07-20122:30:0122.5002783new
\n", "
" ], "text/plain": [ " regionTrending trendingRank timeFetched videoId \\\n", "0 US 1 2020-07-01 15:09:45.453481 h0U2QUGKbSE \n", "1 US 2 2020-07-01 15:09:45.453481 LiB65FQnm6w \n", "2 US 3 2020-07-01 15:09:45.453481 T8pi91qWnRw \n", "3 US 4 2020-07-01 15:09:45.453481 rAxNSpO78fE \n", "4 US 5 2020-07-01 15:09:45.453481 mrF-KjnDTxc \n", "\n", " videoTitle videoCategoryId \\\n", "0 Kanye West – Wash Us In The Blood feat. Travis... 22 \n", "1 Cash vs Flight 1v1 Basketball! Shave Beard or ... 24 \n", "2 Moneybagg Yo – Said Sum (Official Music Video) 10 \n", "3 I Surprised Brent Rivera With A Custom iPad Pr... 24 \n", "4 Genoa 1-3 Juventus | Dybala, CR7 & Douglas Cos... 17 \n", "\n", " videoPublishTime videoDuration \\\n", "0 2020-06-30T14:00:11Z PT3M42S \n", "1 2020-06-30T17:53:01Z PT25M42S \n", "2 2020-06-30T16:59:58Z PT2M59S \n", "3 2020-06-30T16:59:43Z PT13M59S \n", "4 2020-06-30T22:30:01Z PT4M14S \n", "\n", " videoTags videoViews videoLikes \\\n", "0 ['kanye', 'kanye west', 'ye', 'yeezus', 'yeezy... 4214043.0 269209.0 \n", "1 ['cash vs flight 1v1 basketball', 'flight reac... 1497970.0 103096.0 \n", "2 NaN 689435.0 51327.0 \n", "3 NaN 2863644.0 212914.0 \n", "4 ['Dybala', 'CR7', 'Douglas Costa', 'Genoa', 'D... 4259625.0 106818.0 \n", "\n", " videoDislikes videoCommentCount \\\n", "0 13107.0 22665.0 \n", "1 1083.0 10384.0 \n", "2 916.0 1915.0 \n", "3 3235.0 64229.0 \n", "4 1644.0 4661.0 \n", "\n", " videoDescription videoLicensed \\\n", "0 Stream/Download “Wash Us In The Blood” ft. Tra... False \n", "1 Get Your Energy Like Me Using GG! \\nI receive ... True \n", "2 Moneybagg Yo's new track 'Said Sum' out now: h... False \n", "3 This was Insane, I Can't Believe We Did This F... True \n", "4 Juventus re-open their four point lead at the ... 
# %%
# Bring the freshly scraped rows to the column layout of the old data set:
# fix a misspelled column, add placeholder columns, and derive the
# date / time-of-day / weekday helper columns.
pd.options.display.max_columns = None

numRows = len(combined_csv.index)
print(numRows)

# The scraper writes this column name misspelled.
combined_csv = combined_csv.rename(columns={'vieoLicenced': 'videoLicensed'})

# These columns are absent from the scraped CSVs; fill them with a placeholder
# so both data sets end up with the same columns.
for placeholderColumn in ('thumbnail_link', 'comments_disabled',
                          'ratings_disabled', 'video_error_or_removed'):
    combined_csv[placeholderColumn] = "notAvailable"

# videoPublishTime looks like "2020-06-30T14:00:11Z": the calendar date sits
# before the "T", the clock time (with a trailing "Z") after it.
publishParts = combined_csv['videoPublishTime'].str.split('T', n=1)
publishedDate = pd.to_datetime(publishParts.str[0], format='%Y-%m-%d')
publishedClock = publishParts.str[1].str.replace('Z', '', regex=False)

# Clock time as fractional hours: hh + mm/60 + ss/3600.
clockFields = publishedClock.str.split(':', n=2)
publishedClockHours = (
    clockFields.str[0].astype(float)
    + clockFields.str[1].astype(float) / 60
    + clockFields.str[2].astype(float) / 3600
)

# timeFetched looks like "2020-07-01 15:09:45.453481"; only the date is used.
fetchedDate = pd.to_datetime(
    combined_csv['timeFetched'].str.split(' ', n=1).str[0], format='%Y-%m-%d'
)

# NOTE(review): the old data set shows these two date columns as full
# timestamps (e.g. "2017-11-13 00:00:00") while the new rows are written as
# dd-mm-yy strings, so the merged file mixes two representations. The format
# is left unchanged here because downstream consumers may rely on it -- confirm
# and unify.
combined_csv['publishedDateCorrectFormat'] = publishedDate.dt.strftime("%d-%m-%y")
combined_csv['trendingDateCorrectFormat'] = fetchedDate.dt.strftime("%d-%m-%y")

# Whole calendar days between publication and the trending snapshot
# (time of day is ignored on both sides).
combined_csv['dayDifference'] = (fetchedDate - publishedDate).dt.days

# publishedZTime column - format hh:mm:ss
combined_csv['publishedZTime'] = publishedClock

# publishedZTimeFloat column - the same time as fractional hours
combined_csv['publishedZTimeFloat'] = publishedClockHours

# publishedDayOfWeek column - 1 = Sunday ... 7 = Saturday
# (pandas weekday is Monday=0 ... Sunday=6, hence the shift and wrap-around).
combined_csv['publishedDayOfWeek'] = (publishedDate.dt.weekday + 1) % 7 + 1

# newOrOldData column - marks every scraped row as belonging to the new data set
combined_csv['newOrOldData'] = "new"

combined_csv.head()

# %%
# Persist the new rows on their own, already in the old-data column layout.
combined_csv.to_csv("newDataOnly_csv_newFormat.csv", index=False)

# %%
# Load the old data set.
oldData = pd.read_excel(r'finalOldData.xlsx')

# %%
# Append the new rows to the old ones and write the merged data set.
# ignore_index=True keeps the merged frame's row labels unique; the CSV itself
# is written without the index either way.
combinedOldNew_csv = pd.concat([oldData, combined_csv], ignore_index=True)
combinedOldNew_csv.to_csv("oldAndNewData.csv", index=False)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
regionTrendingtrendingRanktimeFetchedvideoIdvideoTitlevideoCategoryIdvideoPublishTimevideoDurationvideoTagsvideoViewsvideoLikesvideoDislikesvideoCommentCountvideoDescriptionvideoLicensedchannelNamechannelIdchannelDescriptionchannelPublishedAtchannelViewCountchannelSubsCountchannelVideoCountthumbnail_linkcomments_disabledratings_disabledvideo_error_or_removedpublishedDateCorrectFormattrendingDateCorrectFormatdayDifferencepublishedZTimepublishedZTimeFloatpublishedDayOfWeeknewOrOldData
0US02017-11-14 00:00:002kyS6SvSYSEWE WANT TO TALK ABOUT OUR MARRIAGE222017-11-13T17:13:01.000ZnotAvailableSHANtell martin748374.057527.02966.015954.0SHANTELL'S CHANNEL - https://www.youtube.com/s...notAvailableCaseyNeistat0notAvailablenotAvailable000https://i.ytimg.com/vi/2kyS6SvSYSE/default.jpgFalseFalseFalse2017-11-13 00:00:002017-11-14 00:00:00117:13:0117.2169442old
1US02017-11-14 00:00:001ZAPwfrtAFYThe Trump Presidency: Last Week Tonight with J...242017-11-13T07:30:00.000ZnotAvailablelast week tonight trump presidency|\"last week ...2418783.097185.06146.012703.0One year after the presidential election, John...notAvailableLastWeekTonight0notAvailablenotAvailable000https://i.ytimg.com/vi/1ZAPwfrtAFY/default.jpgFalseFalseFalse2017-11-13 00:00:002017-11-14 00:00:00107:30:007.5000002old
2US02017-11-14 00:00:005qpjK5DgCt4Racist Superman | Rudy Mancuso, King Bach & Le...232017-11-12T19:05:24.000ZnotAvailableracist superman|\"rudy\"|\"mancuso\"|\"king\"|\"bach\"...3191434.0146033.05339.08181.0WATCH MY PREVIOUS VIDEO â–¶ \\n\\nSUBSCRIBE â–º ...notAvailableRudy Mancuso0notAvailablenotAvailable000https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpgFalseFalseFalse2017-11-12 00:00:002017-11-14 00:00:00219:05:2419.0900001old
3US02017-11-14 00:00:00puqaWrEC7tYNickelback Lyrics: Real or Fake?242017-11-13T11:00:04.000ZnotAvailablerhett and link|\"gmm\"|\"good mythical morning\"|\"...343168.010172.0666.02146.0Today we find out if Link is a Nickelback amat...notAvailableGood Mythical Morning0notAvailablenotAvailable000https://i.ytimg.com/vi/puqaWrEC7tY/default.jpgFalseFalseFalse2017-11-13 00:00:002017-11-14 00:00:00111:00:0411.0011112old
4US02017-11-14 00:00:00d380meD0W0MI Dare You: GOING BALD!?242017-11-12T18:01:41.000ZnotAvailableryan|\"higa\"|\"higatv\"|\"nigahiga\"|\"i dare you\"|\"...2095731.0132235.01989.017518.0I know it's been a while since we did this sho...notAvailablenigahiga0notAvailablenotAvailable000https://i.ytimg.com/vi/d380meD0W0M/default.jpgFalseFalseFalse2017-11-12 00:00:002017-11-14 00:00:00218:01:4118.0280561old
\n", "
" ], "text/plain": [ " regionTrending trendingRank timeFetched videoId \\\n", "0 US 0 2017-11-14 00:00:00 2kyS6SvSYSE \n", "1 US 0 2017-11-14 00:00:00 1ZAPwfrtAFY \n", "2 US 0 2017-11-14 00:00:00 5qpjK5DgCt4 \n", "3 US 0 2017-11-14 00:00:00 puqaWrEC7tY \n", "4 US 0 2017-11-14 00:00:00 d380meD0W0M \n", "\n", " videoTitle videoCategoryId \\\n", "0 WE WANT TO TALK ABOUT OUR MARRIAGE 22 \n", "1 The Trump Presidency: Last Week Tonight with J... 24 \n", "2 Racist Superman | Rudy Mancuso, King Bach & Le... 23 \n", "3 Nickelback Lyrics: Real or Fake? 24 \n", "4 I Dare You: GOING BALD!? 24 \n", "\n", " videoPublishTime videoDuration \\\n", "0 2017-11-13T17:13:01.000Z notAvailable \n", "1 2017-11-13T07:30:00.000Z notAvailable \n", "2 2017-11-12T19:05:24.000Z notAvailable \n", "3 2017-11-13T11:00:04.000Z notAvailable \n", "4 2017-11-12T18:01:41.000Z notAvailable \n", "\n", " videoTags videoViews videoLikes \\\n", "0 SHANtell martin 748374.0 57527.0 \n", "1 last week tonight trump presidency|\"last week ... 2418783.0 97185.0 \n", "2 racist superman|\"rudy\"|\"mancuso\"|\"king\"|\"bach\"... 3191434.0 146033.0 \n", "3 rhett and link|\"gmm\"|\"good mythical morning\"|\"... 343168.0 10172.0 \n", "4 ryan|\"higa\"|\"higatv\"|\"nigahiga\"|\"i dare you\"|\"... 2095731.0 132235.0 \n", "\n", " videoDislikes videoCommentCount \\\n", "0 2966.0 15954.0 \n", "1 6146.0 12703.0 \n", "2 5339.0 8181.0 \n", "3 666.0 2146.0 \n", "4 1989.0 17518.0 \n", "\n", " videoDescription videoLicensed \\\n", "0 SHANTELL'S CHANNEL - https://www.youtube.com/s... notAvailable \n", "1 One year after the presidential election, John... notAvailable \n", "2 WATCH MY PREVIOUS VIDEO â–¶ \\n\\nSUBSCRIBE â–º ... notAvailable \n", "3 Today we find out if Link is a Nickelback amat... notAvailable \n", "4 I know it's been a while since we did this sho... 
# %%
# Final sanity check: show the first five rows of the merged old + new frame.
combinedOldNew_csv.head(n=5)
"name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 }