{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:10:34.352345Z", "start_time": "2019-03-25T03:10:34.093998Z" } }, "outputs": [], "source": [ "##01##\n", "import requests\n", "from bs4 import BeautifulSoup" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:10:36.724167Z", "start_time": "2019-03-25T03:10:36.662983Z" } }, "outputs": [], "source": [ "headers = {\n", " 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',\n", " 'Accept-Encoding': 'gzip, deflate, br',\n", " 'Accept-Language': 'zh-CN,zh;q=0.9',\n", " 'Connection': 'keep-alive',\n", " 'Cookie': '_ntes_nnid=5e3bfe2c5019b346a72c07dcca53137c,1539230035064; _ntes_nuid=5e3bfe2c5019b346a72c07dcca53137c; UM_distinctid=166614624fd4c0-0a3477445d6a3f-661f1574-1fa400-166614624fe10e; mp_MA-838B-E46B5704033F_hubble=%7B%22sessionReferrer%22%3A%20%22https%3A%2F%2Fyoudata.163.com%2Findex%2Fapply%3Fchannel%3DB_baidu_sjbb_X80%22%2C%22updatedTime%22%3A%201539230522820%2C%22sessionStartTime%22%3A%201539230274623%2C%22sendNumClass%22%3A%20%7B%22allNum%22%3A%2020%2C%22errSendNum%22%3A%200%7D%2C%22deviceUdid%22%3A%20%22718513ec-5979-4d1e-9d8f-1b8e14a795e8%22%2C%22persistedTime%22%3A%201539230274611%2C%22LASTEVENT%22%3A%20%7B%22eventId%22%3A%20%22c8bc572c94437e3178ddcbeb55c176bf1c25856b%22%2C%22time%22%3A%201539230522820%7D%2C%22currentReferrer%22%3A%20%22https%3A%2F%2Fyoudata.163.com%2Findex%2Fapply%3Fchannel%3DB_baidu_sjbb_X80%22%2C%22sessionUuid%22%3A%20%22198ad0b4-30fc-4900-89c4-4f19c24306c9%22%7D; mail_psc_fingerprint=6e77768441da082055ae54b708555c75; __oc_uuid=93ede0b0-0ddb-11e9-9fb5-a3cb88e1a5e1; Province=010; City=010; vjuids=-dc4b8aca0.1682bdf142d.0.af89e76d115d1; vjlast=1546924267.1547017727.13; NTES_YD_PASSPORT=tzwzHwo1Z0bZybWh6NVxqDpQ.CeSNtaWI8Gdjl_T8tHF7LHE7Muc9bw6e9x25faymnahDIdML1nigrphb5D_odh5P.tMvP.Ea4HZpKz8_JSdGtAZFYjpHvXaBhZyFKYlpilW3bTiZtPYnq0IpHZqW4oZ4i1xlTi4UDbbS18vJH12rdGJyaEMbph6hMApu0OJoTnfTTNUP.PIb; P_OINFO=ok8aqwcaamb7d6e061f09cb4885b4649cbfe23daee@wx.163.com|1547347764|0|open_wap|00&99|null#0|null|open_wap|ok8aqwcaamb7d6e061f09cb4885b4649cbfe23daee@wx.163.com; JSESSIONID-WYYY=u99ZnGVt9wnmG82DNqMkbg%5CKG5w%2B9czNlslw%2BSCJC46rqOt%2Fzm3%2B8fyOe13Z3vU4PJnUtriqp9HWppU1jJ3itp%2FU1rK9mDhHSidyQ%5CHlvTgEHbUxftq%5CfhZdz4xq%2BeEpBAVfack1%2B28K5Gry%2BlA%2BquI176Ju6FCmzKMzH%2FrA4roWuGVa%3A1547602931235; _iuqxldmzr_=32; __utma=94650624.835405209.1547601134.1547601134.1547601134.1; __utmc=94650624; __utmz=94650624.1547601134.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; WM_NI=W8tmso3%2F5mJ7D41zCTFuKHl7kB%2FNNJUJc5mevsv6dEdjIdcUQotBQsQnghhfBBif9ZqEk5KAGoeb5BpUe9LNUs8O6KLfeYQsOsu38IijxwTgNVQl4eD4dxX979sk6mUXNUY%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6eeabc149b48d8493fc7bbab08fb3c55e938f8e84b8648593ffb8e93ea68baa89b72af0fea7c3b92a95e8bc8ef265f1f1fb8ec93d88ecff9bc26086ab85a9ef3da99ee1afcd429796add6f06fb6adbad7e74fa8b68785e25f93b1bca3d47993b2a989eb6e829ea787bc52b39ba5d6ca4b85b1ab83b84ab48babdacf4697adf8b0b54190baabbad4798dadbbd4bb43abb6b898d76d8e8a9f8db76f88ecac99db33f5e7bd8cb567978882b6dc37e2a3; WM_TID=fdH66tvKguVAFUFUVUNogWZ1TnvLVewo; __utmb=94650624.10.10.1547601134',\n", " 'Host': 'music.163.com',\n", " 'Referer': 'https://music.163.com/',\n", " 'Upgrade-Insecure-Requests': '1',\n", " 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'\n", " \n", "}\n", "\n", "def save_artist(group_id, initial, hot_artist_dic, artisti_dic):\n", " \n", " params = {'id': group_id, 'initial': initial}\n", " \n", " r = requests.get('http://music.163.com/discover/artist/cat',headers=headers, params=params)\n", "\n", " # 网页解析\n", " soup = BeautifulSoup(r.content.decode(), 'html.parser')\n", " body = soup.body\n", "\n", " hot_artists = body.find_all('a', attrs={'class': 'msk'})\n", " artists = body.find_all('a', attrs={'class': 'nm nm-icn f-thide s-fc0'})\n", " for artist in hot_artists:\n", " artist_id = artist['href'].replace('/artist?id=', '').strip()\n", " artist_name = artist['title'].replace('的音乐', '')\n", " try:\n", " hot_artist_dic[artist_id] = artist_name\n", " except Exception as e:\n", " # 打印错误日志\n", " print(e)\n", "\n", " for artist in artists:\n", " artist_id = artist['href'].replace('/artist?id=', '').strip()\n", " artist_name = artist['title'].replace('的音乐', '')\n", " try:\n", " artist_dic[artist_id] = artist_name\n", " except Exception as e:\n", " # 打印错误日志\n", " print(e)\n", " #return artist_dic, hot_artist_dic" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:10:51.135724Z", "start_time": "2019-03-25T03:10:51.132908Z" } }, "outputs": [], "source": [ "gg=1001\n", "initial = 0" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:10:53.125979Z", "start_time": "2019-03-25T03:10:52.738578Z" } }, "outputs": [], "source": [ "artist_dic = {}\n", "hot_artist_dic = {} \n", "save_artist(gg, initial, hot_artist_dic, artist_dic )" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:11:13.168111Z", "start_time": "2019-03-25T03:11:13.154867Z" } }, "outputs": [ { "data": { "text/plain": [ "{'1140028': '承利',\n", " '1142011': '王极',\n", " '1142042': '徐梦圆',\n", " '1151001': '未来星B3Rich',\n", " '1158100': '留声玩具',\n", " '1159045': '齐一',\n", " '1185054': '尚先生',\n", " '1194094': '孤矢',\n", " '1197168': '徐秉龙',\n", " '1198010': '暗杠',\n", " '1198071': 'HOPE-T',\n", " '12002184': 'AR(刘夫阳)',\n", " '12002248': '沈以诚',\n", " '12027624': '栗先达',\n", " '1203045': '艾热',\n", " '1204010': 'Tizzy T',\n", " '12050022': '裂天',\n", " '12050049': 'AirJordy',\n", " '12052056': 'Fi9江澈',\n", " '12059094': 'JAEFOREAL',\n", " '12079066': 'Lil.Jet',\n", " '12079186': 'Al\\xa0Rocco',\n", " '12079219': '李佳隆',\n", " '12083175': '排骨教主',\n", " '12083570': 'YKEY',\n", " '12084229': '焦迈奇',\n", " '12085016': '接个吻,开一枪',\n", " '12085017': 'BLOWFEVER',\n", " '12085569': '李蚊香',\n", " '1209003': 'LuckyMaxx',\n", " '12094099': '徐真真',\n", " '12094419': '羽肿',\n", " '12094558': 'Buzzy',\n", " '12114094': '马英伦',\n", " '12118230': '钱正昊',\n", " '12119569': '李迦南Doc',\n", " '12126098': 'Vk',\n", " '12127154': '吴海啸',\n", " '12127564': '艾福杰尼',\n", " '12138252': '张雪飞',\n", " '12138269': '毛不易',\n", " '12146142': '大壮',\n", " '12158033': '张佳伦',\n", " '12174057': '艾辰',\n", " '12174110': '崔开潮',\n", " '12185042': '特曼',\n", " '12194650': 'KT',\n", " '12199576': '小青龙',\n", " '12205140': '青柠',\n", " '12205361': '刘莱斯',\n", " '12236125': '王以太',\n", " '12259217': 'allenvee',\n", " '12268138': 'Todd\\xa0Li',\n", " '12270575': '孟凡明',\n", " '12271965': 'Sand',\n", " '12276430': 'Cubi',\n", " '12282733': '黑崎子',\n", " '12286700': '虎二',\n", " '12287118': 'Ayo97',\n", " '12287780': '前男友',\n", " '12319383': '胖胖胖',\n", " '12350853': 'Red\\xa0Monkey',\n", " '12360230': 'Lil-7',\n", " '12367021': 'AlimjanHasan',\n", " '12420149': '李袁杰',\n", " '12429072': '隔壁老樊',\n", " '12453329': '功夫胖KungFuPen',\n", " '12464053': 'PRC\\xa0巴音汗',\n", " '12476238': '英镑',\n", " '12489984': '掌嘴',\n", " '12520754': 'DJ-荣大大',\n", " '12570017': 'MOMINJAN_ABLIKIM穆明江',\n", " '12606272': '吕大叶',\n", " '12641765': '江辰',\n", " '12760978': '广东雨神',\n", " '12782871': '$唐老师',\n", " '12798217': '刘思鉴',\n", " '12798308': '马子林Broma',\n", " '12852558': 'Y\\xa0U\\xa0Jay',\n", " '12905881': 'DJ\\xa0Mr.Zi',\n", " '12932368': '蔡徐坤',\n", " '12969396': 'MOONBOI(杨望舒)',\n", " '13112541': 'Gibb-Z',\n", " '13145283': 'ICE',\n", " '13152025': '杨多',\n", " '13286745': 'еяхат\\xa0музыка',\n", " '13296197': '二狗村高富帅',\n", " '13445026': '小君',\n", " '13527275': 'Wya乌鸦',\n", " '13609637': '马良',\n", " '13906123': '张紫豪',\n", " '14857181': '情词尧',\n", " '15064461': '方宇杰',\n", " '27862910': '者思范Remix',\n", " '28304970': '万能青年',\n", " '28868804': '寅子',\n", " '29069586': '王小帅',\n", " '30647258': 'DJ杨硕',\n", " '790044': '小峰峰',\n", " '977173': '3Bangz'}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "artist_dic" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2019-03-05T04:40:59.471272Z", "start_time": "2019-03-05T04:40:42.416934Z" }, "collapsed": true }, "outputs": [], "source": [ "for i in range(65, 91):\n", " save_artist(gg, i, hot_artist_dic, artist_dic )" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 20, "metadata": { "ExecuteTime": { "end_time": "2019-03-05T04:40:59.668283Z", "start_time": "2019-03-05T04:40:59.471272Z" }, "collapsed": true }, "outputs": [], "source": [ "## 别运行它 ##\n", "## 会重新覆盖掉 ##\n", "\n", "import json\n", "with open('save_artist_dict.json','w') as f:\n", " json.dump(artist_dic,f)\n", "\n", "with open('save_hot_artist_dict.json','w') as f:\n", " json.dump(hot_artist_dic,f)\n", " " ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:36:03.297690Z", "start_time": "2019-03-25T03:36:03.294787Z" } }, "outputs": [], "source": [ "# ##01##\n", "\n", "# import json\n", "# dd= json.load(open('save_artist_dict.json')) \n", "# # dd" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-03-15T13:11:15.931636Z", "start_time": "2019-03-15T13:11:15.922635Z" }, "collapsed": true }, "outputs": [], "source": [ "##01##\n", "\n", "artist_dic ={}\n", "artist_dic = dd \n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:16:16.019225Z", "start_time": "2019-03-25T03:16:15.992373Z" } }, "outputs": [], "source": [ "headers = {\n", " 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',\n", " 'Accept-Encoding': 'gzip, deflate, sdch',\n", " 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',\n", " 'Cache-Control': 'no-cache',\n", " 'Connection': 'keep-alive',\n", " 'Cookie': '_ntes_nnid=7eced19b27ffae35dad3f8f2bf5885cd,1476521011210; _ntes_nuid=7eced19b27ffae35dad3f8f2bf5885cd; usertrack=c+5+hlgB7TgnsAmACnXtAg==; Province=025; City=025; _ga=GA1.2.1405085820.1476521280; NTES_PASSPORT=6n9ihXhbWKPi8yAqG.i2kETSCRa.ug06Txh8EMrrRsliVQXFV_orx5HffqhQjuGHkNQrLOIRLLotGohL9s10wcYSPiQfI2wiPacKlJ3nYAXgM; P_INFO=hourui93@163.com|1476523293|1|study|11&12|jis&1476511733&mail163#jis&320100#10#0#0|151889&0|g37_client_check&mailsettings&mail163&study&blog|hourui93@163.com; JSESSIONID-WYYY=189f31767098c3bd9d03d9b968c065daf43cbd4c1596732e4dcb471beafe2bf0605b85e969f92600064a977e0b64a24f0af7894ca898b696bd58ad5f39c8fce821ec2f81f826ea967215de4d10469e9bd672e75d25f116a9d309d360582a79620b250625859bc039161c78ab125a1e9bf5d291f6d4e4da30574ccd6bbab70b710e3f358f%3A1476594130342; _iuqxldmzr_=25; __utma=94650624.1038096298.1476521011.1476588849.1476592408.6; __utmb=94650624.11.10.1476592408; __utmc=94650624; __utmz=94650624.1476521011.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',\n", " 'DNT': '1',\n", " 'Host': 'music.163.com',\n", " 'Pragma': 'no-cache',\n", " 'Referer': 'http://music.163.com/',\n", " 'Upgrade-Insecure-Requests': '1',\n", " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'\n", "}\n", "\n", "\n", "\n", "def save_albums(artist_id, albume_dic):\n", " params = {'id': artist_id, 'limit': '200'}\n", " # 获取歌手个人主页\n", " r = requests.get('http://music.163.com/artist/album', headers=headers, params=params)\n", "\n", " # 网页解析\n", " soup = BeautifulSoup(r.content.decode(), 'html.parser')\n", " body = soup.body\n", "\n", " albums = body.find_all('a', attrs={'class': 'tit s-fc0'}) # 获取所有专辑\n", "\n", " for album in albums:\n", " albume_id = album['href'].replace('/album?id=', '')\n", " albume_dic[albume_id] = artist_id" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:17:09.572085Z", "start_time": "2019-03-25T03:17:08.893986Z" } }, "outputs": [], "source": [ "albume_dic = {}\n", "#for i in artist_dic.keys()\n", "save_albums(2747, albume_dic)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:17:23.948477Z", "start_time": "2019-03-25T03:17:23.942741Z" } }, "outputs": [ { "data": { "text/plain": [ "{'2478699': 2747,\n", " '3233534': 2747,\n", " '3233776': 2747,\n", " '3261022': 2747,\n", " '3266290': 2747,\n", " '3266292': 2747,\n", " '3266298': 2747,\n", " '3266912': 2747,\n", " '3266915': 2747,\n", " '3266920': 2747,\n", " '3287239': 2747,\n", " '34428052': 2747,\n", " '34430036': 2747,\n", " '34798078': 2747,\n", " '35104158': 2747,\n", " '35623240': 2747,\n", " '35889034': 2747,\n", " '38512406': 2747,\n", " '8160': 2747,\n", " '8163': 2747,\n", " '8166': 2747,\n", " '8169': 2747,\n", " '8177': 2747,\n", " '8182': 2747,\n", " '8184': 2747,\n", " '8198': 2747,\n", " '8205': 2747,\n", " '8209': 2747,\n", " '8218': 2747,\n", " '8221': 2747,\n", " '8227': 2747,\n", " '8238': 2747,\n", " '8242': 2747,\n", " '8244': 2747,\n", " '8247': 2747,\n", " '8256': 2747,\n", " '8265': 2747,\n", " '8275': 2747,\n", " '8282': 2747,\n", " '8290': 2747,\n", " '8299': 2747,\n", " '8306': 2747,\n", " '8309': 2747,\n", " '8315': 2747,\n", " '8318': 2747,\n", " '8320': 2747,\n", " '8321': 2747,\n", " '8324': 2747,\n", " '8328': 2747,\n", " '8331': 2747,\n", " '8334': 2747,\n", " '8336': 2747,\n", " '8339': 2747,\n", " '8341': 2747,\n", " '8344': 2747,\n", " '8347': 2747,\n", " '8349': 2747,\n", " '8350': 2747,\n", " '8353': 2747,\n", " '8355': 2747,\n", " '8356': 2747,\n", " '8362': 2747,\n", " '8365': 2747,\n", " '8367': 2747,\n", " '8369': 2747,\n", " '8372': 2747,\n", " '8373': 2747,\n", " '8375': 2747,\n", " '8378': 2747,\n", " '8381': 2747,\n", " '8387': 2747,\n", " '8390': 2747,\n", " '8394': 2747}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "albume_dic" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "ExecuteTime": { "end_time": "2019-03-05T05:17:12.491125Z", "start_time": "2019-03-05T05:17:12.002105Z" }, "collapsed": true }, "outputs": [], "source": [ "##不要运行它 会覆盖掉 ##\n", "\n", "\n", "import json\n", "with open('save_albume_dict.json','w') as f:\n", " json.dump(albume_dic,f)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 20, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:36:08.704019Z", "start_time": "2019-03-25T03:36:08.701047Z" }, "scrolled": true }, "outputs": [], "source": [ "# ##01##\n", "\n", "# import json\n", "# kk = json.load(open('save_albume_dict.json')) \n", "# kk" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:25:26.035948Z", "start_time": "2019-03-19T12:25:25.960944Z" } }, "outputs": [ { "data": { "text/plain": [ "'12085017'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "kk['36074293']" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2019-03-18T11:36:40.871554Z", "start_time": "2019-03-18T11:36:40.855553Z" } }, "outputs": [ { "data": { "text/plain": [ "29977" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(albume_dic)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-03-22T16:21:18.773456Z", "start_time": "2019-03-22T16:21:18.753456Z" } }, "outputs": [], "source": [ "albume_dic = {}\n", "albume_dic = kk" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:19:10.798490Z", "start_time": "2019-03-25T03:19:10.765274Z" } }, "outputs": [], "source": [ "headers = {\n", " 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',\n", " 'Accept-Encoding': 'gzip, deflate, sdch',\n", " 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',\n", " 'Cache-Control': 'no-cache',\n", " 'Connection': 'keep-alive',\n", " 'Cookie': '_ntes_nnid=7eced19b27ffae35dad3f8f2bf5885cd,1476521011210; _ntes_nuid=7eced19b27ffae35dad3f8f2bf5885cd; usertrack=c+5+hlgB7TgnsAmACnXtAg==; Province=025; City=025; NTES_PASSPORT=6n9ihXhbWKPi8yAqG.i2kETSCRa.ug06Txh8EMrrRsliVQXFV_orx5HffqhQjuGHkNQrLOIRLLotGohL9s10wcYSPiQfI2wiPacKlJ3nYAXgM; P_INFO=hourui93@163.com|1476523293|1|study|11&12|jis&1476511733&mail163#jis&320100#10#0#0|151889&0|g37_client_check&mailsettings&mail163&study&blog|hourui93@163.com; _ga=GA1.2.1405085820.1476521280; JSESSIONID-WYYY=fb5288e1c5f667324f1636d020704cab2f27ee915622b114f89027cbf60c38be2af6b9cbef2223c1f2581e3502f11b86efd60891d6f61b6f783c0d55114f8269fa801df7352f5cc4c8259876e563a6bd0212b504a8997723a0593b21d5b3d9076d4fa38c098be68e3c5d36d342e4a8e40c1f73378cec0b5851bd8a628886edbdd23a7093%3A1476623819662; _iuqxldmzr_=25; __utma=94650624.1038096298.1476521011.1476610320.1476622020.10; __utmb=94650624.14.10.1476622020; __utmc=94650624; __utmz=94650624.1476521011.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',\n", " 'DNT': '1',\n", " 'Host': 'music.163.com',\n", " 'Pragma': 'no-cache',\n", " 'Referer': 'http://music.163.com/',\n", " 'Upgrade-Insecure-Requests': '1',\n", " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'\n", " }\n", "\n", "def save_music(album_id, music_dic):\n", " params = {'id': album_id}\n", " # 获取专辑对应的页面\n", " r = requests.get('http://music.163.com/album', headers=headers, params=params)\n", "\n", " # 网页解析\n", " soup = BeautifulSoup(r.content.decode(), 'html.parser')\n", " body = soup.body\n", "\n", " musics = body.find('ul', attrs={'class': 'f-hide'}).find_all('li') # 获取专辑的所有音乐\n", "\n", " for music in musics:\n", " music = music.find('a')\n", " music_id = music['href'].replace('/song?id=', '')\n", " music_name = music.getText()\n", " music_dic[music_id] = [music_name, album_id]\n", " \n", " " ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:19:14.703190Z", "start_time": "2019-03-25T03:19:14.698628Z" } }, "outputs": [], "source": [ "# flush print\n", "import sys\n", "def flushprint(d):\n", " sys.stdout.write('\\r')\n", " sys.stdout.write(str(d))\n", " sys.stdout.flush()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:21:13.095717Z", "start_time": "2019-03-25T03:21:12.718016Z" } }, "outputs": [ { "data": { "text/plain": [ "{'463268802': ['传奇三部曲', 8160],\n", " '82795': ['唱一遍一遍', 8160],\n", " '82799': ['传奇', 8160],\n", " '82803': ['偏爱水中月', 8160],\n", " '82807': ['倾国倾城', 8160],\n", " '82811': ['忘不了', 8160],\n", " '82814': ['苏三采茶', 8160],\n", " '82817': ['三部情曲', 8160],\n", " '82820': ['舍不得把眼睛睁开', 8160],\n", " '82824': ['玫瑰寄情', 8160],\n", " '82828': ['幸福在这里', 8160]}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "music_dic={}\n", "save_music(8160,music_dic)\n", "\n", "music_dic" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-03-24T18:15:07.357825Z", "start_time": "2019-03-24T18:15:07.347825Z" } }, "outputs": [], "source": [ "import random\n", "import time" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2019-03-15T13:59:18.629461Z", "start_time": "2019-03-15T13:52:36.515531Z" }, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "开始爬取代理ip\n", "HTTPS:218.22.7.62:53281\n", "HTTPS:218.22.7.62:53281\n", "HTTP:113.116.144.227:9000\n", "HTTP:113.116.145.206:9000\n", "HTTP:27.37.47.244:9000\n", "HTTPS:27.14.146.188:8118\n", "HTTP:116.209.57.11:9999\n", "HTTPS:119.176.66.90:9999\n", "HTTPS:116.209.53.204:9999\n", "HTTP:111.75.193.24:48449\n", "HTTPS:61.189.242.243:55484\n", "HTTPS:210.5.10.87:53281\n", "HTTP:121.8.107.13:8080\n", "HTTPS:27.191.234.69:9999\n", "HTTPS:121.69.13.242:53281\n", "HTTPS:58.240.220.86:53281\n", "HTTPS:203.86.3.22:8080\n", "HTTPS:61.128.208.94:3128\n", "HTTPS:218.241.219.226:9999\n", "HTTP:222.74.61.98:53281\n", "HTTPS:121.69.13.242:53281\n", "HTTP:113.116.146.188:9000\n", "HTTP:211.162.70.229:3128\n", "HTTPS:113.65.5.181:8118\n", "HTTPS:27.191.234.69:9999\n", "HTTPS:14.118.130.213:8081\n", "HTTP:60.190.153.150:8080\n", "HTTP:113.116.146.8:9000\n", "HTTP:101.251.216.103:8080\n", "HTTPS:113.65.5.181:8118\n", "HTTPS:110.52.235.214:9999\n", "HTTPS:121.69.46.178:9000\n", "HTTPS:115.231.5.230:44524\n", "HTTP:116.209.59.81:9999\n", "HTTPS:59.45.16.10:59156\n", "HTTPS:122.141.74.186:8080\n", "HTTP:182.111.129.37:53281\n", "HTTPS:115.231.5.230:44524\n", "HTTPS:59.45.16.10:59156\n", "HTTPS:60.184.194.157:3128\n", "HTTP:115.171.203.189:9000\n", "HTTP:125.46.0.62:53281\n", "HTTPS:203.86.3.22:8080\n", "HTTPS:218.241.219.226:9999\n", "HTTP:221.227.39.160:8118\n", "HTTPS:124.152.32.140:53281\n", "HTTP:113.116.146.188:9000\n", "HTTP:211.162.70.229:3128\n", "HTTP:221.210.120.153:54402\n", "HTTPS:110.250.65.108:8118\n", "HTTPS:115.171.203.73:9000\n", "HTTPS:221.205.111.210:9797\n", "HTTP:182.111.129.37:53281\n", "HTTP:112.95.190.10:9797\n", "HTTP:125.46.0.62:53281\n", "HTTPS:121.69.46.178:9000\n", "HTTP:59.44.247.194:9797\n", "HTTPS:180.141.90.172:53281\n", "HTTP:113.200.214.164:9999\n", "HTTPS:58.240.220.86:53281\n", "HTTP:61.176.223.7:58822\n", "HTTP:221.210.120.153:54402\n", "HTTP:218.75.10.58:8080\n", "HTTP:222.74.61.98:53281\n", "HTTPS:116.209.53.204:9999\n", "HTTP:218.28.58.150:53281\n", "HTTP:111.177.183.32:9999\n", "HTTPS:116.209.55.150:9999\n", "HTTPS:183.30.204.154:9999\n", "HTTPS:124.152.32.140:53281\n", "HTTPS:113.54.152.170:8080\n", "HTTPS:113.140.1.82:53281\n", "HTTP:14.118.130.214:8081\n", "HTTPS:111.198.154.116:8888\n", "HTTP:221.205.111.7:9797\n", "HTTPS:117.90.0.10:8118\n", "HTTP:123.133.41.104:53281\n", "HTTP:221.7.255.168:8080\n", "HTTPS:110.250.65.108:8118\n", "HTTP:113.200.214.164:9999\n", "HTTPS:140.210.4.143:53281\n", "HTTPS:61.128.208.94:3128\n", "HTTP:101.231.50.154:8000\n", ":118.24.148.74:8080\n", "HTTP:218.28.58.150:53281\n", "HTTPS:121.40.78.138:3128\n", "HTTPS:36.110.14.186:3128\n", "HTTP:110.52.235.155:9999\n", "HTTP:116.196.90.181:3128\n", "HTTPS:27.17.45.90:43411\n", "HTTPS:110.52.235.41:9999\n", "HTTPS:114.119.116.92:61066\n", "HTTP:221.227.39.160:8118\n", "HTTPS:117.114.149.66:53281\n", "HTTPS:112.74.207.50:3128\n", "HTTPS:101.132.122.230:3128\n", "HTTPS:116.62.149.245:8118\n", "HTTP:116.196.90.181:3128\n", "HTTP:116.209.55.243:9999\n", "HTTP:116.209.56.244:9999\n", "HTTPS:123.117.36.233:9000\n", "爬取完成\n", "一共爬取代理ip: 101 个 \n", "\n" ] } ], "source": [ "#!/usr/bin/env python3\n", "# -*- coding: utf-8 -*-\n", "import requests, threading, datetime\n", "from lxml import etree\n", "import random\n", "\n", "\"\"\"\n", "1、抓取西刺代理网站的代理ip\n", "2、并根据指定的目标url,对抓取到ip的有效性进行验证\n", "3、最后存到指定的path\n", "\"\"\"\n", "\n", "# ------------------------------------------------------文档处理--------------------------------------------------------\n", "# 写入文档\n", "def write(path,text):\n", " with open(path, 'a', encoding='utf-8') as f:\n", " f.writelines(text)\n", " f.write('\\n')\n", "# 清空文档\n", "def truncatefile(path):\n", " with open(path, 'w', encoding='utf-8') as f:\n", " f.truncate()\n", "# 读取文档\n", "def read(path):\n", " with open(path, 'r', encoding='utf-8') as f:\n", " txt = []\n", " for s in f.readlines():\n", " txt.append(s.strip())\n", " return txt\n", "# ----------------------------------------------------------------------------------------------------------------------\n", "# 计算时间差,格式: 时分秒\n", "def gettimediff(start,end):\n", " seconds = (end - start).seconds\n", " m, s = divmod(seconds, 60)\n", " h, m = divmod(m, 60)\n", " diff = (\"%02d:%02d:%02d\" % (h, m, s))\n", " return diff\n", "# ----------------------------------------------------------------------------------------------------------------------\n", "'''返回一个随机的请求头 headers'''\n", "def getheaders():\n", " user_agent_list = [\n", " \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1\",\n", " \"Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11\",\n", " \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6\",\n", " \"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6\",\n", " \"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1\",\n", " \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5\",\n", " \"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5\",\n", " \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3\",\n", " \"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3\",\n", " \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3\",\n", " \"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3\",\n", " \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3\",\n", " \"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3\",\n", " \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3\",\n", " \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3\",\n", " \"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3\",\n", " \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24\",\n", " \"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24\"\n", " ]\n", " UserAgent = random.choice(user_agent_list)\n", " headers = {'User-Agent': UserAgent}\n", " return headers\n", "\n", "'''检查ip是否可用'''\n", "def checkip(targeturl,ip):\n", " headers = getheaders() # 定制请求头\n", " proxies = {\"http\": \"http://\"+ip, \"https\": \"http://\"+ip} # 代理ip\n", " try:\n", " response = requests.get(url=targeturl, proxies=proxies, headers=headers, timeout=5).status_code\n", " if response == 200:\n", " return True\n", " else:\n", " return False\n", " except:\n", " return False\n", "\n", "'''获取代理方法'''\n", "# 免费代理 XiciDaili\n", "def findip(type,pagenum,targeturl,path): # ip类型,页码,目标url,存放ip的路径\n", " list = {\n", " '1': 'http://www.xicidaili.com/nt/', # xicidaili国内普通代理\n", " '2': 'http://www.xicidaili.com/nn/', # xicidaili国内高匿代理\n", " '3': 'http://www.xicidaili.com/wn/', # xicidaili国内https代理\n", " '4': 'http://www.xicidaili.com/wt/' # xicidaili国外http代理\n", " }\n", " url = list[str(type)]+str(pagenum) # 配置url\n", " headers = getheaders() # 定制请求头\n", " html = requests.get(url=url, headers=headers, timeout=5).text\n", " doc = etree.HTML(html)\n", " all = doc.xpath('.//table[@id=\"ip_list\"]/tr')\n", " for i in all:\n", " t = i.xpath('td/text()')\n", " if len(t) > 0:\n", " ip = t[0] + ':' + t[1]\n", " if t[5]:\n", " is_avail = checkip(targeturl, ip)\n", " if is_avail == True:\n", " ip = t[5].strip() + ':' + ip\n", " write(path=path, text=ip)\n", " print(ip)\n", "\n", "'''多线程抓取ip入口'''\n", "def getip(targeturl,path):\n", " truncatefile(path) # 爬取前清空文档\n", " start = datetime.datetime.now() # 开始时间\n", " threads = []\n", " for type in range(4): # 四种类型ip,每种类型取前三页,共12条线程\n", " for pagenum in range(3):\n", " t = threading.Thread(target=findip, args=(type+1, pagenum+1, targeturl, path))\n", " threads.append(t)\n", " print('开始爬取代理ip')\n", " for s in threads: # 开启多线程爬取\n", " s.start()\n", " for e in threads: # 等待所有线程结束\n", " e.join()\n", " print('爬取完成')\n", " # end = datetime.datetime.now() # 结束时间\n", " # diff = gettimediff(start, end) # 计算耗时\n", " ips = read(path) # 读取爬到的ip数量\n", " print('一共爬取代理ip: %s 个 \\n' % (len(ips)))\n", " # print('一共爬取代理ip: %s 个,共耗时: %s \\n' % (len(ips), diff))\n", "\n", "'''启动'''\n", "if __name__ == '__main__':\n", " path = 'ip.txt' # 存放爬取ip的文档path\n", " targeturl = 'http://www.cnblogs.com/TurboWay/' # 验证ip有效性的指定url\n", " getip(targeturl, path)\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T07:05:19.516749Z", "start_time": "2019-03-25T07:05:19.511168Z" } }, "outputs": [], "source": [ "# music_dic = {}\n", "# for k, i in enumerate(kk.keys()):\n", "# #time.sleep(+random.random())\n", "# flushprint(k)\n", "# if k % 100 == 0:\n", "# print('this is the music of :' + str(k))\n", " \n", "# try:\n", "# save_music(i, music_dic)\n", "# except Exception as e:\n", "# print(i)\n", "# print(e) \n", " \n", " \n", "# if k % 2000 == 0:\n", "# with open('save_music_dict{}.json'.format(k//2000),'w') as f:\n", "# json.dump(music_dic,f) \n", "# print('保存到了:',k)\n", "# misic_dic.clear() \n", "# music_dic = {}" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2019-03-18T11:26:17.241269Z", "start_time": "2019-03-18T11:26:17.221269Z" }, "collapsed": true }, "outputs": [], "source": [ "music_dic = {}\n", "\n", "k = 10001\n", "i = 10001\n", "\n" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T07:05:40.195091Z", "start_time": "2019-03-25T07:05:40.189072Z" } }, "outputs": [], "source": [ "# music_dic = {}\n", "# for k, i in enumerate(kk.keys()): \n", "# #time.sleep(1.0+random.random())\n", "# flushprint(k)\n", " \n", "# if k % 1000 == 0:\n", "# print('this is the music of :' + str(k))\n", " \n", "# try:\n", "# save_music(i, music_dic)\n", " \n", "# except Exception as e:\n", "# print(i)\n", "# print(e) \n", " \n", " \n", "# if k % 5000 == 0:\n", "# with open('save_music_dict01{}.json'.format(k//5000),'w') as f:\n", "# json.dump(music_dic,f) \n", "# print('保存到了:',k)\n", "# #misic_dic.clear() \n", " " ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:37:01.317808Z", "start_time": "2019-03-25T03:37:01.310875Z" } }, "outputs": [], "source": [ "# music_dic = {}\n", "# for k, i in enumerate(kk.keys()): \n", "# #time.sleep(1.0+random.random())\n", "# flushprint(k)\n", " \n", "# if k % 1000 == 0 and k != 0:\n", "# print('this is the music of :' + str(k))\n", "# time.sleep(60.0)\n", "# try:\n", "# save_music(i, music_dic)\n", " \n", "# except Exception as e:\n", "# print('#'+i)\n", "# print(e) \n", " \n", " \n", "# if k % 5000 == 0 and k != 0:\n", "# with open('save_music_dict03{}.json'.format(k//5000),'w') as f:\n", "# json.dump(music_dic,f) \n", "# print('保存到了:',k)\n", " \n", " \n", " \n", "# #misic_dic.clear()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2019-03-22T20:25:12.053245Z", "start_time": "2019-03-22T20:25:11.126192Z" } }, "outputs": [], "source": [ "\n", "import json\n", "with open('save_music_dict01.json','w') as f:\n", " json.dump(music_dic,f)\n", " " ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:37:06.829217Z", "start_time": "2019-03-25T03:37:06.826524Z" } }, "outputs": [], "source": [ "# import json\n", "# jj = json.load(open('save_music_dict01.json')) \n", "# jj\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2019-03-24T18:15:34.657066Z", "start_time": "2019-03-24T18:15:34.638066Z" } }, "outputs": [ { "data": { "text/plain": [ "111898" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(jj)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2019-03-21T19:38:21.983175Z", "start_time": "2019-03-21T19:38:21.025121Z" } }, "outputs": [], "source": [ "import json\n", "with open('save_music_dict.json','w') as f:\n", " json.dump(music_dic,f)\n", " " ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:37:09.745178Z", "start_time": "2019-03-25T03:37:09.742521Z" } }, "outputs": [], "source": [ "# import json\n", "# jj = json.load(open('save_music_dict.json')) \n", "# jj\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2019-03-21T19:38:35.701960Z", "start_time": "2019-03-21T19:38:35.668958Z" } }, "outputs": [ { "data": { "text/plain": [ "110918" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(jj)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-03-08T07:52:15.759688Z", "start_time": "2019-03-08T07:52:15.745688Z" }, "collapsed": true }, "outputs": [], "source": [ "music_dic = {}\n", "music_dic = jj" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 27, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T07:05:57.554208Z", "start_time": "2019-03-25T07:05:57.528775Z" } }, "outputs": [], "source": [ "# import requests\n", "# import re\n", "# import csv\n", "# import json\n", " \n", " \n", "# class SingerSpider(object):\n", "# def __init__(self):\n", "# self.headers = {\n", "# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',\n", "# 'Accept-Encoding': 'gzip, deflate',\n", "# 'Accept-Language': 'zh-CN,zh;q=0.9',\n", "# 'Connection': 'keep-alive',\n", "# 'Host': 'music.163.com',\n", "# 'Referer': 'http://music.163.com/',\n", "# 'Upgrade-Insecure-Requests': '1',\n", "# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '\n", "# 'Chrome/66.0.3359.181 Safari/537.36'\n", "# }\n", " \n", "# def get_index(self, url):\n", "# '请求模块'\n", "# try:\n", "# resp = requests.get(url,headers=self.headers)\n", "# if resp.status_code == 200:\n", "# self.parse_re(resp.text)\n", "# else:\n", "# print('error')\n", "# except ConnectionError:\n", "# self.get_index(url)\n", " \n", "# def parse_re(self, resp):\n", "# '解析模块'\n", "# print('start parse {}'.format(url))\n", "# tags = re.findall(r'(.*?)', resp, re.S)\n", "# title = re.findall(r'(.*?)-.*?', resp, re.S)\n", "# for tag in tags:\n", "# # print(tag[0],tag[1])\n", "# self.save_json(tag, title)\n", "# #self.save_csv(tag, title)\n", " \n", "# def save_csv(self, tag, title):\n", "# '存储模块'\n", "# print('start save {}'.format(url))\n", "# with open('all_singer.csv', 'a+', newline='', encoding='utf-8') as f:\n", "# writer = csv.writer(f)\n", "# writer.writerow((tag[0], tag[1], title[0]))\n", "# print('finish spider {}'.format(url))\n", " \n", "# def save_json(self, tag, title):\n", "# print('start save {}'.format(url))\n", "# s = json.dumps({'id': tag[0], 'name': tag[1], 'title': title[0]},ensure_ascii=False)\n", "# with open('all_singer.json', 'a+', newline='', encoding='utf-8') as f:\n", "# f.write(s)\n", "# print('finish spider {}'.format(url))\n", "# print(s)\n", " \n", " \n", "# if __name__ == '__main__':\n", "# # 歌手分类id\n", "# list1 = [1001, 1002, 1003, 2001, 2002, 2003, 6001, 6002, 6003, 7001, 7002, 7003, 4001, 4002, 4003]\n", "# # initial的值\n", "# list2 = [0,65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90]\n", "# for i in list1:\n", "# for j in list2:\n", "# url = 'http://music.163.com/discover/artist/cat?id=' + str(i) + '&initial=' + str(j)\n", "# print('start spider {}'.format(url))\n", "# SingerSpider().get_index(url)\n", " \n", "# #原文:https://blog.csdn.net/wanhaiwei/article/details/84327561 " ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:37:48.475669Z", "start_time": "2019-03-25T03:37:48.473147Z" } }, "outputs": [], "source": [ "# f = open(r'C://Users/Administrator/all_singer.csv','r',encoding='utf-8') \n", "# g=f.read()\n", "# print(g)\n", "# f.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:29:09.205076Z", "start_time": "2019-03-25T03:29:09.195366Z" } }, "outputs": [], "source": [ "headers = {\n", " 'Host': 'music.163.com',\n", " 'Connection': 'keep-alive',\n", " 'Content-Length': '484',\n", " 'Cache-Control': 'max-age=0',\n", " 'Origin': 'http://music.163.com',\n", " 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36',\n", " 'Content-Type': 'application/x-www-form-urlencoded',\n", " 'Accept': '*/*',\n", " 'DNT': '1',\n", " 'Accept-Encoding': 'gzip, deflate',\n", " 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',\n", " 'Cookie': 'JSESSIONID-WYYY=b66d89ed74ae9e94ead89b16e475556e763dd34f95e6ca357d06830a210abc7b685e82318b9d1d5b52ac4f4b9a55024c7a34024fddaee852404ed410933db994dcc0e398f61e670bfeea81105cbe098294e39ac566e1d5aa7232df741870ba1fe96e5cede8372ca587275d35c1a5d1b23a11e274a4c249afba03e20fa2dafb7a16eebdf6%3A1476373826753; _iuqxldmzr_=25; _ntes_nnid=7fa73e96706f26f3ada99abba6c4a6b2,1476372027128; _ntes_nuid=7fa73e96706f26f3ada99abba6c4a6b2; __utma=94650624.748605760.1476372027.1476372027.1476372027.1; __utmb=94650624.4.10.1476372027; __utmc=94650624; __utmz=94650624.1476372027.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',\n", "}\n", "\n", "params = {\n", " 'csrf_token': ''\n", "}\n", "\n", "data = {\n", " 'params': '5L+s/X1qDy33tb2sjT6to2T4oxv89Fjg1aYRkjgzpNPR6hgCpp0YVjNoTLQAwWu9VYvKROPZQj6qTpBK+sUeJovyNHsnU9/StEfZwCOcKfECFFtAvoNIpulj1TDOtBir',\n", " 'encSecKey': '59079f3e07d6e240410018dc871bf9364f122b720c0735837d7916ac78d48a79ec06c6307e6a0e576605d6228bd0b377a96e1a7fc7c7ddc8f6a3dc6cc50746933352d4ec5cbe7bddd6dcb94de085a3b408d895ebfdf2f43a7c72fc783512b3c9efb860679a88ef21ccec5ff13592be450a1edebf981c0bf779b122ddbd825492'\n", " \n", "}" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:25:43.756748Z", "start_time": "2019-03-25T03:25:43.752784Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "http://music.163.com/api/v1/resource/comments/R_SO_4_516997458?limit=20&offset=0\n" ] } ], "source": [ "offset = 0\n", "music_id = '516997458'\n", "url = 'http://music.163.com/api/v1/resource/comments/R_SO_4_'+ music_id + '?limit=20&offset=' + str(offset)\n", "print(url)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:29:13.106029Z", "start_time": "2019-03-25T03:29:12.706428Z" } }, "outputs": [ { "data": { "text/plain": [ "dict_keys(['total', 'more', 'moreHot', 'hotComments', 'userId', 'topComments', 'comments', 'isMusician', 'code'])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "offset = 0\n", "music_id = '516997458'\n", "url = 'http://music.163.com/api/v1/resource/comments/R_SO_4_'+ music_id + '?limit=20&offset=' + str(offset)\n", "response = requests.post(url, headers=headers, data=data)\n", "cj = response.json()\n", "cj.keys()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2019-03-25T03:29:47.585689Z", "start_time": "2019-03-25T03:29:47.570306Z" } }, "outputs": [ { "data": { "text/plain": [ "[{'beReplied': [],\n", " 'commentId': 605012425,\n", " 'commentLocationType': 1,\n", " 'content': '合作很开心 \\\\(//∇//)\\\\ 再强调一下 不要比较哦~',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 31474,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509773639514,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DOm76SPCQKRbQT9Y9uhwrA==/109951163717598478.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '双笙子',\n", " 'remarkName': None,\n", " 'userId': 135214753,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605186129,\n", " 'commentLocationType': 0,\n", " 'content': '2017.02.27,我鼓起勇气私信了二笙,希望能听到她翻唱《白石溪》,于是,2017.11.04,我得偿所愿,疯狂打call!!!',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 18512,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509785241690,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/RUYiHbkMH-oynQm_hjXyDw==/18915998044379216.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '我就是猴子啊',\n", " 'remarkName': None,\n", " 'userId': 136088330,\n", " 'userType': 0,\n", " 'vipRights': {'associator': {'rights': True, 'vipCode': 100},\n", " 'musicPackage': None,\n", " 'redVipAnnualCount': 1},\n", " 'vipType': 11}},\n", " {'beReplied': [],\n", " 'commentId': 606532743,\n", " 'commentLocationType': 0,\n", " 'content': '他年君归,我已白首,可否安否?[心碎]',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 9525,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509890451139,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DT1H8k8UZKF-IifM2fzdHg==/109951163850082224.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '左启文',\n", " 'remarkName': None,\n", " 'userId': 573597989,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605144328,\n", " 'commentLocationType': 0,\n", " 'content': '为笙不离古风 敢问东风,玉成双偶!',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 7793,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509782850774,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/KmOtNRZ5_ToYyvYE3K4txw==/109951163539808902.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '小月萧要抱抱',\n", " 'remarkName': None,\n", " 'userId': 511241074,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 606375014,\n", " 'commentLocationType': 0,\n", " 'content': '此生相依,人间白首。',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 5772,\n", " 'parentCommentId': 0,\n", " 'pendantData': {'id': 4002,\n", " 'imageUrl': 'http://p1.music.126.net/yMXwThrlR-9EB8m-xCwJTQ==/109951163313135573.jpg'},\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509879397234,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DuX4kYdk6iD5-m0EPeod-A==/1393081243116509.jpg',\n", " 'expertTags': None,\n", " 'experts': {'1': '音乐原创视频达人', '2': '古风资讯达人'},\n", " 'locationInfo': None,\n", " 'nickname': '叶洛洛_',\n", " 'remarkName': None,\n", " 'userId': 41575131,\n", " 'userType': 4,\n", " 'vipRights': {'associator': {'rights': True, 'vipCode': 100},\n", " 'musicPackage': None,\n", " 'redVipAnnualCount': 1},\n", " 'vipType': 11}},\n", " {'beReplied': [],\n", " 'commentId': 605045551,\n", " 'commentLocationType': 0,\n", " 'content': '遗君一心 一心怎收?@双笙子',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 4203,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509775953038,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/q-F0hEb7fbhXIc24Id6k8g==/109951163824141936.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '-Taroty-',\n", " 'remarkName': None,\n", " 'userId': 253884140,\n", " 'userType': 0,\n", " 'vipRights': {'associator': {'rights': True, 'vipCode': 100},\n", " 'musicPackage': None,\n", " 'redVipAnnualCount': -1},\n", " 'vipType': 11}},\n", " {'beReplied': [{'beRepliedCommentId': 605012425,\n", " 'content': '合作很开心 \\\\(//∇//)\\\\ 再强调一下 不要比较哦~',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DOm76SPCQKRbQT9Y9uhwrA==/109951163717598478.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '双笙子',\n", " 'remarkName': None,\n", " 'userId': 135214753,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 605025286,\n", " 'commentLocationType': 0,\n", " 'content': '好!!!超温柔好喜欢啊!!',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 3469,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509773949545,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/n3zDytKxsEn7o73-hghrOg==/109951163907894797.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '再看要给钱的',\n", " 'remarkName': None,\n", " 'userId': 117723916,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605179520,\n", " 'commentLocationType': 0,\n", " 'content': '“金风玉露一相逢,便胜却人间无数” “身无彩凤双飞翼,心有灵犀一点通” 笙,最近真的高产呐~表白你♡♡我最好的二笙♡月月的声音也好听的不要不要的♡为我的两个女神打call~冬季了过的都好吧?♡♡☆',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 2716,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509785525844,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/mjqnDgYQqyNQr0NTRLrhBg==/109951163658952918.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '慕小然呐',\n", " 'remarkName': None,\n", " 'userId': 454717946,\n", " 'userType': 0,\n", " 'vipRights': {'associator': None,\n", " 'musicPackage': {'rights': True, 'vipCode': 220},\n", " 'redVipAnnualCount': -1},\n", " 'vipType': 10}},\n", " {'beReplied': [],\n", " 'commentId': 605412277,\n", " 'commentLocationType': 0,\n", " 'content': '大家不要再捞那个评论了,也不用举报了。开开心心听歌,好的坏的评论都接收,不接收恶意揣测和胡乱分析。最新出的《不立传》是原创同人曲,《扬州姑娘》是人声本家,《飞行安全颂》是和银临女神的合作曲,《羽人夜歌》还有《女孩你为何踮脚尖》同样是原唱和人声本家。',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 2532,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509799538380,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/RoyBWg89m_6QWNSrb8pK-A==/109951163860176383.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '嘎吱____',\n", " 'remarkName': None,\n", " 'userId': 272211992,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 607843027,\n", " 'commentLocationType': 0,\n", " 'content': '发现双笙长大了不带囧菌一起玩了。。。。',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 2032,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1510027128236,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/CSt7VwgL3eN8OZtQW9Y0Mw==/18800549325165207.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '草丛里好多水',\n", " 'remarkName': None,\n", " 'userId': 377560577,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605158591,\n", " 'commentLocationType': 0,\n", " 'content': '白石溪畔 斜阳逐流,可嫌 金风玉露 兼程久,灵犀心念 便相谋。只羡鸳鸯不羡仙,你是我心里的琴瑟和鸣,在我眼底你那么好,白首偕老,隔首相望,忆昔盼兮。@双笙子 ',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 1471,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509784001095,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/-I1R0pArRrY8etmreIeKpA==/109951163911256830.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '煮酒溫涼為锣故',\n", " 'remarkName': None,\n", " 'userId': 351891558,\n", " 'userType': 0,\n", " 'vipRights': {'associator': {'rights': True, 'vipCode': 100},\n", " 'musicPackage': None,\n", " 'redVipAnnualCount': -1},\n", " 'vipType': 11}},\n", " {'beReplied': [],\n", " 'commentId': 605388801,\n", " 'commentLocationType': 0,\n", " 'content': '冥月小姐姐唱到 何年夕何,两相执手 的时候突然的低音简直温柔的让我心头一颤!实在是太好听啦!还有小双笙的蜜汁少年音!(2333)和冥月小姐姐唱到 九霄一曲 人间白首,隔世相问 忆否忆否。那里简直不能太配了!希望以后两位女神能多多合作出些更多的古风歌!❤️❤️[亲亲][亲亲]',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 1413,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509799104012,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/PfBFxqykfk4_B0RU6b3JYg==/109951163547476225.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '瓜君身下的可爱傲娇高冷攻orz',\n", " 'remarkName': None,\n", " 'userId': 397792954,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605353067,\n", " 'commentLocationType': 0,\n", " 'content': '后排表白二笙[亲亲][亲亲]顺带表白全员[亲]每个歌手都会唱出属于自己特点的歌,每个翻唱也只是演绎了令一个版本。我们都要不比不刷[大笑]做好自己,好好听歌[憨笑]继续表白我高产的二笙[爱心][爱心]我们会一直支持你的[亲亲][亲亲]',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 1122,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509796178459,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/odzoJ-TPYYnQQOjQQw6Reg==/19099616486559044.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '忘忆羡泪',\n", " 'remarkName': None,\n", " 'userId': 540357008,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605370392,\n", " 'commentLocationType': 0,\n", " 'content': '共笙, 百年暮昏,到白昼。',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 887,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509797619854,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/68DTrNOBb0w1pcd03tXY-A==/109951163586635183.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '一袭白衣殒',\n", " 'remarkName': None,\n", " 'userId': 340825017,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 621427493,\n", " 'commentLocationType': 0,\n", " 'content': '人的一生,要死去三次。第一次,当你的心跳停止,呼吸消逝,你在生物学上被宣告了死亡;第二次,当你下葬,人们穿着黑衣出席你的葬礼,他们宣告,你在这个社会上不复存在,你悄然离去;而第三次死亡,是这个世界上最后一个记得你的人,把你忘记,于是,你就真正地死去。整个宇宙都将不再和你有关',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 667,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1511352258015,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/hGZWQMTVju4Ise4O9Ifszw==/109951163918333368.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '一起落入的那些俗套其实都很开心',\n", " 'remarkName': None,\n", " 'userId': 353580314,\n", " 'userType': 0,\n", " 'vipRights': {'associator': None,\n", " 'musicPackage': {'rights': True, 'vipCode': 220},\n", " 'redVipAnnualCount': -1},\n", " 'vipType': 10}}]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cj['hotComments']" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2019-03-24T18:34:04.433167Z", "start_time": "2019-03-24T18:16:56.711474Z" }, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "100保存到了100\n", "200保存到了200\n", "300保存到了300\n", "400保存到了400\n", "500保存到了500\n", "600保存到了600\n", "700保存到了700\n", "800保存到了800\n", "900保存到了900\n", "1000保存到了1000\n", "1100保存到了1100\n", "1200保存到了1200\n", "1300保存到了1300\n", "1400保存到了1400\n", "1500保存到了1500\n", "1600保存到了1600\n", "1700保存到了1700\n", "1800保存到了1800\n", "1900保存到了1900\n", "2000保存到了2000\n", "2100保存到了2100\n", "2200保存到了2200\n", "2300保存到了2300\n", "2400保存到了2400\n", "2500保存到了2500\n", "2600保存到了2600\n", "2700保存到了2700\n", "2800保存到了2800\n", "2900保存到了2900\n", "3000保存到了3000\n", "3100保存到了3100\n", "3200保存到了3200\n", "3300保存到了3300\n", "3400保存到了3400\n", "3500保存到了3500\n", "3600保存到了3600\n", "3700保存到了3700\n", "3800保存到了3800\n", "3900保存到了3900\n", "4000保存到了4000\n", "4100保存到了4100\n", "4200保存到了4200\n", "4300保存到了4300\n", "4400保存到了4400\n", "4500保存到了4500\n", "4600保存到了4600\n", "4700保存到了4700\n", "4800保存到了4800\n", "4900保存到了4900\n", "5000保存到了5000\n", "5100保存到了5100\n", "5200保存到了5200\n", "5300保存到了5300\n", "5400保存到了5400\n", "5500保存到了5500\n", "5600保存到了5600\n", "5700保存到了5700\n", "5800保存到了5800\n", "5900保存到了5900\n", "6000保存到了6000\n", "6100保存到了6100\n", "6200保存到了6200\n", "6300保存到了6300\n", "6400保存到了6400\n", "6500保存到了6500\n", "6600保存到了6600\n", "6700保存到了6700\n", "6800保存到了6800\n", "6900保存到了6900\n", "7000保存到了7000\n", "7100保存到了7100\n", "7200保存到了7200\n", "7300保存到了7300\n", "7400保存到了7400\n", "7500保存到了7500\n", "7600保存到了7600\n", "7700保存到了7700\n", "7800保存到了7800\n", "7900保存到了7900\n", "8000保存到了8000\n", "8100保存到了8100\n", "8200保存到了8200\n", "8300保存到了8300\n", "8400保存到了8400\n", "8500保存到了8500\n", "8600保存到了8600\n", "8700保存到了8700\n", "8800保存到了8800\n", "8900保存到了8900\n", "8981" ] }, { "ename": "KeyError", "evalue": "'hotComments'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 24\u001b[1;33m \u001b[1;32mif\u001b[0m \u001b[0mcj\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'hotComments'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 25\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mitems\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcj\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'hotComments'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[0mmc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mitems\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'content'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mKeyError\u001b[0m: 'hotComments'" ] } ], "source": [ "import random\n", "import time\n", "#from fake_useragent import UserAgent\n", "\n", "offset = 0\n", "musiccomment = []\n", "\n", "for k, i in enumerate(jj.keys()):\n", " \n", " #time.sleep(0.1+random.random())\n", " \n", " #ua = UserAgent()\n", " #headers['User-Agent'] = ua.random\n", " url = 'http://music.163.com/api/v1/resource/comments/R_SO_4_'+ i+ '?limit=20&offset=' + str(offset)\n", " #time.sleep(1.0+random.random())\n", " \n", " response = requests.post(url, headers=headers, data=data)\n", " cj = response.json()\n", " \n", " flushprint(k)\n", " \n", " \n", " \n", " if cj['hotComments']:\n", " for items in cj['hotComments']:\n", " mc = items['content']\n", " musiccomment.append(mc)\n", " \n", " elif cj['comments']:\n", " for items in cj['comments']:\n", " mc = items['content']\n", " musiccomment.append(mc)\n", " \n", " \n", " #except Exception as e:\n", " # print(e)\n", " # print(i)\n", " \n", " if k % 100 ==0 and k != 0: \n", " with open('comment0.txt', 'a+',encoding='utf-8') as f:\n", " f.write(str(musiccomment)+'\\n\\n\\n\\n')\n", " \n", " print('保存到了%d'%k)\n", " musiccomment = []\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 44, "metadata": { "ExecuteTime": { "end_time": "2019-03-24T08:51:00.566777Z", "start_time": "2019-03-24T08:51:00.486773Z" } }, "outputs": [ { "data": { "text/plain": [ "dict_keys(['code', 'msg'])" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "offset = 0\n", "music_id = '516997458'\n", "url = 'http://music.163.com/api/v1/resource/comments/R_SO_4_'+ music_id + '?limit=20&offset=' + str(offset)\n", "response = requests.post(url, headers=headers, data=data)\n", "cj = response.json()\n", "cj.keys()\n" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "ExecuteTime": { "end_time": "2019-03-24T08:44:55.674469Z", "start_time": "2019-03-24T08:44:55.619466Z" } }, "outputs": [ { "data": { "text/plain": [ "[{'beReplied': [],\n", " 'commentId': 1429664496,\n", " 'commentLocationType': 0,\n", " 'content': '一个轻柔一个清澈,很好啊',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 1,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553415176852,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p1.music.126.net/QwmQnU7NISJMojoDkethwQ==/109951163598132517.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '梦想长高10厘米',\n", " 'remarkName': None,\n", " 'userId': 1635948673,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1429535389,\n", " 'content': '在你发评论的时候动动脑子,没有父母这话是随便说的吗,不了解就瞎喷[怒]',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p1.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429714606,\n", " 'commentLocationType': 0,\n", " 'content': '别理她,举报就好',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 1419676965,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553414969099,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p1.music.126.net/QwmQnU7NISJMojoDkethwQ==/109951163598132517.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '梦想长高10厘米',\n", " 'remarkName': None,\n", " 'userId': 1635948673,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1429576231,\n", " 'content': '我也是姓左的',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p1.music.126.net/TizCd9qJuPOeTV7RiMvySA==/109951163872407610.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '碧蓝丶丢人萌新',\n", " 'remarkName': None,\n", " 'userId': 538236549,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429678839,\n", " 'commentLocationType': 0,\n", " 'content': 'qiao',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 606532743,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553413811692,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p1.music.126.net/DT1H8k8UZKF-IifM2fzdHg==/109951163850082224.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '左启文',\n", " 'remarkName': None,\n", " 'userId': 573597989,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1429535389,\n", " 'content': '在你发评论的时候动动脑子,没有父母这话是随便说的吗,不了解就瞎喷[怒]',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429623280,\n", " 'commentLocationType': 0,\n", " 'content': '不想被喷就在说话前动动脑子,你说的这话就好像在说临摹得比原作还要好',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 1419676965,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553412181764,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/Vlsfe2DBecxMOmVUPxYAlQ==/109951163106299956.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '抱走哪个好呢',\n", " 'remarkName': None,\n", " 'userId': 624231369,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 1429657548,\n", " 'commentLocationType': 0,\n", " 'content': '好听',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 0,\n", " 'pendantData': {'id': 5000,\n", " 'imageUrl': 'http://p1.music.126.net/11rU3itRKssu9iI-ly_hOQ==/109951163313124195.jpg'},\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553411567807,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/6QUDHgG09dfyCUpddcg1aA==/109951163923068764.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '____薛洋____',\n", " 'remarkName': None,\n", " 'userId': 1296734356,\n", " 'userType': 0,\n", " 'vipRights': {'associator': {'rights': True, 'vipCode': 100},\n", " 'musicPackage': None,\n", " 'redVipAnnualCount': 1},\n", " 'vipType': 11}},\n", " {'beReplied': [{'beRepliedCommentId': 1429561582,\n", " 'content': '每个人有每个人的观点,何必互相喷呢,我就是认为比原版好听',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429651603,\n", " 'commentLocationType': 0,\n", " 'content': '那我的评论也是在发表我的观点,臭👶👶',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 1419676965,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553411505259,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/Vlsfe2DBecxMOmVUPxYAlQ==/109951163106299956.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '抱走哪个好呢',\n", " 'remarkName': None,\n", " 'userId': 624231369,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1429535389,\n", " 'content': '在你发评论的时候动动脑子,没有父母这话是随便说的吗,不了解就瞎喷[怒]',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429619101,\n", " 'commentLocationType': 0,\n", " 'content': '模仿的能超过原唱?你的脑子在哪?',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 1419676965,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553411314670,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/Vlsfe2DBecxMOmVUPxYAlQ==/109951163106299956.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '抱走哪个好呢',\n", " 'remarkName': None,\n", " 'userId': 624231369,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1429535389,\n", " 'content': '在你发评论的时候动动脑子,没有父母这话是随便说的吗,不了解就瞎喷[怒]',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429626899,\n", " 'commentLocationType': 0,\n", " 'content': '你比较不就是踩一捧一?',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 1419676965,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553411202358,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/Vlsfe2DBecxMOmVUPxYAlQ==/109951163106299956.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '抱走哪个好呢',\n", " 'remarkName': None,\n", " 'userId': 624231369,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1429535389,\n", " 'content': '在你发评论的时候动动脑子,没有父母这话是随便说的吗,不了解就瞎喷[怒]',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429615121,\n", " 'commentLocationType': 0,\n", " 'content': '说你怎么?',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 1419676965,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553411128186,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/Vlsfe2DBecxMOmVUPxYAlQ==/109951163106299956.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '抱走哪个好呢',\n", " 'remarkName': None,\n", " 'userId': 624231369,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1429224319,\n", " 'content': '对啊',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DT1H8k8UZKF-IifM2fzdHg==/109951163850082224.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '左启文',\n", " 'remarkName': None,\n", " 'userId': 573597989,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429576231,\n", " 'commentLocationType': 0,\n", " 'content': '我也是姓左的',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 606532743,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553409735409,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/TizCd9qJuPOeTV7RiMvySA==/109951163872407610.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '碧蓝丶丢人萌新',\n", " 'remarkName': None,\n", " 'userId': 538236549,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1429125510,\n", " 'content': None,\n", " 'expressionUrl': None,\n", " 'status': -10,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/Vlsfe2DBecxMOmVUPxYAlQ==/109951163106299956.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '抱走哪个好呢',\n", " 'remarkName': None,\n", " 'userId': 624231369,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429535389,\n", " 'commentLocationType': 0,\n", " 'content': '在你发评论的时候动动脑子,没有父母这话是随便说的吗,不了解就瞎喷[怒]',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 1419676965,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553407382977,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1419676965,\n", " 'content': '比原版还好听',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429537210,\n", " 'commentLocationType': 0,\n", " 'content': '还有我只是说这比原版好听,也没有贬低原版的意思,原版已经很好听但这个版本我觉的比原版更好听',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 1419676965,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553407042602,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1419676965,\n", " 'content': '比原版还好听',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429561582,\n", " 'commentLocationType': 0,\n", " 'content': '每个人有每个人的观点,何必互相喷呢,我就是认为比原版好听',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 1419676965,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553406518992,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/ro-GK-5XWK0BRS1SFaOQOw==/109951163932462029.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': 'Whisky2005',\n", " 'remarkName': None,\n", " 'userId': 1292353868,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 1429269929,\n", " 'commentLocationType': 0,\n", " 'content': '竹业篇而来\\n霸业未有妻,淮竹终为妾,一生挚爱,不离不弃,即使没有相思树,我也相信,有情人会再次重聚。',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 1,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553391403396,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/MTLZ68_k-sK3UO4W5_5ucQ==/109951163603755384.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '银河中的妖精',\n", " 'remarkName': None,\n", " 'userId': 1534534690,\n", " 'userType': 0,\n", " 'vipRights': {'associator': {'rights': True, 'vipCode': 100},\n", " 'musicPackage': None,\n", " 'redVipAnnualCount': -1},\n", " 'vipType': 11}},\n", " {'beReplied': [{'beRepliedCommentId': 1428723383,\n", " 'content': '左启文是你的真名吗?',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/TizCd9qJuPOeTV7RiMvySA==/109951163872407610.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '碧蓝丶丢人萌新',\n", " 'remarkName': None,\n", " 'userId': 538236549,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429224319,\n", " 'commentLocationType': 0,\n", " 'content': '对啊',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 606532743,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553389865376,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DT1H8k8UZKF-IifM2fzdHg==/109951163850082224.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '左启文',\n", " 'remarkName': None,\n", " 'userId': 573597989,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 1429054997,\n", " 'commentLocationType': 0,\n", " 'content': '好听,幸福',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553357795390,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/x6SwELtUm6EnwdnCe-BpuQ==/109951163633064303.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '朱晓含',\n", " 'remarkName': None,\n", " 'userId': 302073820,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 1426851758,\n", " 'content': '我说为什么他们总说双笙粉,今天终于看到活了',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/4ooxbFU4X-5ZH3aigGt8Cg==/109951163736134599.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '泠沄洨',\n", " 'remarkName': None,\n", " 'userId': 1709480639,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1429019611,\n", " 'commentLocationType': 0,\n", " 'content': '素质烂的可以 招黑还不知收敛 ',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 1419676965,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553355337359,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/pkfMbRlEIrFyVXRaSfiRUA==/109951163169252433.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '落尘之土',\n", " 'remarkName': None,\n", " 'userId': 1386034882,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 607843027,\n", " 'content': '发现双笙长大了不带囧菌一起玩了。。。。',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/CSt7VwgL3eN8OZtQW9Y0Mw==/18800549325165207.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '草丛里好多水',\n", " 'remarkName': None,\n", " 'userId': 377560577,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1428965499,\n", " 'commentLocationType': 0,\n", " 'content': '靠别人捧红还踩别人',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 607843027,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553355298319,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/pkfMbRlEIrFyVXRaSfiRUA==/109951163169252433.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '落尘之土',\n", " 'remarkName': None,\n", " 'userId': 1386034882,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 1428841216,\n", " 'commentLocationType': 0,\n", " 'content': '所谓伊人,在水一方\\n所谓君子,望而欲求\\n君子问曰,可期三年\\n伊人对曰,可期朝夕',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 2,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553349527069,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/eZvGVQERWKkWkQgPiXhRiw==/109951163789573760.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '琛宝波特',\n", " 'remarkName': None,\n", " 'userId': 1462458458,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [{'beRepliedCommentId': 606532743,\n", " 'content': '他年君归,我已白首,可否安否?[心碎]',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DT1H8k8UZKF-IifM2fzdHg==/109951163850082224.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '左启文',\n", " 'remarkName': None,\n", " 'userId': 573597989,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 1428723383,\n", " 'commentLocationType': 0,\n", " 'content': '左启文是你的真名吗?',\n", " 'decoration': {},\n", " 'expressionUrl': None,\n", " 'isRemoveHotComment': False,\n", " 'liked': False,\n", " 'likedCount': 0,\n", " 'parentCommentId': 606532743,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1553344533467,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/TizCd9qJuPOeTV7RiMvySA==/109951163872407610.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '碧蓝丶丢人萌新',\n", " 'remarkName': None,\n", " 'userId': 538236549,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}}]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cj['comments']" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2019-03-24T06:25:51.056042Z", "start_time": "2019-03-24T06:25:51.032042Z" } }, "outputs": [ { "data": { "text/plain": [ "[{'beReplied': [],\n", " 'commentId': 605012425,\n", " 'commentLocationType': 1,\n", " 'content': '合作很开心 \\\\(//∇//)\\\\ 再强调一下 不要比较哦~',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 31460,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509773639514,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DOm76SPCQKRbQT9Y9uhwrA==/109951163717598478.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '双笙子',\n", " 'remarkName': None,\n", " 'userId': 135214753,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605186129,\n", " 'commentLocationType': 0,\n", " 'content': '2017.02.27,我鼓起勇气私信了二笙,希望能听到她翻唱《白石溪》,于是,2017.11.04,我得偿所愿,疯狂打call!!!',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 18503,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509785241690,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/RUYiHbkMH-oynQm_hjXyDw==/18915998044379216.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '我就是猴子啊',\n", " 'remarkName': None,\n", " 'userId': 136088330,\n", " 'userType': 0,\n", " 'vipRights': {'associator': {'rights': True, 'vipCode': 100},\n", " 'musicPackage': None,\n", " 'redVipAnnualCount': 1},\n", " 'vipType': 11}},\n", " {'beReplied': [],\n", " 'commentId': 606532743,\n", " 'commentLocationType': 0,\n", " 'content': '他年君归,我已白首,可否安否?[心碎]',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 9524,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509890451139,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DT1H8k8UZKF-IifM2fzdHg==/109951163850082224.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '左启文',\n", " 'remarkName': None,\n", " 'userId': 573597989,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605144328,\n", " 'commentLocationType': 0,\n", " 'content': '为笙不离古风 敢问东风,玉成双偶!',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 7792,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509782850774,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/KmOtNRZ5_ToYyvYE3K4txw==/109951163539808902.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '小月萧要抱抱',\n", " 'remarkName': None,\n", " 'userId': 511241074,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 606375014,\n", " 'commentLocationType': 0,\n", " 'content': '此生相依,人间白首。',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 5770,\n", " 'parentCommentId': 0,\n", " 'pendantData': {'id': 4002,\n", " 'imageUrl': 'http://p1.music.126.net/yMXwThrlR-9EB8m-xCwJTQ==/109951163313135573.jpg'},\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509879397234,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DuX4kYdk6iD5-m0EPeod-A==/1393081243116509.jpg',\n", " 'expertTags': None,\n", " 'experts': {'1': '音乐原创视频达人', '2': '古风资讯达人'},\n", " 'locationInfo': None,\n", " 'nickname': '叶洛洛_',\n", " 'remarkName': None,\n", " 'userId': 41575131,\n", " 'userType': 4,\n", " 'vipRights': {'associator': {'rights': True, 'vipCode': 100},\n", " 'musicPackage': None,\n", " 'redVipAnnualCount': 1},\n", " 'vipType': 11}},\n", " {'beReplied': [],\n", " 'commentId': 605045551,\n", " 'commentLocationType': 0,\n", " 'content': '遗君一心 一心怎收?@双笙子',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 4203,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509775953038,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/q-F0hEb7fbhXIc24Id6k8g==/109951163824141936.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '-Taroty-',\n", " 'remarkName': None,\n", " 'userId': 253884140,\n", " 'userType': 0,\n", " 'vipRights': {'associator': {'rights': True, 'vipCode': 100},\n", " 'musicPackage': None,\n", " 'redVipAnnualCount': -1},\n", " 'vipType': 11}},\n", " {'beReplied': [{'beRepliedCommentId': 605012425,\n", " 'content': '合作很开心 \\\\(//∇//)\\\\ 再强调一下 不要比较哦~',\n", " 'expressionUrl': None,\n", " 'status': 0,\n", " 'user': {'authStatus': 1,\n", " 'avatarUrl': 'http://p2.music.126.net/DOm76SPCQKRbQT9Y9uhwrA==/109951163717598478.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '双笙子',\n", " 'remarkName': None,\n", " 'userId': 135214753,\n", " 'userType': 4,\n", " 'vipRights': None,\n", " 'vipType': 0}}],\n", " 'commentId': 605025286,\n", " 'commentLocationType': 0,\n", " 'content': '好!!!超温柔好喜欢啊!!',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 3469,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509773949545,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/n3zDytKxsEn7o73-hghrOg==/109951163907894797.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '再看要给钱的',\n", " 'remarkName': None,\n", " 'userId': 117723916,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605179520,\n", " 'commentLocationType': 0,\n", " 'content': '“金风玉露一相逢,便胜却人间无数” “身无彩凤双飞翼,心有灵犀一点通” 笙,最近真的高产呐~表白你♡♡我最好的二笙♡月月的声音也好听的不要不要的♡为我的两个女神打call~冬季了过的都好吧?♡♡☆',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 2716,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509785525844,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/mjqnDgYQqyNQr0NTRLrhBg==/109951163658952918.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '慕小然呐',\n", " 'remarkName': None,\n", " 'userId': 454717946,\n", " 'userType': 0,\n", " 'vipRights': {'associator': None,\n", " 'musicPackage': {'rights': True, 'vipCode': 220},\n", " 'redVipAnnualCount': -1},\n", " 'vipType': 10}},\n", " {'beReplied': [],\n", " 'commentId': 605412277,\n", " 'commentLocationType': 0,\n", " 'content': '大家不要再捞那个评论了,也不用举报了。开开心心听歌,好的坏的评论都接收,不接收恶意揣测和胡乱分析。最新出的《不立传》是原创同人曲,《扬州姑娘》是人声本家,《飞行安全颂》是和银临女神的合作曲,《羽人夜歌》还有《女孩你为何踮脚尖》同样是原唱和人声本家。',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 2532,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509799538380,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/RoyBWg89m_6QWNSrb8pK-A==/109951163860176383.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '嘎吱____',\n", " 'remarkName': None,\n", " 'userId': 272211992,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 607843027,\n", " 'commentLocationType': 0,\n", " 'content': '发现双笙长大了不带囧菌一起玩了。。。。',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 2027,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1510027128236,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/CSt7VwgL3eN8OZtQW9Y0Mw==/18800549325165207.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '草丛里好多水',\n", " 'remarkName': None,\n", " 'userId': 377560577,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605158591,\n", " 'commentLocationType': 0,\n", " 'content': '白石溪畔 斜阳逐流,可嫌 金风玉露 兼程久,灵犀心念 便相谋。只羡鸳鸯不羡仙,你是我心里的琴瑟和鸣,在我眼底你那么好,白首偕老,隔首相望,忆昔盼兮。@双笙子 ',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 1470,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509784001095,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/-I1R0pArRrY8etmreIeKpA==/109951163911256830.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '煮酒溫涼為锣故',\n", " 'remarkName': None,\n", " 'userId': 351891558,\n", " 'userType': 0,\n", " 'vipRights': {'associator': {'rights': True, 'vipCode': 100},\n", " 'musicPackage': None,\n", " 'redVipAnnualCount': -1},\n", " 'vipType': 11}},\n", " {'beReplied': [],\n", " 'commentId': 605388801,\n", " 'commentLocationType': 0,\n", " 'content': '冥月小姐姐唱到 何年夕何,两相执手 的时候突然的低音简直温柔的让我心头一颤!实在是太好听啦!还有小双笙的蜜汁少年音!(2333)和冥月小姐姐唱到 九霄一曲 人间白首,隔世相问 忆否忆否。那里简直不能太配了!希望以后两位女神能多多合作出些更多的古风歌!❤️❤️[亲亲][亲亲]',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 1411,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509799104012,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/PfBFxqykfk4_B0RU6b3JYg==/109951163547476225.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '瓜君身下的可爱傲娇高冷攻orz',\n", " 'remarkName': None,\n", " 'userId': 397792954,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605353067,\n", " 'commentLocationType': 0,\n", " 'content': '后排表白二笙[亲亲][亲亲]顺带表白全员[亲]每个歌手都会唱出属于自己特点的歌,每个翻唱也只是演绎了令一个版本。我们都要不比不刷[大笑]做好自己,好好听歌[憨笑]继续表白我高产的二笙[爱心][爱心]我们会一直支持你的[亲亲][亲亲]',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 1122,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509796178459,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/odzoJ-TPYYnQQOjQQw6Reg==/19099616486559044.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '忘忆羡泪',\n", " 'remarkName': None,\n", " 'userId': 540357008,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 605370392,\n", " 'commentLocationType': 0,\n", " 'content': '共笙, 百年暮昏,到白昼。',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 887,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1509797619854,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/68DTrNOBb0w1pcd03tXY-A==/109951163586635183.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '一袭白衣殒',\n", " 'remarkName': None,\n", " 'userId': 340825017,\n", " 'userType': 0,\n", " 'vipRights': None,\n", " 'vipType': 0}},\n", " {'beReplied': [],\n", " 'commentId': 621427493,\n", " 'commentLocationType': 0,\n", " 'content': '人的一生,要死去三次。第一次,当你的心跳停止,呼吸消逝,你在生物学上被宣告了死亡;第二次,当你下葬,人们穿着黑衣出席你的葬礼,他们宣告,你在这个社会上不复存在,你悄然离去;而第三次死亡,是这个世界上最后一个记得你的人,把你忘记,于是,你就真正地死去。整个宇宙都将不再和你有关',\n", " 'decoration': None,\n", " 'expressionUrl': None,\n", " 'liked': False,\n", " 'likedCount': 667,\n", " 'parentCommentId': 0,\n", " 'pendantData': None,\n", " 'repliedMark': False,\n", " 'showFloorComment': None,\n", " 'status': 0,\n", " 'time': 1511352258015,\n", " 'user': {'authStatus': 0,\n", " 'avatarUrl': 'http://p2.music.126.net/hGZWQMTVju4Ise4O9Ifszw==/109951163918333368.jpg',\n", " 'expertTags': None,\n", " 'experts': None,\n", " 'locationInfo': None,\n", " 'nickname': '一起落入的那些俗套其实都很开心',\n", " 'remarkName': None,\n", " 'userId': 353580314,\n", " 'userType': 0,\n", " 'vipRights': {'associator': None,\n", " 'musicPackage': {'rights': True, 'vipCode': 220},\n", " 'redVipAnnualCount': -1},\n", " 'vipType': 10}}]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cj['hotComments']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "http://music.163.com/api/v1/resource/comments/R_SO_4_516997458?limit=20&offset=0" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2019-03-23T16:44:30.181659Z", "start_time": "2019-03-23T16:44:30.161659Z" } }, "outputs": [ { "data": { "text/plain": [ "(9220, 20, 15, 0)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cj['total'],len(cj['comments']), len(cj['hotComments']), len(cj['topComments'])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2019-03-24T06:36:47.423773Z", "start_time": "2019-03-24T06:36:47.408772Z" } }, "outputs": [], "source": [ "\n", "musiccomment = []\n", "for items in cj['hotComments']:\n", " mc = items['content']\n", " musiccomment.append(mc)\n", "\n", "with open('comment.txt', 'a+',encoding='utf-8') as f:\n", " f.write(str(musiccomment)+'\\n')\n", "musiccomment = []\n", "\n", " " ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "ExecuteTime": { "end_time": "2019-03-23T17:26:55.485263Z", "start_time": "2019-03-23T17:26:55.470263Z" } }, "outputs": [ { "data": { "text/plain": [ "'合作很开心 \\\\(//∇//)\\\\ 再强调一下 不要比较哦~'" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cj['hotComments'][0]['content']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" }, "latex_envs": { "LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 1, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }