{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Libraries" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-09-03T13:33:57.818654Z", "start_time": "2019-09-03T13:33:56.000041Z" } }, "outputs": [], "source": [
 "import html\n",
 "import re\n",
 "import webbrowser\n",
 "\n",
 "import bs4\n",
 "import pandas as pd\n",
 "import requests"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Scraping" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-09-03T13:34:01.709907Z", "start_time": "2019-09-03T13:33:57.825648Z" } }, "outputs": [], "source": [
 "query = 'what is null pointer exception'\n",
 "# Percent-encode the query so characters such as '&' or '#' cannot corrupt the URL.\n",
 "url_query = 'https://stackoverflow.com/search?q=' + requests.utils.quote(query)\n",
 "\n",
 "\n",
 "def vote_ans(child1):\n",
 "    \"\"\"Return ``(votes, answers)`` as ints parsed from a result's stats HTML.\n",
 "\n",
 "    Exactly two digit runs are read as (votes, answers); a single run is the\n",
 "    vote count with 0 answers; any other number of runs yields (0, 0).\n",
 "    \"\"\"\n",
 "    nums = [int(n) for n in re.findall(r'\\d+', child1)]\n",
 "    if len(nums) == 2:\n",
 "        return nums[0], nums[1]\n",
 "    if len(nums) == 1:\n",
 "        return nums[0], 0\n",
 "    return 0, 0\n",
 "\n",
 "\n",
 "def link(child3):\n",
 "    \"\"\"Return the absolute URL built from the first ``href`` in ``child3`` ('' if none).\"\"\"\n",
 "    match = re.search(r'href=\"(.*?)\"', child3)\n",
 "    return ('https://stackoverflow.com' + match.group(1)) if match else ''\n",
 "\n",
 "\n",
 "def title(child3):\n",
 "    \"\"\"Return the text of the first ``title`` attribute in ``child3`` ('' if none).\"\"\"\n",
 "    # NOTE(review): pattern reconstructed (the saved one was empty); confirm that the\n",
 "    # result link still carries the question title in its title=\"...\" attribute.\n",
 "    match = re.search(r'title=\"(.*?)\"', child3)\n",
 "    return html.unescape(match.group(1)) if match else ''\n",
 "\n",
 "\n",
 "def summary(child3):\n",
 "    \"\"\"Return the excerpt of one search result as plain text ('' if none).\"\"\"\n",
 "    # NOTE(review): assumes the excerpt is wrapped in <div class=\"excerpt\">; confirm\n",
 "    # against the current Stack Overflow markup.\n",
 "    match = re.search(r'<div class=\"excerpt\">(.*?)</div>', child3, re.S)\n",
 "    if match is None:\n",
 "        return ''\n",
 "    # Drop inline tags first, then decode entities, so that escaped markup such as\n",
 "    # '&lt;b&gt;' survives as literal text instead of being removed as a tag.\n",
 "    text = re.sub(r'<.*?>', '', match.group(1))\n",
 "    return html.unescape(text).strip()\n",
 "\n",
 "\n",
 "records = []\n",
 "res = requests.get(url_query, timeout=10)\n",
 "if res.status_code == requests.codes.ok:\n",
 "    ressoup = bs4.BeautifulSoup(res.text, 'lxml')\n",
 "    elems = ressoup.select('.question-summary')\n",
 "    for e in elems:\n",
 "        children = list(e.children)\n",
 "        if len(children) < 4:\n",
 "            # Unexpected markup: skip rather than reuse values from the previous result.\n",
 "            continue\n",
 "        # Child 1 is parsed for the counters, child 3 for title, link and excerpt.\n",
 "        stats_html, body_html = str(children[1]), str(children[3])\n",
 "        vote, ans = vote_ans(stats_html)\n",
 "        records.append({\n",
 "            'Title': title(body_html),\n",
 "            'Link': link(body_html),\n",
 "            'Summary': summary(body_html),\n",
 "            'Vote': vote,\n",
 "            'Answers': ans,\n",
 "        })\n",
 "else:\n",
 "    print('Something went wrong: HTTP', res.status_code)\n",
 "\n",
 "# Build the frame once from the collected rows instead of concatenating per result.\n",
 "results = pd.DataFrame(records, columns=['Title', 'Link', 'Summary', 'Vote', 'Answers'])"
] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-09-03T13:34:01.749829Z", "start_time": "2019-09-03T13:34:01.713894Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TitleLinkSummaryVoteAnswers
0What is a NullReferenceException, and how do I...https://stackoverflow.com/questions/4660142/wh.... The .NET runtime then detects this error and...23050
1What is a NullPointerException, and how do I f...https://stackoverflow.com/questions/218384/wha..., but it does not actually contain a primitive...36040
2What is a smart pointer and when should I use ...https://stackoverflow.com/questions/106508/wha.... Hopefully raises some NULL pointer exception...18120
3Check whether a string is not null and not emptyhttps://stackoverflow.com/questions/3598770/ch...ensuring you will not get a null pointer excep...8610
4what is causing my Null Pointer Exceptionhttps://stackoverflow.com/questions/13782424/w...I know that there are lots of threads on Null ...14
5Facing Null Pointer Exceptionhttps://stackoverflow.com/questions/29468824/f...belongs to no group it hits a null pointer exc...12
6Null Pointer Exception in programhttps://stackoverflow.com/questions/3478736/nu...The Below code does not fail to compile but ho...04
7Getting a null pointer exception on this line ...https://stackoverflow.com/questions/16780588/g...if (methodCall.getClassName(cg.getConstantPool...12
8What is the reason of null pointer exception?https://stackoverflow.com/questions/24647305/w...am continuously getting null pointer exception...01
9what is the reason of the null pointer exceptionhttps://stackoverflow.com/questions/19637054/w...hi all i try to do calculator,i'm write the fo...04
10What is wrong with this code? (Array of object...https://stackoverflow.com/questions/17431296/w...(); //This line gives null pointer exceptio...14
11Class inside a function causing null pointer e...https://stackoverflow.com/questions/12599477/c...put it into the function, it will cause null p...02
12Java code generating a null pointer exceptionhttps://stackoverflow.com/questions/14821107/j...I'm getting this Null Pointer Exception:\\n\\nja...45
13Null Pointer Exception: addTextChangedListenerhttps://stackoverflow.com/questions/15916242/n...=\"+URLEncoder.encode(destination.getText().toS...03
14Null pointer Exception - findViewById()https://stackoverflow.com/questions/19078461/n...Can anyone help me to find out what can be the...4610
\n", "
" ], "text/plain": [ " Title \\\n", "0 What is a NullReferenceException, and how do I... \n", "1 What is a NullPointerException, and how do I f... \n", "2 What is a smart pointer and when should I use ... \n", "3 Check whether a string is not null and not empty \n", "4 what is causing my Null Pointer Exception \n", "5 Facing Null Pointer Exception \n", "6 Null Pointer Exception in program \n", "7 Getting a null pointer exception on this line ... \n", "8 What is the reason of null pointer exception? \n", "9 what is the reason of the null pointer exception \n", "10 What is wrong with this code? (Array of object... \n", "11 Class inside a function causing null pointer e... \n", "12 Java code generating a null pointer exception \n", "13 Null Pointer Exception: addTextChangedListener \n", "14 Null pointer Exception - findViewById() \n", "\n", " Link \\\n", "0 https://stackoverflow.com/questions/4660142/wh... \n", "1 https://stackoverflow.com/questions/218384/wha... \n", "2 https://stackoverflow.com/questions/106508/wha... \n", "3 https://stackoverflow.com/questions/3598770/ch... \n", "4 https://stackoverflow.com/questions/13782424/w... \n", "5 https://stackoverflow.com/questions/29468824/f... \n", "6 https://stackoverflow.com/questions/3478736/nu... \n", "7 https://stackoverflow.com/questions/16780588/g... \n", "8 https://stackoverflow.com/questions/24647305/w... \n", "9 https://stackoverflow.com/questions/19637054/w... \n", "10 https://stackoverflow.com/questions/17431296/w... \n", "11 https://stackoverflow.com/questions/12599477/c... \n", "12 https://stackoverflow.com/questions/14821107/j... \n", "13 https://stackoverflow.com/questions/15916242/n... \n", "14 https://stackoverflow.com/questions/19078461/n... \n", "\n", " Summary Vote Answers \n", "0 . The .NET runtime then detects this error and... 2305 0 \n", "1 , but it does not actually contain a primitive... 3604 0 \n", "2 . Hopefully raises some NULL pointer exception... 
1812 0 \n", "3 ensuring you will not get a null pointer excep... 861 0 \n", "4 I know that there are lots of threads on Null ... 1 4 \n", "5 belongs to no group it hits a null pointer exc... 1 2 \n", "6 The Below code does not fail to compile but ho... 0 4 \n", "7 if (methodCall.getClassName(cg.getConstantPool... 1 2 \n", "8 am continuously getting null pointer exception... 0 1 \n", "9 hi all i try to do calculator,i'm write the fo... 0 4 \n", "10 (); //This line gives null pointer exceptio... 1 4 \n", "11 put it into the function, it will cause null p... 0 2 \n", "12 I'm getting this Null Pointer Exception:\\n\\nja... 4 5 \n", "13 =\"+URLEncoder.encode(destination.getText().toS... 0 3 \n", "14 Can anyone help me to find out what can be the... 46 10 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Scraping2" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-09-03T13:34:02.751998Z", "start_time": "2019-09-03T13:34:01.753788Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "What is a NullReferenceException, and how do I fix it?\n", "https://stackoverflow.com/questions/4660142/what-is-a-nullreferenceexception-and-how-do-i-fix-it/4660186?r=SearchResults#4660186\n", "\n", "1\n", "What is a NullPointerException, and how do I fix it?\n", "https://stackoverflow.com/questions/218384/what-is-a-nullpointerexception-and-how-do-i-fix-it/218510?r=SearchResults#218510\n", "\n", "2\n", "What is a smart pointer and when should I use one?\n", "https://stackoverflow.com/questions/106508/what-is-a-smart-pointer-and-when-should-i-use-one/106614?r=SearchResults#106614\n", "\n", "3\n", "Check whether a string is not null and not empty\n", "https://stackoverflow.com/questions/3598770/check-whether-a-string-is-not-null-and-not-empty/3598792?r=SearchResults#3598792\n", "\n", "4\n", "what is 
causing my Null Pointer Exception\n", "https://stackoverflow.com/questions/13782424/what-is-causing-my-null-pointer-exception?r=SearchResults\n", "\n", "5\n", "Facing Null Pointer Exception\n", "https://stackoverflow.com/questions/29468824/facing-null-pointer-exception?r=SearchResults\n", "\n", "6\n", "Null Pointer Exception in program\n", "https://stackoverflow.com/questions/3478736/null-pointer-exception-in-program?r=SearchResults\n", "\n", "7\n", "Getting a null pointer exception on this line of code. What is a null pointer? And how do i fix\n", "https://stackoverflow.com/questions/16780588/getting-a-null-pointer-exception-on-this-line-of-code-what-is-a-null-pointer-a?r=SearchResults\n", "\n", "8\n", "What is the reason of null pointer exception?\n", "https://stackoverflow.com/questions/24647305/what-is-the-reason-of-null-pointer-exception?r=SearchResults\n", "\n", "9\n", "what is the reason of the null pointer exception\n", "https://stackoverflow.com/questions/19637054/what-is-the-reason-of-the-null-pointer-exception?r=SearchResults\n", "\n", "10\n", "What is wrong with this code? 
(Array of objects, null pointer exception.)\n", "https://stackoverflow.com/questions/17431296/what-is-wrong-with-this-code-array-of-objects-null-pointer-exception?r=SearchResults\n", "\n", "11\n", "Class inside a function causing null pointer exception\n", "https://stackoverflow.com/questions/12599477/class-inside-a-function-causing-null-pointer-exception?r=SearchResults\n", "\n", "12\n", "Java code generating a null pointer exception\n", "https://stackoverflow.com/questions/14821107/java-code-generating-a-null-pointer-exception?r=SearchResults\n", "\n", "13\n", "Null Pointer Exception: addTextChangedListener\n", "https://stackoverflow.com/questions/15916242/null-pointer-exception-addtextchangedlistener?r=SearchResults\n", "\n", "14\n", "Null pointer Exception - findViewById()\n", "https://stackoverflow.com/questions/19078461/null-pointer-exception-findviewbyid?r=SearchResults\n", "\n", "\n" ] } ], "source": [
 "query = 'what is null pointer exception'\n",
 "# Percent-encode the query so characters such as '&' or '#' cannot corrupt the URL.\n",
 "url_query = 'https://stackoverflow.com/search?q=' + requests.utils.quote(query)\n",
 "\n",
 "res = requests.get(url_query, timeout=10)\n",
 "if res.status_code == requests.codes.ok:\n",
 "    ressoup = bs4.BeautifulSoup(res.text, 'lxml')\n",
 "    elems = ressoup.select('.question-summary')\n",
 "    entries = []\n",
 "    for i, e in enumerate(elems):\n",
 "        children = list(e.children)\n",
 "        if len(children) < 4:\n",
 "            # Unexpected markup: nothing to extract from this result.\n",
 "            continue\n",
 "        # Child 3 is the node searched for both the link and the title.\n",
 "        body_html = str(children[3])\n",
 "        href_match = re.search(r'href=\"(.*?)\"', body_html)\n",
 "        # NOTE(review): pattern reconstructed (the saved one was blank); confirm that the\n",
 "        # result link still carries the question title in its title=\"...\" attribute.\n",
 "        title_match = re.search(r'title=\"(.*?)\"', body_html)\n",
 "        link_url = ('https://stackoverflow.com' + href_match.group(1)) if href_match else ''\n",
 "        title_text = title_match.group(1) if title_match else ''\n",
 "        entries.append(str(i) + '\\n' + title_text + '\\n' + link_url + '\\n\\n')\n",
 "    # Join once at the end instead of growing the string on every iteration.\n",
 "    results = ''.join(entries)\n",
 "    print(results)\n",
 "else:\n",
 "    print('Something went wrong')"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }