{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"###### 변수명으로 예약어를 사용하는 것은 무조건 감점입니다. 주의해주세요."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 모범답안 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import re\n",
"import string\n",
"import html\n",
"def countWords(url):\n",
" req = requests.get(url)\n",
" source = html.unescape(req.text).replace('><', '>\\n<')\n",
" line = source.split('\\n')\n",
" regex = re.compile('>.*?<')\n",
" words = []\n",
" for i in line:\n",
" if ('\")+9:] #자바스크립트 내용 건너뜀 \n",
" elif (source.find(\"', ' ', text1, 0, re.I|re.S)\n",
" text3 = re.sub('<.+?>', ' ', text2, 0, re.I|re.S)\n",
" text4 = re.sub('-->', ' ', text3, 0, re.I|re.S)\n",
" text5 = re.sub(\"[!|?|<|>|:|\\[|\\]|#|$|%|&|\\(|\\)|*|+|\\-|,|.|/|;|=|@|^|_|`|{|}|~|\\\"|\\'|\\\\\\|/]\",' ', text4, 0, re.I|re.S)\n",
" s=text5.split()\n",
" dic={}\n",
" j=0\n",
" for i in s:\n",
" if i in dic:\n",
" dic[i] += 1\n",
" else:\n",
" dic[i] = 1\n",
" print(dic)\n",
"\n",
"# Pages handed to call(), listed in the order their reports are printed.\n",
"target_urls = (\n",
"    'http://cse.koreatech.ac.kr',\n",
"    'https://www.koreatech.ac.kr',\n",
"    'http://www.naver.com',\n",
"    'http://www.daum.net',\n",
"    'http://www.nytimes.com',\n",
")\n",
"\n",
"for position, page_url in enumerate(target_urls):\n",
"    if position:\n",
"        # A blank line separates consecutive reports; none follows the last one.\n",
"        print()\n",
"    print(page_url)\n",
"    call(page_url)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}