{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"
\n",
"# **Konlpy**\n",
"한글모듈\n",
"\n",
"## **1 Tagging**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('파이썬', 'Noun'),\n",
" ('을', 'Josa'),\n",
" ('활용한', 'Verb'),\n",
" ('자연어', 'Noun'),\n",
" ('수업', 'Noun'),\n",
" ('자료', 'Noun')]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from konlpy.tag import Twitter\n",
"twitter = Twitter()\n",
"twitter.pos('파이썬을 활용한 자연어 수업자료')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"
\n",
"## **2 Stemming**"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"text = \"워런 버핏은 삼성전자가 아닌 애플주식을 왜 샀을까\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('워런', 'Noun'), ('버핏', 'Noun'), ('은', 'Josa'), ('삼성', 'Noun'), ('전자', 'Noun'), ('가', 'Josa'), ('아니다', 'Adjective'), ('애플', 'Noun'), ('주식', 'Noun'), ('을', 'Josa'), ('왜', 'Noun'), ('사다', 'Verb')]\n",
"CPU times: user 821 ms, sys: 34 ms, total: 855 ms\n",
"Wall time: 564 ms\n"
]
}
],
"source": [
"%%time\n",
"print(twitter.pos(text, stem=True))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('워런', 'Noun'), ('버핏', 'Noun'), ('은', 'Josa'), ('삼성', 'Noun'), ('전자', 'Noun'), ('가', 'Josa'), ('아닌', 'Adjective'), ('애플', 'Noun'), ('주식', 'Noun'), ('을', 'Josa'), ('왜', 'Noun'), ('샀', 'Verb'), ('을까', 'Eomi')]\n",
"CPU times: user 48.4 ms, sys: 26 µs, total: 48.4 ms\n",
"Wall time: 67 ms\n"
]
}
],
"source": [
"%%time\n",
"print(twitter.pos(text))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"
\n",
"## **3 꼬꼬마, 한나눔**\n",
"개별 모듈에 따라 다른결과를 출력"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('워', 'UN'), ('런', 'NNG'), ('버핏', 'UN'), ('은', 'JX'), ('삼성전자', 'NNG'), ('가', 'JKS'), ('아니', 'VV'), ('ㄴ', 'ETD'), ('애플', 'NNP'), ('주식', 'NNG'), ('을', 'JKO'), ('왜', 'MAG'), ('사', 'VV'), ('었', 'EPT'), ('을까', 'EFQ')]\n",
"CPU times: user 21.6 s, sys: 229 ms, total: 21.8 s\n",
"Wall time: 16.1 s\n"
]
}
],
"source": [
"%%time\n",
"from konlpy.tag import Kkma\n",
"kkma = Kkma()\n",
"print(kkma.pos(text))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('워런', 'N'), ('버핏', 'N'), ('은', 'J'), ('삼성전자', 'N'), ('가', 'J'), ('아니', 'P'), ('ㄴ', 'E'), ('애플주식', 'N'), ('을', 'J'), ('왜', 'M'), ('사', 'P'), ('아ㄹ까', 'E')]\n",
"CPU times: user 8.6 s, sys: 77 ms, total: 8.68 s\n",
"Wall time: 5.29 s\n"
]
}
],
"source": [
"%%time\n",
"from konlpy.tag import Hannanum\n",
"han = Hannanum()\n",
"print(han.pos(text))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}