{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Stopword-TfIdf.ipynb", "version": "0.3.2", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "cells": [ { "metadata": { "id": "QYrPzfy_2rlN", "colab_type": "text" }, "cell_type": "markdown", "source": [ "

\n", "# **Stop Words**\n", "> **nltk**" ] }, { "metadata": { "id": "jbYb-8vw2rlP", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 199 }, "outputId": "a45c3b29-99a2-4981-9ebd-f58ab32c8e3e" }, "cell_type": "code", "source": [ "# Install the packages this notebook needs.\n", "# The PyPI distribution is named scikit-learn; the old 'sklearn' alias is deprecated.\n", "%pip install scikit-learn nltk\n", "\n", "import nltk\n", "\n", "# 'punkt_tab' is the tokenizer resource newer NLTK releases load for word_tokenize;\n", "# 'punkt' is kept for older releases.\n", "for resource in ('punkt', 'punkt_tab', 'stopwords'):\n", "    nltk.download(resource)\n", "\n", "# Source text analysed in the Tf-idf section\n", "news_texts = \"https://raw.githubusercontent.com/YongBeomKim/nltk_basic/master/data/News.txt\"" ], "execution_count": 1, "outputs": [ { "output_type": "stream", "text": [ "Requirement already satisfied: sklearn in /usr/local/lib/python3.6/dist-packages (0.0)\n", "Requirement already satisfied: nltk in /usr/local/lib/python3.6/dist-packages (3.2.5)\n", "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from sklearn) (0.20.3)\n", "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from nltk) (1.11.0)\n", "Requirement already satisfied: scipy>=0.13.3 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->sklearn) (1.1.0)\n", "Requirement already satisfied: numpy>=1.8.2 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->sklearn) (1.14.6)\n", "[nltk_data] Downloading package punkt to /root/nltk_data...\n", "[nltk_data] Package punkt is already up-to-date!\n", "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n" ], "name": "stdout" } ] }, { "metadata": { "id": "xB3iast92rlW", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 399 }, "outputId": "e2a1a897-04c1-4e8b-ae10-c7abcb8207f9" }, "cell_type": "code", "source": [ "# Languages for which NLTK ships a stop-word list.\n", "# fileids() is the public accessor and loads the lazy corpus on first use.\n", "from nltk.corpus import stopwords\n", "stopwords.fileids()" ], "execution_count": 2, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['arabic',\n", " 'azerbaijani',\n", " 'danish',\n", 
" 'dutch',\n", " 'english',\n", " 'finnish',\n", " 'french',\n", " 'german',\n", " 'greek',\n", " 'hungarian',\n", " 'indonesian',\n", " 'italian',\n", " 'kazakh',\n", " 'nepali',\n", " 'norwegian',\n", " 'portuguese',\n", " 'romanian',\n", " 'russian',\n", " 'spanish',\n", " 'swedish',\n", " 'turkish']" ] }, "metadata": { "tags": [] }, "execution_count": 2 } ] }, { "metadata": { "id": "dmrwljjW2rla", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "outputId": "8254f173-3907-4974-f89c-13f51b34ce80" }, "cell_type": "code", "source": [ "# English stop-word list (kept as a list so it can be sliced for preview)\n", "from nltk.corpus import stopwords\n", "stop_eng = stopwords.words(\"english\")\n", "stop_eng[:8]" ], "execution_count": 3, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves']" ] }, "metadata": { "tags": [] }, "execution_count": 3 } ] }, { "metadata": { "id": "ljzmOev_2rld", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "outputId": "50ffcd53-639a-4a37-a150-a54ccabe8d87" }, "cell_type": "code", "source": [ "# Lowercase the English text so it can be compared with the lowercase stop words\n", "texts = 'I like such a Wonderful Snow Ice Cream'\n", "texts = texts.lower()\n", "texts" ], "execution_count": 4, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'i like such a wonderful snow ice cream'" ] }, "metadata": { "tags": [] }, "execution_count": 4 } ] }, { "metadata": { "id": "yVksIyqn2rlg", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "outputId": "7ebf4bb3-510e-4f17-d80c-3c7ee27c498c" }, "cell_type": "code", "source": [ "# Split the sentence into word tokens\n", "from nltk import word_tokenize\n", "tokens = word_tokenize(texts)\n", "tokens" ], "execution_count": 5, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['i', 'like', 'such', 'a', 'wonderful', 'snow', 'ice', 'cream']" ] }, "metadata": { "tags": [] }, "execution_count": 5 } ] }, { "metadata": { "id": "oSCLRCcC2rlj", 
"colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "outputId": "9c085901-4a9d-48ce-8cf1-b06d2b59b558" }, "cell_type": "code", "source": [ "# Drop the stop words; a set gives constant-time membership tests\n", "stop_set = set(stop_eng)\n", "tokens = [word for word in tokens if word not in stop_set]\n", "print(tokens)" ], "execution_count": 6, "outputs": [ { "output_type": "stream", "text": [ "['like', 'wonderful', 'snow', 'ice', 'cream']\n" ], "name": "stdout" } ] }, { "metadata": { "id": "qVnpZfqQ2rll", "colab_type": "text" }, "cell_type": "markdown", "source": [ "

\n", "# **Tf-idf**\n", "> **scikit learn**" ] }, { "metadata": { "id": "96upZtAh2rlm", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 55 }, "outputId": "95b41365-f6de-4ca5-91a8-ec86033488f4" }, "cell_type": "code", "source": [ "# Download the news article and lowercase it\n", "import requests\n", "\n", "response = requests.get(news_texts, timeout=30)\n", "response.raise_for_status()  # fail here rather than analysing an HTTP error page\n", "texts = response.text.lower()\n", "texts[:300]" ], "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'samsung electronics posted krw 65.98 trillion in consolidated revenue and krw 15.15 trillion in operating profit for the fourth quarter of 2017.\\n \\noverall, the company reported full-year revenue of krw 239.58 trillion and full-year operating profit of krw 53.65 trillion.\\n \\nfourth quarter earnings we'" ] }, "metadata": { "tags": [] }, "execution_count": 7 } ] }, { "metadata": { "id": "fLapw_Da2rlp", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 55 }, "outputId": "0f93e0ca-ae15-4615-fdc4-abe26a6318b5" }, "cell_type": "code", "source": [ "# Keep only tokens that start with a letter: standalone numbers and punctuation are dropped.\n", "# The pattern needs at least two characters and allows digits after the first letter (e.g. 'a8').\n", "import re\n", "tokenizer = re.compile(r'[a-z]\\w+')  # raw string avoids the invalid '\\w' escape warning\n", "tokens = tokenizer.findall(texts)\n", "document = \" \".join(tokens)\n", "document[:300]" ], "execution_count": 8, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'samsung electronics posted krw trillion in consolidated revenue and krw trillion in operating profit for the fourth quarter of overall the company reported full year revenue of krw trillion and full year operating profit of krw trillion fourth quarter earnings were driven by the components business '" ] }, "metadata": { "tags": [] }, "execution_count": 8 } ] }, { "metadata": { "id": "9kfmIR0J2rlr", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 1835 }, "outputId": "e1fce611-f0c1-422d-e097-bec81af70888" }, "cell_type": "code", "source": [ 
"import numpy as np\n", "import pandas as pd\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "\n", "# Only one document is fitted, so every term receives the same IDF weight and the\n", "# scores are effectively L2-normalised term counts (English stop words removed).\n", "tfidf_vec = TfidfVectorizer(stop_words='english')\n", "tfidf_sparse = tfidf_vec.fit_transform(raw_documents=[document])\n", "transformed = tfidf_sparse.toarray()  # dense 2-D array, one row per document\n", "transformed" ], "execution_count": 9, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([[0.01705916, 0.00852958, 0.00852958, 0.00852958, 0.00852958,\n", " 0.00852958, 0.00852958, 0.01705916, 0.00852958, 0.03411832,\n", " 0.02558874, 0.01705916, 0.00852958, 0.00852958, 0.00852958,\n", " 0.03411832, 0.02558874, 0.03411832, 0.01705916, 0.00852958,\n", " 0.01705916, 0.0426479 , 0.03411832, 0.00852958, 0.00852958,\n", " 0.00852958, 0.05970706, 0.00852958, 0.0426479 , 0.01705916,\n", " 0.00852958, 0.00852958, 0.00852958, 0.00852958, 0.01705916,\n", " 0.00852958, 0.03411832, 0.02558874, 0.00852958, 0.00852958,\n", " 0.03411832, 0.01705916, 0.00852958, 0.03411832, 0.01705916,\n", " 0.00852958, 0.00852958, 0.00852958, 0.01705916, 0.27294655,\n", " 0.0426479 , 0.01705916, 0.00852958, 0.00852958, 0.01705916,\n", " 0.00852958, 0.00852958, 0.00852958, 0.01705916, 0.00852958,\n", " 0.00852958, 0.01705916, 0.00852958, 0.00852958, 0.00852958,\n", " 0.00852958, 0.01705916, 0.02558874, 0.00852958, 0.00852958,\n", " 0.01705916, 0.15353243, 0.00852958, 0.00852958, 0.03411832,\n", " 0.05970706, 0.00852958, 0.01705916, 0.00852958, 0.00852958,\n", " 0.03411832, 0.00852958, 0.00852958, 0.0426479 , 0.02558874,\n", " 0.01705916, 0.03411832, 0.01705916, 0.00852958, 0.01705916,\n", " 0.00852958, 0.00852958, 0.0426479 , 0.01705916, 0.00852958,\n", " 0.00852958, 0.00852958, 0.00852958, 0.0426479 , 0.01705916,\n", " 0.00852958, 0.02558874, 0.01705916, 0.02558874, 0.02558874,\n", " 0.00852958, 0.01705916, 0.05970706, 0.28147613, 0.00852958,\n", " 0.06823664, 0.03411832, 0.00852958, 0.01705916, 0.01705916,\n", " 0.01705916, 0.00852958, 0.00852958, 0.03411832, 0.00852958,\n", " 
0.06823664, 0.01705916, 0.00852958, 0.05970706, 0.00852958,\n", " 0.00852958, 0.00852958, 0.00852958, 0.05117748, 0.00852958,\n", " 0.01705916, 0.02558874, 0.00852958, 0.00852958, 0.01705916,\n", " 0.18765075, 0.00852958, 0.00852958, 0.00852958, 0.01705916,\n", " 0.00852958, 0.02558874, 0.05117748, 0.00852958, 0.10235496,\n", " 0.00852958, 0.01705916, 0.01705916, 0.00852958, 0.01705916,\n", " 0.02558874, 0.00852958, 0.0852958 , 0.07676622, 0.03411832,\n", " 0.00852958, 0.14500285, 0.05117748, 0.00852958, 0.00852958,\n", " 0.00852958, 0.00852958, 0.00852958, 0.00852958, 0.00852958,\n", " 0.00852958, 0.00852958, 0.00852958, 0.0852958 , 0.00852958,\n", " 0.03411832, 0.00852958, 0.00852958, 0.09382538, 0.02558874,\n", " 0.00852958, 0.02558874, 0.01705916, 0.00852958, 0.00852958,\n", " 0.00852958, 0.03411832, 0.05970706, 0.01705916, 0.01705916,\n", " 0.0426479 , 0.00852958, 0.00852958, 0.00852958, 0.02558874,\n", " 0.02558874, 0.02558874, 0.0426479 , 0.01705916, 0.10235496,\n", " 0.0426479 , 0.00852958, 0.13647327, 0.01705916, 0.00852958,\n", " 0.00852958, 0.02558874, 0.00852958, 0.00852958, 0.03411832,\n", " 0.00852958, 0.00852958, 0.05117748, 0.01705916, 0.00852958,\n", " 0.00852958, 0.07676622, 0.06823664, 0.07676622, 0.05970706,\n", " 0.00852958, 0.01705916, 0.02558874, 0.00852958, 0.00852958,\n", " 0.00852958, 0.03411832, 0.02558874, 0.00852958, 0.01705916,\n", " 0.01705916, 0.00852958, 0.13647327, 0.05970706, 0.00852958,\n", " 0.01705916, 0.01705916, 0.00852958, 0.03411832, 0.10235496,\n", " 0.00852958, 0.0426479 , 0.03411832, 0.05117748, 0.00852958,\n", " 0.00852958, 0.00852958, 0.0426479 , 0.00852958, 0.00852958,\n", " 0.05117748, 0.05970706, 0.02558874, 0.00852958, 0.02558874,\n", " 0.02558874, 0.02558874, 0.00852958, 0.01705916, 0.0426479 ,\n", " 0.01705916, 0.00852958, 0.00852958, 0.06823664, 0.00852958,\n", " 0.00852958, 0.01705916, 0.00852958, 0.14500285, 0.0426479 ,\n", " 0.02558874, 0.02558874, 0.00852958, 0.00852958, 0.00852958,\n", " 0.01705916, 
0.05970706, 0.00852958, 0.03411832, 0.00852958,\n", " 0.02558874, 0.00852958, 0.01705916, 0.16206201, 0.00852958,\n", " 0.0426479 , 0.00852958, 0.00852958, 0.00852958, 0.05970706,\n", " 0.01705916, 0.00852958, 0.01705916, 0.02558874, 0.01705916,\n", " 0.15353243, 0.01705916, 0.0426479 , 0.0426479 , 0.01705916,\n", " 0.00852958, 0.00852958, 0.00852958, 0.00852958, 0.09382538,\n", " 0.00852958, 0.00852958, 0.01705916, 0.06823664, 0.00852958,\n", " 0.00852958, 0.00852958, 0.01705916, 0.01705916, 0.01705916,\n", " 0.00852958, 0.00852958, 0.02558874, 0.00852958, 0.00852958,\n", " 0.00852958, 0.07676622, 0.06823664, 0.00852958, 0.00852958,\n", " 0.01705916, 0.01705916, 0.01705916, 0.00852958, 0.00852958,\n", " 0.00852958, 0.01705916, 0.01705916, 0.00852958, 0.00852958,\n", " 0.00852958, 0.00852958, 0.00852958, 0.06823664, 0.00852958,\n", " 0.00852958, 0.00852958, 0.14500285, 0.01705916, 0.00852958,\n", " 0.03411832, 0.01705916, 0.00852958, 0.02558874, 0.0852958 ,\n", " 0.05117748, 0.02558874, 0.22176907, 0.06823664, 0.05117748,\n", " 0.00852958, 0.00852958, 0.00852958, 0.00852958, 0.01705916,\n", " 0.00852958, 0.00852958, 0.00852958, 0.01705916, 0.01705916,\n", " 0.07676622, 0.00852958, 0.00852958, 0.19618033, 0.00852958,\n", " 0.00852958, 0.02558874, 0.01705916, 0.00852958, 0.00852958,\n", " 0.00852958, 0.00852958, 0.00852958, 0.00852958, 0.03411832,\n", " 0.01705916, 0.02558874, 0.02558874, 0.01705916, 0.00852958,\n", " 0.00852958, 0.00852958, 0.00852958, 0.01705916, 0.00852958,\n", " 0.00852958, 0.00852958, 0.00852958, 0.07676622, 0.02558874,\n", " 0.00852958, 0.01705916, 0.00852958, 0.00852958, 0.00852958,\n", " 0.00852958, 0.17912117, 0.24735781, 0.02558874, 0.00852958,\n", " 0.01705916, 0.01705916, 0.02558874, 0.00852958, 0.13647327,\n", " 0.00852958, 0.01705916, 0.00852958, 0.00852958, 0.00852958,\n", " 0.00852958, 0.00852958, 0.03411832, 0.00852958, 0.03411832,\n", " 0.00852958, 0.0426479 , 0.05117748, 0.02558874, 0.01705916,\n", " 0.01705916, 0.05970706, 
0.00852958, 0.02558874, 0.01705916,\n", " 0.00852958, 0.01705916, 0.00852958, 0.06823664, 0.09382538,\n", " 0.00852958, 0.00852958, 0.05117748, 0.00852958, 0.03411832,\n", " 0.00852958, 0.00852958, 0.02558874, 0.02558874, 0.02558874,\n", " 0.01705916, 0.00852958, 0.00852958, 0.00852958, 0.00852958,\n", " 0.00852958, 0.01705916, 0.00852958, 0.0426479 , 0.00852958,\n", " 0.05970706, 0.00852958, 0.12794369, 0.00852958, 0.03411832,\n", " 0.00852958, 0.00852958, 0.00852958, 0.01705916, 0.05970706,\n", " 0.00852958, 0.05970706, 0.05117748, 0.00852958, 0.00852958,\n", " 0.00852958, 0.00852958, 0.12794369, 0.00852958, 0.05970706,\n", " 0.05970706, 0.00852958, 0.0426479 , 0.00852958, 0.00852958,\n", " 0.00852958, 0.00852958, 0.00852958, 0.00852958, 0.00852958,\n", " 0.00852958, 0.03411832, 0.00852958, 0.00852958, 0.01705916,\n", " 0.00852958, 0.00852958, 0.02558874, 0.12794369, 0.00852958,\n", " 0.00852958, 0.00852958, 0.00852958, 0.00852958, 0.06823664,\n", " 0.00852958, 0.06823664]])" ] }, "metadata": { "tags": [] }, "execution_count": 9 } ] }, { "metadata": { "id": "g9Xy-KJ82rlu", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 9054 }, "outputId": "f1cb1ccc-093a-48f6-96ec-feab190c42f2" }, "cell_type": "code", "source": [ "index_value = {i[1]:i[0] for i in tfidf_vec.vocabulary_.items()}\n", "fully_indexed = {index_value[column]:value for row in transformed \n", " for (column,value) in enumerate(row)}\n", "fully_indexed" ], "execution_count": 10, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'a8': 0.017059159255788922,\n", " 'accelerating': 0.008529579627894461,\n", " 'accountable': 0.008529579627894461,\n", " 'achieve': 0.008529579627894461,\n", " 'achieved': 0.008529579627894461,\n", " 'achieving': 0.008529579627894461,\n", " 'actively': 0.008529579627894461,\n", " 'activities': 0.017059159255788922,\n", " 'add': 0.008529579627894461,\n", " 'added': 0.034118318511577844,\n", " 'addition': 
0.025588738883683383,\n", " 'address': 0.017059159255788922,\n", " 'adopting': 0.008529579627894461,\n", " 'adoption': 0.008529579627894461,\n", " 'advanced': 0.008529579627894461,\n", " 'affected': 0.034118318511577844,\n", " 'ahead': 0.025588738883683383,\n", " 'ai': 0.034118318511577844,\n", " 'aim': 0.017059159255788922,\n", " 'aiming': 0.008529579627894461,\n", " 'aims': 0.017059159255788922,\n", " 'america': 0.042647898139472305,\n", " 'amid': 0.034118318511577844,\n", " 'amounting': 0.008529579627894461,\n", " 'announcement': 0.008529579627894461,\n", " 'anticipated': 0.008529579627894461,\n", " 'appliances': 0.05970705739526123,\n", " 'application': 0.008529579627894461,\n", " 'applications': 0.042647898139472305,\n", " 'applying': 0.017059159255788922,\n", " 'appreciation': 0.008529579627894461,\n", " 'approximately': 0.008529579627894461,\n", " 'aps': 0.008529579627894461,\n", " 'areas': 0.008529579627894461,\n", " 'asp': 0.017059159255788922,\n", " 'asps': 0.008529579627894461,\n", " 'automotive': 0.034118318511577844,\n", " 'b2b': 0.025588738883683383,\n", " 'backed': 0.008529579627894461,\n", " 'base': 0.008529579627894461,\n", " 'based': 0.034118318511577844,\n", " 'basis': 0.017059159255788922,\n", " 'billion': 0.008529579627894461,\n", " 'bixby': 0.034118318511577844,\n", " 'bolster': 0.017059159255788922,\n", " 'boost': 0.008529579627894461,\n", " 'bringing': 0.008529579627894461,\n", " 'building': 0.008529579627894461,\n", " 'builds': 0.017059159255788922,\n", " 'business': 0.27294654809262275,\n", " 'businesses': 0.042647898139472305,\n", " 'camera': 0.017059159255788922,\n", " 'cameras': 0.008529579627894461,\n", " 'capabilities': 0.008529579627894461,\n", " 'capex': 0.017059159255788922,\n", " 'capital': 0.008529579627894461,\n", " 'capitalizing': 0.008529579627894461,\n", " 'case': 0.008529579627894461,\n", " 'ce': 0.017059159255788922,\n", " 'challenges': 0.008529579627894461,\n", " 'channels': 0.008529579627894461,\n", " 'china': 
0.017059159255788922,\n", " 'chips': 0.008529579627894461,\n", " 'chipsets': 0.008529579627894461,\n", " 'circumstances': 0.008529579627894461,\n", " 'cis': 0.008529579627894461,\n", " 'class': 0.017059159255788922,\n", " 'cloud': 0.025588738883683383,\n", " 'coming': 0.008529579627894461,\n", " 'commercialization': 0.008529579627894461,\n", " 'communications': 0.017059159255788922,\n", " 'company': 0.1535324333021003,\n", " 'compared': 0.008529579627894461,\n", " 'competencies': 0.008529579627894461,\n", " 'competition': 0.034118318511577844,\n", " 'competitiveness': 0.05970705739526123,\n", " 'completion': 0.008529579627894461,\n", " 'components': 0.017059159255788922,\n", " 'comprising': 0.008529579627894461,\n", " 'concentrated': 0.008529579627894461,\n", " 'conditions': 0.034118318511577844,\n", " 'connected': 0.008529579627894461,\n", " 'connectivity': 0.008529579627894461,\n", " 'consolidated': 0.042647898139472305,\n", " 'consumer': 0.025588738883683383,\n", " 'content': 0.017059159255788922,\n", " 'continue': 0.034118318511577844,\n", " 'continued': 0.017059159255788922,\n", " 'continues': 0.008529579627894461,\n", " 'continuing': 0.017059159255788922,\n", " 'contribution': 0.008529579627894461,\n", " 'core': 0.008529579627894461,\n", " 'cost': 0.042647898139472305,\n", " 'costs': 0.017059159255788922,\n", " 'cryptocurrency': 0.008529579627894461,\n", " 'cup': 0.008529579627894461,\n", " 'currencies': 0.008529579627894461,\n", " 'customer': 0.008529579627894461,\n", " 'customers': 0.042647898139472305,\n", " 'cutting': 0.017059159255788922,\n", " 'dampened': 0.008529579627894461,\n", " 'datacenter': 0.025588738883683383,\n", " 'datacenters': 0.017059159255788922,\n", " 'decline': 0.025588738883683383,\n", " 'declined': 0.025588738883683383,\n", " 'declining': 0.008529579627894461,\n", " 'decrease': 0.017059159255788922,\n", " 'decreased': 0.05970705739526123,\n", " 'demand': 0.2814761277205172,\n", " 'demands': 0.008529579627894461,\n", " 'density': 
0.06823663702315569,\n", " 'despite': 0.034118318511577844,\n", " 'device': 0.008529579627894461,\n", " 'devices': 0.017059159255788922,\n", " 'differentiate': 0.017059159255788922,\n", " 'differentiated': 0.017059159255788922,\n", " 'differentiating': 0.008529579627894461,\n", " 'differentiation': 0.008529579627894461,\n", " 'digital': 0.034118318511577844,\n", " 'digits': 0.008529579627894461,\n", " 'display': 0.06823663702315569,\n", " 'displays': 0.017059159255788922,\n", " 'distribution': 0.008529579627894461,\n", " 'division': 0.05970705739526123,\n", " 'dollar': 0.008529579627894461,\n", " 'door': 0.008529579627894461,\n", " 'dot': 0.008529579627894461,\n", " 'double': 0.008529579627894461,\n", " 'dram': 0.051177477767366766,\n", " 'drive': 0.008529579627894461,\n", " 'driven': 0.017059159255788922,\n", " 'dual': 0.025588738883683383,\n", " 'duo': 0.008529579627894461,\n", " 'earlier': 0.008529579627894461,\n", " 'early': 0.017059159255788922,\n", " 'earnings': 0.18765075181367816,\n", " 'east': 0.008529579627894461,\n", " 'economic': 0.008529579627894461,\n", " 'ecosystem': 0.008529579627894461,\n", " 'edge': 0.017059159255788922,\n", " 'effect': 0.008529579627894461,\n", " 'efforts': 0.025588738883683383,\n", " 'electronics': 0.051177477767366766,\n", " 'employees': 0.008529579627894461,\n", " 'end': 0.10235495553473353,\n", " 'engines': 0.008529579627894461,\n", " 'enhance': 0.017059159255788922,\n", " 'enhancing': 0.017059159255788922,\n", " 'enjoy': 0.008529579627894461,\n", " 'europe': 0.017059159255788922,\n", " 'events': 0.025588738883683383,\n", " 'executed': 0.008529579627894461,\n", " 'expand': 0.08529579627894461,\n", " 'expanding': 0.07676621665105016,\n", " 'expansion': 0.034118318511577844,\n", " 'expect': 0.008529579627894461,\n", " 'expected': 0.14500285367420584,\n", " 'expects': 0.051177477767366766,\n", " 'expenditure': 0.008529579627894461,\n", " 'experiences': 0.008529579627894461,\n", " 'experiential': 0.008529579627894461,\n", " 
'face': 0.008529579627894461,\n", " 'family': 0.008529579627894461,\n", " 'fast': 0.008529579627894461,\n", " 'favorable': 0.008529579627894461,\n", " 'features': 0.008529579627894461,\n", " 'fifa': 0.008529579627894461,\n", " 'finalized': 0.008529579627894461,\n", " 'flagship': 0.08529579627894461,\n", " 'flex': 0.008529579627894461,\n", " 'flexible': 0.034118318511577844,\n", " 'flexibly': 0.008529579627894461,\n", " 'flexwash': 0.008529579627894461,\n", " 'focus': 0.09382537590683908,\n", " 'foldable': 0.025588738883683383,\n", " 'following': 0.008529579627894461,\n", " 'forecast': 0.025588738883683383,\n", " 'forecasts': 0.017059159255788922,\n", " 'foresees': 0.008529579627894461,\n", " 'forward': 0.008529579627894461,\n", " 'foundation': 0.008529579627894461,\n", " 'foundry': 0.034118318511577844,\n", " 'fourth': 0.05970705739526123,\n", " 'frs': 0.017059159255788922,\n", " 'gains': 0.017059159255788922,\n", " 'galaxy': 0.042647898139472305,\n", " 'games': 0.008529579627894461,\n", " 'gb': 0.008529579627894461,\n", " 'gears': 0.008529579627894461,\n", " 'generation': 0.025588738883683383,\n", " 'global': 0.025588738883683383,\n", " 'grew': 0.025588738883683383,\n", " 'grow': 0.042647898139472305,\n", " 'growing': 0.017059159255788922,\n", " 'growth': 0.10235495553473353,\n", " 'half': 0.042647898139472305,\n", " 'hardware': 0.008529579627894461,\n", " 'high': 0.13647327404631138,\n", " 'higher': 0.017059159255788922,\n", " 'hike': 0.008529579627894461,\n", " 'holiday': 0.008529579627894461,\n", " 'home': 0.025588738883683383,\n", " 'hub': 0.008529579627894461,\n", " 'im': 0.008529579627894461,\n", " 'image': 0.034118318511577844,\n", " 'impact': 0.008529579627894461,\n", " 'impacted': 0.008529579627894461,\n", " 'improve': 0.051177477767366766,\n", " 'improved': 0.017059159255788922,\n", " 'improvement': 0.008529579627894461,\n", " 'incentive': 0.008529579627894461,\n", " 'including': 0.07676621665105016,\n", " 'increase': 0.06823663702315569,\n", " 
'increased': 0.07676621665105016,\n", " 'increasing': 0.05970705739526123,\n", " 'indicated': 0.008529579627894461,\n", " 'industry': 0.017059159255788922,\n", " 'intensified': 0.025588738883683383,\n", " 'intensifying': 0.008529579627894461,\n", " 'international': 0.008529579627894461,\n", " 'investment': 0.008529579627894461,\n", " 'investments': 0.034118318511577844,\n", " 'iot': 0.025588738883683383,\n", " 'january': 0.008529579627894461,\n", " 'japan': 0.017059159255788922,\n", " 'korea': 0.017059159255788922,\n", " 'korean': 0.008529579627894461,\n", " 'krw': 0.13647327404631138,\n", " 'large': 0.05970705739526123,\n", " 'largest': 0.008529579627894461,\n", " 'launch': 0.017059159255788922,\n", " 'launched': 0.017059159255788922,\n", " 'launches': 0.008529579627894461,\n", " 'layer': 0.034118318511577844,\n", " 'lcd': 0.10235495553473353,\n", " 'lead': 0.008529579627894461,\n", " 'leadership': 0.042647898139472305,\n", " 'led': 0.034118318511577844,\n", " 'likely': 0.051177477767366766,\n", " 'limited': 0.008529579627894461,\n", " 'line': 0.008529579627894461,\n", " 'lines': 0.008529579627894461,\n", " 'lineup': 0.042647898139472305,\n", " 'long': 0.008529579627894461,\n", " 'look': 0.008529579627894461,\n", " 'looking': 0.051177477767366766,\n", " 'low': 0.05970705739526123,\n", " 'lower': 0.025588738883683383,\n", " 'lpddr4x': 0.008529579627894461,\n", " 'lsi': 0.025588738883683383,\n", " 'lte': 0.025588738883683383,\n", " 'ltps': 0.025588738883683383,\n", " 'machine': 0.008529579627894461,\n", " 'machines': 0.017059159255788922,\n", " 'mainly': 0.042647898139472305,\n", " 'mainstream': 0.017059159255788922,\n", " 'maintaining': 0.008529579627894461,\n", " 'maintains': 0.008529579627894461,\n", " 'major': 0.06823663702315569,\n", " 'make': 0.008529579627894461,\n", " 'managing': 0.008529579627894461,\n", " 'manufactures': 0.017059159255788922,\n", " 'manufacturing': 0.008529579627894461,\n", " 'market': 0.14500285367420584,\n", " 'marketing': 
0.042647898139472305,\n", " 'markets': 0.025588738883683383,\n", " 'mass': 0.025588738883683383,\n", " 'material': 0.008529579627894461,\n", " 'measures': 0.008529579627894461,\n", " 'meet': 0.008529579627894461,\n", " 'meeting': 0.017059159255788922,\n", " 'memory': 0.05970705739526123,\n", " 'micro': 0.008529579627894461,\n", " 'mid': 0.034118318511577844,\n", " 'middle': 0.008529579627894461,\n", " 'migration': 0.025588738883683383,\n", " 'mining': 0.008529579627894461,\n", " 'mix': 0.017059159255788922,\n", " 'mobile': 0.16206201292999475,\n", " 'model': 0.008529579627894461,\n", " 'models': 0.042647898139472305,\n", " 'moderate': 0.008529579627894461,\n", " 'month': 0.008529579627894461,\n", " 'months': 0.008529579627894461,\n", " 'nand': 0.05970705739526123,\n", " 'nano': 0.017059159255788922,\n", " 'nd': 0.008529579627894461,\n", " 'needs': 0.017059159255788922,\n", " 'network': 0.025588738883683383,\n", " 'networks': 0.017059159255788922,\n", " 'new': 0.1535324333021003,\n", " 'newly': 0.017059159255788922,\n", " 'nm': 0.042647898139472305,\n", " 'north': 0.042647898139472305,\n", " 'note': 0.017059159255788922,\n", " 'numbers': 0.008529579627894461,\n", " 'offering': 0.008529579627894461,\n", " 'offerings': 0.008529579627894461,\n", " 'offset': 0.008529579627894461,\n", " 'oled': 0.09382537590683908,\n", " 'olympics': 0.008529579627894461,\n", " 'ones': 0.008529579627894461,\n", " 'online': 0.017059159255788922,\n", " 'operating': 0.06823663702315569,\n", " 'opportunities': 0.008529579627894461,\n", " 'optimization': 0.008529579627894461,\n", " 'optimizing': 0.008529579627894461,\n", " 'order': 0.017059159255788922,\n", " 'orders': 0.017059159255788922,\n", " 'outlook': 0.017059159255788922,\n", " 'oven': 0.008529579627894461,\n", " 'ovens': 0.008529579627894461,\n", " 'overall': 0.025588738883683383,\n", " 'overseas': 0.008529579627894461,\n", " 'packaging': 0.008529579627894461,\n", " 'paid': 0.008529579627894461,\n", " 'panel': 0.07676621665105016,\n", 
" 'panels': 0.06823663702315569,\n", " 'particularly': 0.008529579627894461,\n", " 'partners': 0.008529579627894461,\n", " 'partnerships': 0.017059159255788922,\n", " 'peak': 0.017059159255788922,\n", " 'performance': 0.017059159255788922,\n", " 'phones': 0.008529579627894461,\n", " 'pick': 0.008529579627894461,\n", " 'plan': 0.008529579627894461,\n", " 'plans': 0.017059159255788922,\n", " 'plant': 0.017059159255788922,\n", " 'platforms': 0.008529579627894461,\n", " 'portfolio': 0.008529579627894461,\n", " 'portion': 0.008529579627894461,\n", " 'position': 0.008529579627894461,\n", " 'post': 0.008529579627894461,\n", " 'posted': 0.06823663702315569,\n", " 'posts': 0.008529579627894461,\n", " 'power': 0.008529579627894461,\n", " 'preliminary': 0.008529579627894461,\n", " 'premium': 0.14500285367420584,\n", " 'previous': 0.017059159255788922,\n", " 'prices': 0.008529579627894461,\n", " 'process': 0.034118318511577844,\n", " 'processes': 0.017059159255788922,\n", " 'processor': 0.008529579627894461,\n", " 'processors': 0.025588738883683383,\n", " 'product': 0.08529579627894461,\n", " 'production': 0.051177477767366766,\n", " 'productivity': 0.025588738883683383,\n", " 'products': 0.221769070325256,\n", " 'profit': 0.06823663702315569,\n", " 'profitability': 0.051177477767366766,\n", " 'profitable': 0.008529579627894461,\n", " 'profits': 0.008529579627894461,\n", " 'programs': 0.008529579627894461,\n", " 'promptly': 0.008529579627894461,\n", " 'provide': 0.017059159255788922,\n", " 'providers': 0.008529579627894461,\n", " 'pursue': 0.008529579627894461,\n", " 'pushing': 0.008529579627894461,\n", " 'pyeongtaek': 0.017059159255788922,\n", " 'qled': 0.017059159255788922,\n", " 'qoq': 0.07676621665105016,\n", " 'qualitative': 0.008529579627894461,\n", " 'quantum': 0.008529579627894461,\n", " 'quarter': 0.1961803314415726,\n", " 'quarterly': 0.008529579627894461,\n", " 'quickdrive': 0.008529579627894461,\n", " 'ramp': 0.025588738883683383,\n", " 'range': 
0.017059159255788922,\n", " 'reached': 0.008529579627894461,\n", " 'readout': 0.008529579627894461,\n", " 'ready': 0.008529579627894461,\n", " 'recoveries': 0.008529579627894461,\n", " 'reduction': 0.008529579627894461,\n", " 'refrigerator': 0.008529579627894461,\n", " 'reinforce': 0.034118318511577844,\n", " 'related': 0.017059159255788922,\n", " 'release': 0.025588738883683383,\n", " 'remain': 0.025588738883683383,\n", " 'remained': 0.017059159255788922,\n", " 'replacement': 0.008529579627894461,\n", " 'reported': 0.008529579627894461,\n", " 'required': 0.008529579627894461,\n", " 'requirements': 0.008529579627894461,\n", " 'resolution': 0.017059159255788922,\n", " 'responding': 0.008529579627894461,\n", " 'response': 0.008529579627894461,\n", " 'responsible': 0.008529579627894461,\n", " 'resulting': 0.008529579627894461,\n", " 'revenue': 0.07676621665105016,\n", " 'rise': 0.025588738883683383,\n", " 'rising': 0.008529579627894461,\n", " 'risk': 0.017059159255788922,\n", " 's3': 0.008529579627894461,\n", " 's4': 0.008529579627894461,\n", " 's9': 0.008529579627894461,\n", " 'said': 0.008529579627894461,\n", " 'sales': 0.1791211721857837,\n", " 'samsung': 0.24735780920893938,\n", " 'saw': 0.025588738883683383,\n", " 'scaling': 0.008529579627894461,\n", " 'screen': 0.017059159255788922,\n", " 'screens': 0.017059159255788922,\n", " 'season': 0.025588738883683383,\n", " 'seasonal': 0.008529579627894461,\n", " 'seasonality': 0.13647327404631138,\n", " 'seasonally': 0.008529579627894461,\n", " 'second': 0.017059159255788922,\n", " 'secure': 0.008529579627894461,\n", " 'secured': 0.008529579627894461,\n", " 'seek': 0.008529579627894461,\n", " 'seeking': 0.008529579627894461,\n", " 'sees': 0.008529579627894461,\n", " 'segment': 0.034118318511577844,\n", " 'sell': 0.008529579627894461,\n", " 'semiconductor': 0.034118318511577844,\n", " 'semiconductors': 0.008529579627894461,\n", " 'sensors': 0.042647898139472305,\n", " 'server': 0.051177477767366766,\n", " 'servers': 
0.025588738883683383,\n", " 'services': 0.017059159255788922,\n", " 'set': 0.017059159255788922,\n", " 'shipments': 0.05970705739526123,\n", " 'significantly': 0.008529579627894461,\n", " 'size': 0.025588738883683383,\n", " 'sized': 0.017059159255788922,\n", " 'slightly': 0.008529579627894461,\n", " 'slow': 0.017059159255788922,\n", " 'slowed': 0.008529579627894461,\n", " 'smartphone': 0.06823663702315569,\n", " 'smartphones': 0.09382537590683908,\n", " 'smartthings': 0.008529579627894461,\n", " 'software': 0.008529579627894461,\n", " 'solid': 0.051177477767366766,\n", " 'solidify': 0.008529579627894461,\n", " 'solutions': 0.034118318511577844,\n", " 'specifically': 0.008529579627894461,\n", " 'spite': 0.008529579627894461,\n", " 'sporting': 0.025588738883683383,\n", " 'ssd': 0.025588738883683383,\n", " 'stable': 0.025588738883683383,\n", " 'stack': 0.017059159255788922,\n", " 'start': 0.008529579627894461,\n", " 'stations': 0.008529579627894461,\n", " 'steady': 0.008529579627894461,\n", " 'storage': 0.008529579627894461,\n", " 'stores': 0.008529579627894461,\n", " 'strategic': 0.017059159255788922,\n", " 'strategy': 0.008529579627894461,\n", " 'strengthen': 0.042647898139472305,\n", " 'strengthened': 0.008529579627894461,\n", " 'strengthening': 0.05970705739526123,\n", " 'strive': 0.008529579627894461,\n", " 'strong': 0.1279436944184169,\n", " 'stronger': 0.008529579627894461,\n", " 'supply': 0.034118318511577844,\n", " 'supplying': 0.008529579627894461,\n", " 'tablets': 0.008529579627894461,\n", " 'technological': 0.008529579627894461,\n", " 'technologies': 0.017059159255788922,\n", " 'technology': 0.05970705739526123,\n", " 'term': 0.008529579627894461,\n", " 'thanks': 0.05970705739526123,\n", " 'total': 0.051177477767366766,\n", " 'traditionally': 0.008529579627894461,\n", " 'transition': 0.008529579627894461,\n", " 'trend': 0.008529579627894461,\n", " 'trends': 0.008529579627894461,\n", " 'trillion': 0.1279436944184169,\n", " 'try': 0.008529579627894461,\n", " 
'tv': 0.05970705739526123,\n", " 'tvs': 0.05970705739526123,\n", " 'uhd': 0.008529579627894461,\n", " 'ultra': 0.042647898139472305,\n", " 'uncertainties': 0.008529579627894461,\n", " 'upcoming': 0.008529579627894461,\n", " 'upgrades': 0.008529579627894461,\n", " 'ups': 0.008529579627894461,\n", " 'usability': 0.008529579627894461,\n", " 'usage': 0.008529579627894461,\n", " 'used': 0.008529579627894461,\n", " 'utilization': 0.008529579627894461,\n", " 'value': 0.034118318511577844,\n", " 'valued': 0.008529579627894461,\n", " 'various': 0.008529579627894461,\n", " 'vendors': 0.017059159255788922,\n", " 'visual': 0.008529579627894461,\n", " 'vr': 0.008529579627894461,\n", " 'washing': 0.025588738883683383,\n", " 'weak': 0.1279436944184169,\n", " 'weighed': 0.008529579627894461,\n", " 'winter': 0.008529579627894461,\n", " 'won': 0.008529579627894461,\n", " 'world': 0.008529579627894461,\n", " 'xnm': 0.008529579627894461,\n", " 'year': 0.06823663702315569,\n", " 'yield': 0.008529579627894461,\n", " 'yoy': 0.06823663702315569}" ] }, "metadata": { "tags": [] }, "execution_count": 10 } ] }, { "metadata": { "id": "uESNNyRq3XnA", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "" ], "execution_count": 0, "outputs": [] } ] }