{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "accelerator": "GPU", "colab": { "name": "word_embeddings.ipynb", "provenance": [], "collapsed_sections": [], "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "metadata": { "id": "o2-QgXTj3mw_", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "d5c97527-91ca-4d73-b949-7c4201e4f33f" }, "source": [ "try:\n", " # শুধুমাত্র টেন্সর-ফ্লো ২.x ব্যবহার করবো \n", " %tensorflow_version 2.x\n", "except Exception:\n", " pass\n", "\n", "import tensorflow as tf\n", "from tensorflow import keras\n", "from tensorflow.keras import layers" ], "execution_count": 1, "outputs": [ { "output_type": "stream", "text": [ "TensorFlow 2.x selected.\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "HEaBBaCg4iaM", "colab_type": "code", "colab": {} }, "source": [ "embedding_layer = layers.Embedding(1000, 5)" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "colab_type": "code", "id": "0YUjPgP7w0PO", "colab": { "base_uri": "https://localhost:8080/", "height": 85 }, "outputId": "4245ebcc-d86c-4658-f34a-a6bf9055cfcb" }, "source": [ "result = embedding_layer(tf.constant([1,2,3]))\n", "result.numpy()" ], "execution_count": 3, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([[-0.03846053, -0.00674887, -0.01975296, -0.02325729, 0.04202225],\n", " [ 0.02986335, 0.01062097, -0.03452694, -0.0296132 , 0.00191165],\n", " [ 0.02632796, -0.04222443, -0.03406482, -0.048172 , -0.01869417]],\n", " dtype=float32)" ] }, "metadata": { "tags": [] }, "execution_count": 3 } ] }, { "cell_type": "code", "metadata": { "id": "DBKDbyfvy2Pm", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 129 }, "outputId": "cffbcec0-6c57-4a95-df61-721e9abed070" }, "source": [ "result = embedding_layer(tf.constant([১,২,৩]))\n", "result.numpy()" ], "execution_count": 4, "outputs": [ { "output_type": "error", "ename": "SyntaxError", "evalue": "ignored", "traceback": [ "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m result = embedding_layer(tf.constant([১,২,৩]))\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid character in identifier\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "Gl97oboX0ydP", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 289 }, "outputId": "cc8ac26c-7b4c-4bcb-d6b9-07954a4671bd" }, "source": [ "!pip install spacy" ], "execution_count": 5, "outputs": [ { "output_type": "stream", "text": [ "Requirement already satisfied: spacy in /usr/local/lib/python3.6/dist-packages (2.1.8)\n", "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.6/dist-packages (from spacy) (1.0.2)\n", "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.6/dist-packages (from spacy) (2.21.0)\n", "Requirement already satisfied: thinc<7.1.0,>=7.0.8 in /usr/local/lib/python3.6/dist-packages (from spacy) (7.0.8)\n", "Requirement already satisfied: srsly<1.1.0,>=0.0.6 in /usr/local/lib/python3.6/dist-packages (from spacy) (0.1.0)\n", "Requirement already satisfied: plac<1.0.0,>=0.9.6 in /usr/local/lib/python3.6/dist-packages (from spacy) (0.9.6)\n", "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from spacy) (2.0.2)\n", "Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from spacy) (0.2.2)\n", "Requirement already satisfied: preshed<2.1.0,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from spacy) (2.0.1)\n", "Requirement already satisfied: blis<0.3.0,>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from spacy) (0.2.4)\n", "Requirement already satisfied: numpy>=1.15.0 in /tensorflow-2.0.0/python3.6 (from spacy) (1.17.3)\n", "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2019.9.11)\n", "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (1.24.3)\n", "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2.8)\n", "Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in /usr/local/lib/python3.6/dist-packages (from thinc<7.1.0,>=7.0.8->spacy) (4.28.1)\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "C_u-D5y41dxW", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "e2bf5ea2-cdf4-4245-94b6-5b1a415cd59d" }, "source": [ "!python -m spacy download bn_core_news_sm" ], "execution_count": 6, "outputs": [ { "output_type": "stream", "text": [ "\n", "\u001b[38;5;1m✘ No compatible model found for 'bn_core_news_sm' (spaCy v2.1.8).\u001b[0m\n", "\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "PV-iFPP82axg", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 479 }, "outputId": "b5d84b00-082a-455b-b37b-d17d97ee007b" }, "source": [ "pip install https://github.com/banglakit/spacy-models/releases/download/bn_core_news_sm-2.0.0/bn_core_news_sm-2.0.0.tar.gz" ], "execution_count": 7, "outputs": [ { "output_type": "stream", "text": [ "Collecting https://github.com/banglakit/spacy-models/releases/download/bn_core_news_sm-2.0.0/bn_core_news_sm-2.0.0.tar.gz\n", "\u001b[?25l Downloading https://github.com/banglakit/spacy-models/releases/download/bn_core_news_sm-2.0.0/bn_core_news_sm-2.0.0.tar.gz (40.5MB)\n", "\u001b[K |████████████████████████████████| 40.5MB 223kB/s \n", "\u001b[?25hRequirement already satisfied: spacy>=>=2.0.12 in /usr/local/lib/python3.6/dist-packages (from bn-core-news-sm==2.0.0) (2.1.8)\n", "Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (0.2.2)\n", "Requirement already satisfied: thinc<7.1.0,>=7.0.8 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (7.0.8)\n", "Requirement already satisfied: blis<0.3.0,>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (0.2.4)\n", "Requirement already satisfied: numpy>=1.15.0 in /tensorflow-2.0.0/python3.6 (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (1.17.3)\n", "Requirement already satisfied: plac<1.0.0,>=0.9.6 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (0.9.6)\n", "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (2.0.2)\n", "Requirement already satisfied: preshed<2.1.0,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (2.0.1)\n", "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (2.21.0)\n", "Requirement already satisfied: srsly<1.1.0,>=0.0.6 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (0.1.0)\n", "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (1.0.2)\n", "Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in /usr/local/lib/python3.6/dist-packages (from thinc<7.1.0,>=7.0.8->spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (4.28.1)\n", "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (3.0.4)\n", "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (2.8)\n", "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (1.24.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (2019.9.11)\n", "Building wheels for collected packages: bn-core-news-sm\n", " Building wheel for bn-core-news-sm (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for bn-core-news-sm: filename=bn_core_news_sm-2.0.0-cp36-none-any.whl size=41254297 sha256=99fcea737cd7f45d9aeed8e4be6bb94237c779869ef398c0da06d342c5e32384\n", " Stored in directory: /root/.cache/pip/wheels/92/ef/60/567e70071236fd8535ae8aa080933500dc0a0455bdc0287e4d\n", "Successfully built bn-core-news-sm\n", "Installing collected packages: bn-core-news-sm\n", "Successfully installed bn-core-news-sm-2.0.0\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "W-XdY2eG0dgZ", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 377 }, "outputId": "c4b81ec4-2c61-447e-8ce6-9e0faf8249a3" }, "source": [ "import spacy\n", "import bn_core_news_sm\n", "\n", "nlp = bn_core_news_sm.load()\n", "doc = nlp(u'আমি বাংলায় গান গাই। তুমি কি গাও?')" ], "execution_count": 14, "outputs": [ { "output_type": "error", "ename": "KeyError", "evalue": "ignored", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mbn_core_news_sm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mnlp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbn_core_news_sm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mdoc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mu'আমি বাংলায় গান গাই। তুমি কি গাও?'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/bn_core_news_sm/__init__.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(**overrides)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m \u001b[0mlang\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_model_from_init_py\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m__file__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 50\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sbd'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSentenceSegmenter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvocab\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sentencizer'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSentenceSegmenter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvocab\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model_from_init_py\u001b[0;34m(init_file, **overrides)\u001b[0m\n\u001b[1;32m 194\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mmodel_path\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mIOError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE052\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath2str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 196\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mload_model_from_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 197\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model_from_path\u001b[0;34m(model_path, meta, **overrides)\u001b[0m\n\u001b[1;32m 175\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdisable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"pipeline_args\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 177\u001b[0;31m \u001b[0mcomponent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate_pipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 178\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_pipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcomponent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_disk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/language.py\u001b[0m in \u001b[0;36mcreate_pipe\u001b[0;34m(self, name, config)\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"sbd\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 263\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE108\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 264\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE002\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyError\u001b[0m: \"[E108] As of spaCy v2.1, the pipe name `sbd` has been deprecated in favor of the pipe name `sentencizer`, which does the same thing. For example, use `nlp.create_pipeline('sentencizer')`\"" ] } ] }, { "cell_type": "code", "metadata": { "id": "2w43jPG496qk", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 394 }, "outputId": "cc74f8df-50ae-47bb-e061-c61dc3f9f515" }, "source": [ "import spacy\n", "import bn_core_news_sm\n", "\n", "nlp = bn_core_news_sm.load()\n", "doc = nlp(u'আমি বাংলায় গান গাই। তুমি কি গাও?')\n", "\n", "# Create list of word tokens\n", "token_list = []\n", "for token in doc:\n", " token_list.append(token.text)\n", "print(token_list)" ], "execution_count": 11, "outputs": [ { "output_type": "error", "ename": "KeyError", "evalue": "ignored", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mbn_core_news_sm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mnlp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbn_core_news_sm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mdoc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mu'আমি বাংলায় গান গাই। তুমি কি গাও?'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/bn_core_news_sm/__init__.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(**overrides)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m \u001b[0mlang\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_model_from_init_py\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m__file__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 50\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sbd'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSentenceSegmenter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvocab\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sentencizer'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSentenceSegmenter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvocab\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model_from_init_py\u001b[0;34m(init_file, **overrides)\u001b[0m\n\u001b[1;32m 194\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mmodel_path\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mIOError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE052\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath2str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 196\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mload_model_from_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 197\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model_from_path\u001b[0;34m(model_path, meta, **overrides)\u001b[0m\n\u001b[1;32m 175\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdisable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"pipeline_args\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 177\u001b[0;31m \u001b[0mcomponent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate_pipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 178\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_pipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcomponent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_disk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/language.py\u001b[0m in \u001b[0;36mcreate_pipe\u001b[0;34m(self, name, config)\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"sbd\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 263\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE108\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 264\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE002\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyError\u001b[0m: \"[E108] As of spaCy v2.1, the pipe name `sbd` has been deprecated in favor of the pipe name `sentencizer`, which does the same thing. For example, use `nlp.create_pipeline('sentencizer')`\"" ] } ] }, { "cell_type": "code", "metadata": { "id": "SFDpZXCr_yWz", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 374 }, "outputId": "f262f437-267f-4476-83b7-13735de6388c" }, "source": [ "import spacy\n", "\n", "nlp = spacy.load(\"bn_core_news_sm\")\n", "doc = nlp(u'আমি বাংলায় গান গাই। তুমি কি গাও?')\n", "for token in doc:\n", " print(token.text, token.pos_, token.dep_)" ], "execution_count": 13, "outputs": [ { "output_type": "error", "ename": "OSError", "evalue": "ignored", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mspacy\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mnlp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspacy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"bn_core_news_sm\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mdoc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mu'আমি বাংলায় গান গাই। তুমি কি গাও?'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtoken\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdoc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/__init__.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(name, **overrides)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdepr_path\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mdeprecation_warning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mWarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mW001\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdepr_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model\u001b[0;34m(name, **overrides)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"exists\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Path or Path-like to model data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mload_model_from_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mIOError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE050\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mOSError\u001b[0m: [E050] Can't find model 'bn_core_news_sm'. It doesn't seem to be a shortcut link, a Python package or a valid path to a data directory." ] } ] } ] }