{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"accelerator": "GPU",
"colab": {
"name": "word_embeddings.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"metadata": {
"id": "o2-QgXTj3mw_",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "d5c97527-91ca-4d73-b949-7c4201e4f33f"
},
"source": [
"try:\n",
" # শুধুমাত্র টেন্সর-ফ্লো ২.x ব্যবহার করবো \n",
" %tensorflow_version 2.x\n",
"except Exception:\n",
" pass\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"TensorFlow 2.x selected.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "HEaBBaCg4iaM",
"colab_type": "code",
"colab": {}
},
"source": [
"embedding_layer = layers.Embedding(1000, 5)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "0YUjPgP7w0PO",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"outputId": "4245ebcc-d86c-4658-f34a-a6bf9055cfcb"
},
"source": [
"result = embedding_layer(tf.constant([1,2,3]))\n",
"result.numpy()"
],
"execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[-0.03846053, -0.00674887, -0.01975296, -0.02325729, 0.04202225],\n",
" [ 0.02986335, 0.01062097, -0.03452694, -0.0296132 , 0.00191165],\n",
" [ 0.02632796, -0.04222443, -0.03406482, -0.048172 , -0.01869417]],\n",
" dtype=float32)"
]
},
"metadata": {
"tags": []
},
"execution_count": 3
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "DBKDbyfvy2Pm",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 129
},
"outputId": "cffbcec0-6c57-4a95-df61-721e9abed070"
},
"source": [
"result = embedding_layer(tf.constant([১,২,৩]))\n",
"result.numpy()"
],
"execution_count": 4,
"outputs": [
{
"output_type": "error",
"ename": "SyntaxError",
"evalue": "ignored",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m result = embedding_layer(tf.constant([১,২,৩]))\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid character in identifier\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Gl97oboX0ydP",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 289
},
"outputId": "cc8ac26c-7b4c-4bcb-d6b9-07954a4671bd"
},
"source": [
"!pip install spacy"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: spacy in /usr/local/lib/python3.6/dist-packages (2.1.8)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.6/dist-packages (from spacy) (1.0.2)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.6/dist-packages (from spacy) (2.21.0)\n",
"Requirement already satisfied: thinc<7.1.0,>=7.0.8 in /usr/local/lib/python3.6/dist-packages (from spacy) (7.0.8)\n",
"Requirement already satisfied: srsly<1.1.0,>=0.0.6 in /usr/local/lib/python3.6/dist-packages (from spacy) (0.1.0)\n",
"Requirement already satisfied: plac<1.0.0,>=0.9.6 in /usr/local/lib/python3.6/dist-packages (from spacy) (0.9.6)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from spacy) (2.0.2)\n",
"Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from spacy) (0.2.2)\n",
"Requirement already satisfied: preshed<2.1.0,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from spacy) (2.0.1)\n",
"Requirement already satisfied: blis<0.3.0,>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from spacy) (0.2.4)\n",
"Requirement already satisfied: numpy>=1.15.0 in /tensorflow-2.0.0/python3.6 (from spacy) (1.17.3)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2019.9.11)\n",
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (1.24.3)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2.8)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in /usr/local/lib/python3.6/dist-packages (from thinc<7.1.0,>=7.0.8->spacy) (4.28.1)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "C_u-D5y41dxW",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 68
},
"outputId": "e2bf5ea2-cdf4-4245-94b6-5b1a415cd59d"
},
"source": [
"!python -m spacy download bn_core_news_sm"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": [
"\n",
"\u001b[38;5;1m✘ No compatible model found for 'bn_core_news_sm' (spaCy v2.1.8).\u001b[0m\n",
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "PV-iFPP82axg",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 479
},
"outputId": "b5d84b00-082a-455b-b37b-d17d97ee007b"
},
"source": [
"pip install https://github.com/banglakit/spacy-models/releases/download/bn_core_news_sm-2.0.0/bn_core_news_sm-2.0.0.tar.gz"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting https://github.com/banglakit/spacy-models/releases/download/bn_core_news_sm-2.0.0/bn_core_news_sm-2.0.0.tar.gz\n",
"\u001b[?25l Downloading https://github.com/banglakit/spacy-models/releases/download/bn_core_news_sm-2.0.0/bn_core_news_sm-2.0.0.tar.gz (40.5MB)\n",
"\u001b[K |████████████████████████████████| 40.5MB 223kB/s \n",
"\u001b[?25hRequirement already satisfied: spacy>=>=2.0.12 in /usr/local/lib/python3.6/dist-packages (from bn-core-news-sm==2.0.0) (2.1.8)\n",
"Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (0.2.2)\n",
"Requirement already satisfied: thinc<7.1.0,>=7.0.8 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (7.0.8)\n",
"Requirement already satisfied: blis<0.3.0,>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (0.2.4)\n",
"Requirement already satisfied: numpy>=1.15.0 in /tensorflow-2.0.0/python3.6 (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (1.17.3)\n",
"Requirement already satisfied: plac<1.0.0,>=0.9.6 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (0.9.6)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (2.0.2)\n",
"Requirement already satisfied: preshed<2.1.0,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (2.0.1)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (2.21.0)\n",
"Requirement already satisfied: srsly<1.1.0,>=0.0.6 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (0.1.0)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.6/dist-packages (from spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (1.0.2)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in /usr/local/lib/python3.6/dist-packages (from thinc<7.1.0,>=7.0.8->spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (4.28.1)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (3.0.4)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (2.8)\n",
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (1.24.3)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=>=2.0.12->bn-core-news-sm==2.0.0) (2019.9.11)\n",
"Building wheels for collected packages: bn-core-news-sm\n",
" Building wheel for bn-core-news-sm (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for bn-core-news-sm: filename=bn_core_news_sm-2.0.0-cp36-none-any.whl size=41254297 sha256=99fcea737cd7f45d9aeed8e4be6bb94237c779869ef398c0da06d342c5e32384\n",
" Stored in directory: /root/.cache/pip/wheels/92/ef/60/567e70071236fd8535ae8aa080933500dc0a0455bdc0287e4d\n",
"Successfully built bn-core-news-sm\n",
"Installing collected packages: bn-core-news-sm\n",
"Successfully installed bn-core-news-sm-2.0.0\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "W-XdY2eG0dgZ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 377
},
"outputId": "c4b81ec4-2c61-447e-8ce6-9e0faf8249a3"
},
"source": [
"import spacy\n",
"import bn_core_news_sm\n",
"\n",
"nlp = bn_core_news_sm.load()\n",
"doc = nlp(u'আমি বাংলায় গান গাই। তুমি কি গাও?')"
],
"execution_count": 14,
"outputs": [
{
"output_type": "error",
"ename": "KeyError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mbn_core_news_sm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mnlp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbn_core_news_sm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mdoc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mu'আমি বাংলায় গান গাই। তুমি কি গাও?'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/bn_core_news_sm/__init__.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(**overrides)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m \u001b[0mlang\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_model_from_init_py\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m__file__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 50\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sbd'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSentenceSegmenter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvocab\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sentencizer'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSentenceSegmenter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvocab\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model_from_init_py\u001b[0;34m(init_file, **overrides)\u001b[0m\n\u001b[1;32m 194\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mmodel_path\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mIOError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE052\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath2str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 196\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mload_model_from_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 197\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model_from_path\u001b[0;34m(model_path, meta, **overrides)\u001b[0m\n\u001b[1;32m 175\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdisable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"pipeline_args\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 177\u001b[0;31m \u001b[0mcomponent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate_pipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 178\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_pipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcomponent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_disk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/language.py\u001b[0m in \u001b[0;36mcreate_pipe\u001b[0;34m(self, name, config)\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"sbd\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 263\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE108\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 264\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE002\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: \"[E108] As of spaCy v2.1, the pipe name `sbd` has been deprecated in favor of the pipe name `sentencizer`, which does the same thing. For example, use `nlp.create_pipeline('sentencizer')`\""
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "2w43jPG496qk",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 394
},
"outputId": "cc74f8df-50ae-47bb-e061-c61dc3f9f515"
},
"source": [
"import spacy\n",
"import bn_core_news_sm\n",
"\n",
"nlp = bn_core_news_sm.load()\n",
"doc = nlp(u'আমি বাংলায় গান গাই। তুমি কি গাও?')\n",
"\n",
"# Create list of word tokens\n",
"token_list = []\n",
"for token in doc:\n",
" token_list.append(token.text)\n",
"print(token_list)"
],
"execution_count": 11,
"outputs": [
{
"output_type": "error",
"ename": "KeyError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mbn_core_news_sm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mnlp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbn_core_news_sm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mdoc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mu'আমি বাংলায় গান গাই। তুমি কি গাও?'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/bn_core_news_sm/__init__.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(**overrides)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m \u001b[0mlang\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_model_from_init_py\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m__file__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 50\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sbd'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSentenceSegmenter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvocab\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sentencizer'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSentenceSegmenter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvocab\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model_from_init_py\u001b[0;34m(init_file, **overrides)\u001b[0m\n\u001b[1;32m 194\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mmodel_path\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mIOError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE052\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath2str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 196\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mload_model_from_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 197\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model_from_path\u001b[0;34m(model_path, meta, **overrides)\u001b[0m\n\u001b[1;32m 175\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdisable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"pipeline_args\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 177\u001b[0;31m \u001b[0mcomponent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate_pipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 178\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_pipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcomponent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_disk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/language.py\u001b[0m in \u001b[0;36mcreate_pipe\u001b[0;34m(self, name, config)\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"sbd\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 263\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE108\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 264\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE002\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: \"[E108] As of spaCy v2.1, the pipe name `sbd` has been deprecated in favor of the pipe name `sentencizer`, which does the same thing. For example, use `nlp.create_pipeline('sentencizer')`\""
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "SFDpZXCr_yWz",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 374
},
"outputId": "f262f437-267f-4476-83b7-13735de6388c"
},
"source": [
"import spacy\n",
"\n",
"nlp = spacy.load(\"bn_core_news_sm\")\n",
"doc = nlp(u'আমি বাংলায় গান গাই। তুমি কি গাও?')\n",
"for token in doc:\n",
" print(token.text, token.pos_, token.dep_)"
],
"execution_count": 13,
"outputs": [
{
"output_type": "error",
"ename": "OSError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mspacy\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mnlp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspacy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"bn_core_news_sm\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mdoc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mu'আমি বাংলায় গান গাই। তুমি কি গাও?'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtoken\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdoc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/__init__.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(name, **overrides)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdepr_path\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mdeprecation_warning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mWarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mW001\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdepr_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model\u001b[0;34m(name, **overrides)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"exists\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Path or Path-like to model data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mload_model_from_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mIOError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE050\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mOSError\u001b[0m: [E050] Can't find model 'bn_core_news_sm'. It doesn't seem to be a shortcut link, a Python package or a valid path to a data directory."
]
}
]
}
]
}