{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "tts-cube-test.ipynb", "version": "0.3.2", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "code", "metadata": { "id": "3MrieRx2MheH", "colab_type": "code", "outputId": "57ae9832-a2a5-4fd8-f690-85f0558dfd63", "colab": { "base_uri": "https://localhost:8080/", "height": 204 } }, "source": [ "# Get TTS Code\n", "import os\n", "from getpass import getpass\n", "from urllib.parse import quote\n", "\n", "# Never store credentials in the notebook: take them from the environment\n", "# (GIT_USER / GIT_PASS) or prompt for them at run time.\n", "GIT_USER = os.environ.get(\"GIT_USER\") or input(\"GitHub username: \")\n", "GIT_PASS = os.environ.get(\"GIT_PASS\") or getpass(\"GitHub password or token: \")\n", "# Percent-encode both parts so characters such as '@', '/' or ':' cannot\n", "# corrupt the URL or be interpreted by the shell.\n", "TTS_CLONE = \"https://%s:%s@github.com/tiberiu44/TTS-Cube.git\" % (quote(GIT_USER, safe=\"\"), quote(GIT_PASS, safe=\"\"))\n", "!rm -rf TTS-Cube\n", "!git clone $TTS_CLONE\n", "!cd TTS-Cube && git checkout dev.g2p+style" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "Cloning into 'TTS-Cube'...\n", "remote: Enumerating objects: 10, done.\u001b[K\n", "remote: Counting objects: 10% (1/10) \u001b[K\rremote: Counting objects: 20% (2/10) \u001b[K\rremote: Counting objects: 30% (3/10) \u001b[K\rremote: Counting objects: 40% (4/10) \u001b[K\rremote: Counting objects: 50% (5/10) \u001b[K\rremote: Counting objects: 60% (6/10) \u001b[K\rremote: Counting objects: 70% (7/10) \u001b[K\rremote: Counting objects: 80% (8/10) \u001b[K\rremote: Counting objects: 90% (9/10) \u001b[K\rremote: Counting objects: 100% (10/10) \u001b[K\rremote: Counting objects: 100% (10/10), done.\u001b[K\n", "remote: Compressing objects: 100% (9/9), done.\u001b[K\n", "remote: Total 1986 (delta 2), reused 5 (delta 1), pack-reused 1976\u001b[K\n", "Receiving objects: 100% (1986/1986), 828.20 MiB | 35.13 MiB/s, done.\n", "Resolving deltas: 100% (1285/1285), done.\n", "Checking out files: 100% (216/216), done.\n", "Checking out files: 100% (64/64), done.\n", "Branch 'dev.g2p+style' set up to track remote branch 'dev.g2p+style' from 'origin'.\n", "Switched to a new branch 'dev.g2p+style'\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": 
"NbgN-ADpOfQp", "colab_type": "code", "outputId": "e970e578-5457-4813-e320-802e180a09a9", "colab": { "base_uri": "https://localhost:8080/", "height": 1377 } }, "source": [ "# Install requirements\n", "import os \n", "os.chdir('TTS-Cube')\n", "!pip install dynet\n", "!pip install -r requirements.txt" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "Collecting dynet\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/88/f0/01a561a301a8ea9aea1c28f82e108c38cd103964c7a46286ab01757a4092/dyNET-2.1-cp36-cp36m-manylinux1_x86_64.whl (28.1MB)\n", "\u001b[K |████████████████████████████████| 28.1MB 1.6MB/s \n", "\u001b[?25hRequirement already satisfied: cython in /usr/local/lib/python3.6/dist-packages (from dynet) (0.29.10)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from dynet) (1.16.4)\n", "Installing collected packages: dynet\n", "Successfully installed dynet-2.1\n", "Collecting numpy==1.15.0 (from -r requirements.txt (line 1))\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/88/29/f4c845648ed23264e986cdc5fbab5f8eace1be5e62144ef69ccc7189461d/numpy-1.15.0-cp36-cp36m-manylinux1_x86_64.whl (13.9MB)\n", "\u001b[K |████████████████████████████████| 13.9MB 2.8MB/s \n", "\u001b[?25hCollecting librosa==0.6.1 (from -r requirements.txt (line 2))\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/32/3d/7d1677f363bf2d576930be96371112a053264455885f40ff4299cd2a9348/librosa-0.6.1.tar.gz (1.6MB)\n", "\u001b[K |████████████████████████████████| 1.6MB 21.9MB/s \n", "\u001b[?25hCollecting scipy==1.1.0 (from -r requirements.txt (line 3))\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/a8/0b/f163da98d3a01b3e0ef1cab8dd2123c34aee2bafbb1c5bffa354cc8a1730/scipy-1.1.0-cp36-cp36m-manylinux1_x86_64.whl (31.2MB)\n", "\u001b[K |████████████████████████████████| 31.2MB 1.5MB/s \n", "\u001b[?25hCollecting pyworld==0.2.4 (from -r requirements.txt (line 4))\n", 
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/4d/bf/611107c139aba3af4de0e307d4e9285fbfd19411aaf91ce16a7ece62b7fe/pyworld-0.2.4.tar.gz (72kB)\n", "\u001b[K |████████████████████████████████| 81kB 26.4MB/s \n", "\u001b[?25hCollecting pysptk==0.1.11 (from -r requirements.txt (line 5))\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/27/47/45a4aa9a1fd5deeae80d32b965c5bc0fa1ba1b6cbc6c24cb1dcae6b98441/pysptk-0.1.11.tar.gz (402kB)\n", "\u001b[K |████████████████████████████████| 409kB 41.7MB/s \n", "\u001b[?25hCollecting Cython==0.27.3 (from -r requirements.txt (line 6))\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/e9/91/46cb3f4c73f1e96faa517f96e9d12de5b8c97d404c7ab71553da0e58c980/Cython-0.27.3-cp36-cp36m-manylinux1_x86_64.whl (3.1MB)\n", "\u001b[K |████████████████████████████████| 3.1MB 30.6MB/s \n", "\u001b[?25hCollecting Pillow==5.2.0 (from -r requirements.txt (line 7))\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d1/24/f53ff6b61b3d728b90934bddb4f03f8ab584a7f49299bf3bde56e2952612/Pillow-5.2.0-cp36-cp36m-manylinux1_x86_64.whl (2.0MB)\n", "\u001b[K |████████████████████████████████| 2.0MB 26.2MB/s \n", "\u001b[?25hCollecting Flask==1.0.2 (from -r requirements.txt (line 8))\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7f/e7/08578774ed4536d3242b14dacb4696386634607af824ea997202cd0edb4b/Flask-1.0.2-py2.py3-none-any.whl (91kB)\n", "\u001b[K |████████████████████████████████| 92kB 26.3MB/s \n", "\u001b[?25hRequirement already satisfied: audioread>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from librosa==0.6.1->-r requirements.txt (line 2)) (2.1.8)\n", "Requirement already satisfied: scikit-learn!=0.19.0,>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from librosa==0.6.1->-r requirements.txt (line 2)) (0.21.2)\n", "Requirement already satisfied: joblib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from librosa==0.6.1->-r requirements.txt (line 2)) 
(0.13.2)\n", "Requirement already satisfied: decorator>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from librosa==0.6.1->-r requirements.txt (line 2)) (4.4.0)\n", "Requirement already satisfied: six>=1.3 in /usr/local/lib/python3.6/dist-packages (from librosa==0.6.1->-r requirements.txt (line 2)) (1.12.0)\n", "Requirement already satisfied: resampy>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from librosa==0.6.1->-r requirements.txt (line 2)) (0.2.1)\n", "Requirement already satisfied: numba>=0.38.0 in /usr/local/lib/python3.6/dist-packages (from librosa==0.6.1->-r requirements.txt (line 2)) (0.40.1)\n", "Requirement already satisfied: Jinja2>=2.10 in /usr/local/lib/python3.6/dist-packages (from Flask==1.0.2->-r requirements.txt (line 8)) (2.10.1)\n", "Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from Flask==1.0.2->-r requirements.txt (line 8)) (1.1.0)\n", "Requirement already satisfied: Werkzeug>=0.14 in /usr/local/lib/python3.6/dist-packages (from Flask==1.0.2->-r requirements.txt (line 8)) (0.15.4)\n", "Requirement already satisfied: click>=5.1 in /usr/local/lib/python3.6/dist-packages (from Flask==1.0.2->-r requirements.txt (line 8)) (7.0)\n", "Requirement already satisfied: llvmlite>=0.25.0dev0 in /usr/local/lib/python3.6/dist-packages (from numba>=0.38.0->librosa==0.6.1->-r requirements.txt (line 2)) (0.29.0)\n", "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10->Flask==1.0.2->-r requirements.txt (line 8)) (1.1.1)\n", "Building wheels for collected packages: librosa, pyworld, pysptk\n", " Building wheel for librosa (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Stored in directory: /root/.cache/pip/wheels/e7/58/3a/820767c35a26cdb7e9d70971454fc6e072524aa4edc934f710\n", " Building wheel for pyworld (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Stored in directory: /root/.cache/pip/wheels/06/f9/b0/8a8b49ebc596329b34ee06798f192c27db09f42aff16a1cc82\n", " Building wheel for pysptk (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Stored in directory: /root/.cache/pip/wheels/c6/d1/5e/329456580b5cdaac0284ac0462d2f70b442bc3217bca0c33a6\n", "Successfully built librosa pyworld pysptk\n", "\u001b[31mERROR: magenta 0.3.19 has requirement librosa>=0.6.2, but you'll have librosa 0.6.1 which is incompatible.\u001b[0m\n", "\u001b[31mERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.\u001b[0m\n", "\u001b[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.\u001b[0m\n", "Installing collected packages: numpy, scipy, librosa, Cython, pyworld, pysptk, Pillow, Flask\n", " Found existing installation: numpy 1.16.4\n", " Uninstalling numpy-1.16.4:\n", " Successfully uninstalled numpy-1.16.4\n", " Found existing installation: scipy 1.3.0\n", " Uninstalling scipy-1.3.0:\n", " Successfully uninstalled scipy-1.3.0\n", " Found existing installation: librosa 0.6.3\n", " Uninstalling librosa-0.6.3:\n", " Successfully uninstalled librosa-0.6.3\n", " Found existing installation: Cython 0.29.10\n", " Uninstalling Cython-0.29.10:\n", " Successfully uninstalled Cython-0.29.10\n", " Found existing installation: Pillow 4.3.0\n", " Uninstalling Pillow-4.3.0:\n", " Successfully uninstalled Pillow-4.3.0\n", " Found existing installation: Flask 1.0.3\n", " Uninstalling Flask-1.0.3:\n", " Successfully uninstalled Flask-1.0.3\n", "Successfully installed Cython-0.27.3 Flask-1.0.2 Pillow-5.2.0 librosa-0.6.1 numpy-1.15.0 pysptk-0.1.11 pyworld-0.2.4 scipy-1.1.0\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "PIL", "numpy" ] } } }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": 
{ "id": "Po-iv-KaMeK7", "colab_type": "code", "colab": {} }, "source": [ "# Prepare model\n", "!mv data/models/en/* data/models/\n", "!bzip2 -d data/models/rnn_encoder.network.bz2" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "7GmLL9QpHS7E", "colab_type": "code", "colab": {} }, "source": [ "# Running synthesis\n", "def synthesize_text(text_string, speaker='rxr'):\n", "    \"\"\"Synthesize `text_string` with TTS-Cube and return an audio player.\n", "\n", "    Writes the text to input.txt, runs cube/synthesis.py to render test.wav\n", "    at a 16000 Hz target sample rate with the given speaker id, prints the\n", "    sample rate and duration, and returns an IPython.display.Audio object\n", "    for the rendered waveform.\n", "\n", "    Raises FileNotFoundError if synthesis did not produce test.wav.\n", "    \"\"\"\n", "    import os\n", "    import shlex\n", "    from IPython.display import Audio\n", "    from scipy.io import wavfile\n", "\n", "    # The context manager closes (and flushes) the file even if write() fails,\n", "    # so the synthesis subprocess always sees the complete text.\n", "    with open(\"input.txt\", \"w\") as f:\n", "        f.write(text_string)\n", "    # Drop any wav left over from a previous call; otherwise a failed synthesis\n", "    # would silently replay the old audio.\n", "    if os.path.exists('test.wav'):\n", "        os.remove('test.wav')\n", "    # Quote the speaker id so it reaches the script as exactly one argument.\n", "    speaker_arg = shlex.quote(str(speaker))\n", "    !python3 cube/synthesis.py --input-file=input.txt --output-file=test.wav --speaker=$speaker_arg --g2p-model=data/models/en-g2p --target-sample-rate=16000\n", "    # Play test.wav audio\n", "    framerate, sounddata = wavfile.read('test.wav')\n", "    print('Sample rate:',framerate,'Hz')\n", "    print('Total time:',len(sounddata)/framerate,'s')\n", "    return Audio(sounddata,rate=framerate)\n" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "_bH16UOvLfno", "colab_type": "code", "outputId": "a3803cea-918c-49a3-8843-dd28725a1c4a", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('Text messaging, or texting, is the act of composing and sending electronic messages, typically consisting of alphabetic and numeric characters, between two or more users of mobile devices.')\n" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'rxr', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 
'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=4.796911716461182\n", "Sample rate: 16000 Hz\n", "Total time: 10.848 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 5 } ] }, { "cell_type": "code", "metadata": { "id": "AFxgmNjoKuIe", "colab_type": "code", "outputId": "6a13f04a-c710-4556-8024-d8734e7311e4", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. This is a little more complicated one.')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'rxr', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=2.043520212173462\n", "Sample rate: 16000 Hz\n", "Total time: 4.464 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 6 } ] }, { "cell_type": "code", "metadata": { "id": "KA0bW5dOPjG1", "colab_type": "code", "outputId": "36eab3cd-2a64-448e-db35-abc14ef17007", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do.', speaker='cat')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 
9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'cat', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=4.596466779708862\n", "Sample rate: 16000 Hz\n", "Total time: 10.608 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 7 } ] }, { "cell_type": "code", "metadata": { "id": "PNYra6K-QkXg", "colab_type": "code", "outputId": "0cbf1608-55c7-4a43-d58e-64e997ad3bcf", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('Once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, \"and what is the use of a book,\" thought Alice \"without pictures or conversation\"?', speaker='rxr')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'rxr', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=4.651676654815674\n", "Sample rate: 16000 Hz\n", "Total time: 10.56 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, 
"metadata": { "tags": [] }, "execution_count": 8 } ] }, { "cell_type": "code", "metadata": { "id": "fr5rFHBPOPf7", "colab_type": "code", "outputId": "065a5f63-4451-4da7-fd17-1072694dd675", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('In another moment down went Alice after it, never once considering how in the world she was to get out again. The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well.', speaker='aew')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'aew', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=7.825695753097534\n", "Sample rate: 16000 Hz\n", "Total time: 18.096 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 9 } ] }, { "cell_type": "code", "metadata": { "id": "VEexSUysOs-g", "colab_type": "code", "outputId": "889e3dc1-f63b-4309-ce37-4d7f207b78ef", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next.', speaker='awb')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", 
"[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'awb', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=4.125210285186768\n", "Sample rate: 16000 Hz\n", "Total time: 9.312 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 10 } ] }, { "cell_type": "code", "metadata": { "id": "ImztZIaYSWWt", "colab_type": "code", "outputId": "5bca3595-3f28-4528-f23b-f9e75a43ef52", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='clb')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'clb', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=2.242943048477173\n", "Sample rate: 16000 Hz\n", "Total time: 5.04 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 11 } ] }, { "cell_type": "code", "metadata": { "id": "AKJchBw7nHai", "colab_type": "code", "outputId": "2df27c19-b952-4ada-cc4e-a7d25cc0c8e5", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='lnh')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'lnh', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=3.021852731704712\n", "Sample rate: 16000 Hz\n", "Total time: 6.72 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 12 } ] }, { "cell_type": "code", "metadata": { "id": "DtVvuB3SnKwG", "colab_type": "code", "outputId": "c40bd2dc-6eff-4352-cf1c-e2d92775787b", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='eey')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'eey', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=0.9225990772247314\n", "Sample rate: 16000 Hz\n", "Total time: 2.064 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 13 } ] }, { "cell_type": "code", "metadata": { "id": "wEjHoyJ0nO1K", "colab_type": "code", "outputId": "e90677e3-4405-47bd-e448-811a731ee330", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='fem')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'fem', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=2.097621202468872\n", "Sample rate: 16000 Hz\n", "Total time: 4.512 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 14 } ] }, { "cell_type": "code", "metadata": { "id": "UcLvDr-DnRpS", "colab_type": "code", "outputId": "f8575bab-9175-46cd-d19e-21bb5d7c598a", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='bdl')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'bdl', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=2.0030226707458496\n", "Sample rate: 16000 Hz\n", "Total time: 4.512 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 15 } ] }, { "cell_type": "code", "metadata": { "id": "n9yO4nsmnUPj", "colab_type": "code", "outputId": "b53b2516-30ec-45f1-80f0-65e9948dd301", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='ksp')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'ksp', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=2.13417649269104\n", "Sample rate: 16000 Hz\n", "Total time: 4.656 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 16 } ] }, { "cell_type": "code", "metadata": { "id": "D_iKD3BUnXQV", "colab_type": "code", "outputId": "3a411ab3-4300-4dbe-c8f1-114d503f6b70", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='axb')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'axb', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=2.1178483963012695\n", "Sample rate: 16000 Hz\n", "Total time: 4.752 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 17 } ] }, { "cell_type": "code", "metadata": { "id": "ik7exBaCnZ2B", "colab_type": "code", "outputId": "3903405b-2937-4223-da50-0db39c41a5a0", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='jmk')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'jmk', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=1.9213025569915771\n", "Sample rate: 16000 Hz\n", "Total time: 4.176 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 18 } ] }, { "cell_type": "code", "metadata": { "id": "2CrPnqx_ncLf", "colab_type": "code", "outputId": "9ed50bb7-2a05-418a-88ec-d2ed1b27db25", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='ahw')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'ahw', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=2.3511087894439697\n", "Sample rate: 16000 Hz\n", "Total time: 5.28 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 19 } ] }, { "cell_type": "code", "metadata": { "id": "WIFLiR_HneGX", "colab_type": "code", "outputId": "c783608b-e630-40c3-c1aa-8d3527bb98f3", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='gka')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'gka', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=2.1466517448425293\n", "Sample rate: 16000 Hz\n", "Total time: 4.656 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 20 } ] }, { "cell_type": "code", "metadata": { "id": "sOMozhC-ngZB", "colab_type": "code", "outputId": "7fc75c59-c450-4f6b-f116-900003cc11d2", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='ljm')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'ljm', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=0.9393620491027832\n", "Sample rate: 16000 Hz\n", "Total time: 2.064 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 21 } ] }, { "cell_type": "code", "metadata": { "id": "JNMC8B_wniH4", "colab_type": "code", "outputId": "12ea2c64-4ab2-4b6c-9253-be2d4f23ddcf", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='cat')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'cat', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=3.3476428985595703\n", "Sample rate: 16000 Hz\n", "Total time: 7.44 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 22 } ] }, { "cell_type": "code", "metadata": { "id": "oC8YCTksnlfT", "colab_type": "code", "outputId": "8b3d653a-4300-4425-b194-e34552f0a41a", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='slp')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'slp', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=2.1906933784484863\n", "Sample rate: 16000 Hz\n", "Total time: 4.944 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 23 } ] }, { "cell_type": "code", "metadata": { "id": "mugm_bN1noiv", "colab_type": "code", "outputId": "47d76117-de2b-4255-840d-65573c32d409", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='slt')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'slt', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=1.0589706897735596\n", "Sample rate: 16000 Hz\n", "Total time: 2.16 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 24 } ] }, { "cell_type": "code", "metadata": { "id": "RUu8vynjnpbC", "colab_type": "code", "outputId": "abbff371-1773-402c-97e6-c3d10fcf1d0c", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='aup')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'aup', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=1.9842188358306885\n", "Sample rate: 16000 Hz\n", "Total time: 4.416 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 25 } ] }, { "cell_type": "code", "metadata": { "id": "zwpPk-Y4ns5m", "colab_type": "code", "outputId": "346d1de1-b179-425a-fdf6-ee247079fe7c", "colab": { "base_uri": "https://localhost:8080/", "height": 299 } }, "source": [ "synthesize_text('This is one very simple test. 
This is a little more complicated one.', speaker='lj')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "[dynet] random seed: 9\n", "[dynet] allocating memory: 2048MB\n", "[dynet] memory allocation done.\n", "\tLoading data/models/en-g2p-bestAcc.network\n", "\tLoading data/models/en-g2p.lexicon\n", "cuda:0\n", "{'txt_file': 'input.txt', 'speaker': 'lj', 'output_file': 'test.wav', 'batch_size': 32, 'memory': 2048, 'gpu': None, 'sample': None, 'mgc_order': 80, 'temperature': 0.7, 'target_sample_rate': 16000, 'g2p': 'data/models/en-g2p', 'learning_rate': 0.0001}\n", "[Encoding]\n", "[Vocoding]\n", " execution time=2.560159206390381\n", "Sample rate: 16000 Hz\n", "Total time: 5.664 s\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 26 } ] }, { "cell_type": "code", "metadata": { "id": "UyX-0rCG3f5d", "colab_type": "code", "colab": {} }, "source": [ "" ], "execution_count": 0, "outputs": [] } ] }