{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Rnn-attention_mechanisms.ipynb", "version": "0.3.2", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "yCtn1cYRG6R8", "colab_type": "text" }, "source": [ "## Get Scripts" ] }, { "cell_type": "code", "metadata": { "id": "89GComxjG2zY", "colab_type": "code", "colab": {} }, "source": [ "!wget -q https://raw.githubusercontent.com/Mmiglio/SpeechRecognition/Attention-Mechanism-and-Memory-Networks/src/getData.py\n", "!wget -q https://raw.githubusercontent.com/Mmiglio/SpeechRecognition/Attention-Mechanism-and-Memory-Networks/src/constants.py\n", "!wget -q https://raw.githubusercontent.com/Mmiglio/SpeechRecognition/Attention-Mechanism-and-Memory-Networks/src/trainingUtils.py\n", "!wget -q https://raw.githubusercontent.com/Mmiglio/SpeechRecognition/Attention-Mechanism-and-Memory-Networks/src/models.py\n", "!wget -q https://raw.githubusercontent.com/Mmiglio/SpeechRecognition/Attention-Mechanism-and-Memory-Networks/src/analysisFunctions.py" ], "execution_count": 0, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "8hLJMU7jHDJm", "colab_type": "text" }, "source": [ "## Download speech data" ] }, { "cell_type": "code", "metadata": { "id": "3FqNo78VHGsY", "colab_type": "code", "outputId": "33125573-701d-4424-b152-faa8c1fd732a", "colab": { "base_uri": "https://localhost:8080/", "height": 85 } }, "source": [ "from getData import downloadSpeechData, getDataDict\n", "\n", "# Download data\n", "downloadSpeechData(data_path='speechData/')\n", "\n", "# Get dict with files and labels\n", "dataDict = getDataDict(data_path='speechData/')" ], "execution_count": 2, "outputs": [ { "output_type": "stream", "text": [ "Downloading http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz into /content/speechData/train.tar.gz\n", "Extracting /content/speechData/train.tar.gz into /content/speechData/train\n", "Downloading http://download.tensorflow.org/data/speech_commands_test_set_v0.01.tar.gz into /content/speechData/test.tar.gz\n", "Extracting /content/speechData/test.tar.gz into /content/speechData/test\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "84zh4i7RR6Cq", "colab_type": "code", "outputId": "a842cf4d-8862-477c-962d-c5c6030395e5", "colab": { "base_uri": "https://localhost:8080/", "height": 68 } }, "source": [ "from getData import getDataframe\n", "\n", "trainDF = getDataframe(dataDict['train'])\n", "valDF = getDataframe(dataDict['val'])\n", "testDF = getDataframe(dataDict['test']) \n", "\n", "print(\"Train files: {}\".format(trainDF.shape[0]))\n", "print(\"Validation files: {}\".format(valDF.shape[0]))\n", "print(\"Test files: {}\".format(testDF.shape[0]))" ], "execution_count": 3, "outputs": [ { "output_type": "stream", "text": [ "Train files: 51088\n", "Validation files: 6798\n", "Test files: 6835\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "1pW40MeTCyUp", "colab_type": "code", "outputId": "f248020e-d402-481f-ef4f-3815f5d3153a", "colab": { "base_uri": "https://localhost:8080/", "height": 204 } }, "source": [ "trainDF.head()" ], "execution_count": 4, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
| \n", " | files | \n", "labels | \n", "category | \n", "
|---|---|---|---|
| 0 | \n", "speechData/train/dog/33f60c62_nohash_1.wav | \n", "8 | \n", "dog | \n", "
| 1 | \n", "speechData/train/bird/30065f33_nohash_0.wav | \n", "7 | \n", "bird | \n", "
| 2 | \n", "speechData/train/down/f5c3de1b_nohash_0.wav | \n", "21 | \n", "down | \n", "
| 3 | \n", "speechData/train/go/2bd2cad5_nohash_0.wav | \n", "27 | \n", "go | \n", "
| 4 | \n", "speechData/train/seven/aac5b7c1_nohash_1.wav | \n", "11 | \n", "seven | \n", "