{ "cells": [ { "cell_type": "markdown", "id": "9ab2965f-103f-4c4f-b438-f337686d55c7", "metadata": {}, "source": [ "[Onsei: Japanese pitch accent practice tool](https://github.com/itsupera/onsei)\n", "================================================================================\n", "\n", "
\n", "Click here for instructions!\n", "\n", "Any feedback or suggestions? Please tell me in this Gitter chat.\n", "
import os
import re
import tempfile

import ipywidgets as widgets
from ipywebrtc import AudioRecorder, CameraStream, AudioStream

from onsei.speech_record import SpeechRecord, AlignmentMethod
from onsei.utils import segment_speech
from onsei.figures import ViewRecordFigure, CompareFigure
from onsei.sentence import Sentence
from onsei.widgets import SampleSelector, UploadSample

# Globals


def get_jsut_samples(basepath="data/jsut_basic5000_sample"):
    """Load the JSUT sample collection from its transcript file.

    The transcript ``transcript_utf8.txt`` inside ``basepath`` is read line by
    line; each line has the form ``<basename>:<sentence>`` and the matching
    audio file is ``<basepath>/<basename>.wav``.

    Args:
        basepath: Directory holding the transcript and the ``.wav`` files.

    Returns:
        A dict mapping each sentence to ``{"filename": ..., "sentence": ...}``.

    Raises:
        OSError: If the transcript cannot be opened.
        ValueError: If a non-blank line has no ``:`` separator.
    """
    samples = {}
    transcript_path = os.path.join(basepath, "transcript_utf8.txt")
    # Be explicit about the encoding: the platform default is not UTF-8 everywhere.
    with open(transcript_path, encoding="utf-8") as f:
        for line in f:
            line = line.rstrip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            # Split on the first colon only, so a sentence may itself contain ':'.
            basename, sep, sentence = line.partition(":")
            if not sep:
                raise ValueError(f"Malformed transcript line (no ':'): {line!r}")
            samples[sentence] = {
                "filename": os.path.join(basepath, f"{basename}.wav"),
                "sentence": sentence,
            }
    return samples


def get_forvo_samples(basepath="data/forvo/everyday_phrases/greetings_and_apologies"):
    """Load the Forvo sample collection by scanning a directory.

    Files named ``pronunciation_ja_<sentence>.wav`` are picked up, in sorted
    filename order; the sentence is taken from the filename itself.

    Args:
        basepath: Directory to scan for recordings.

    Returns:
        A dict mapping each sentence to ``{"filename": ..., "sentence": ...}``.

    Raises:
        OSError: If ``basepath`` cannot be listed.
    """
    # The dot before "wav" is escaped so only a real ".wav" extension matches.
    pattern = re.compile(r"^pronunciation_ja_([^.]+)\.wav$")
    samples = {}
    for fname in sorted(os.listdir(basepath)):
        m = pattern.match(fname)
        if m:
            sentence = m.group(1)
            samples[sentence] = {
                "filename": os.path.join(basepath, fname),
                "sentence": sentence,
            }
    return samples


# Sample collections, keyed by the collection name passed to the sample widgets.
samples = {
    "Forvo": get_forvo_samples(),
    "JSUT Basic 5000 corpus": get_jsut_samples(),
    "My samples": {},  # Special collection for user's samples
}

# Mutable state shared by the callbacks defined further down in this cell.
teacher_rec = None
student_rec = None
sentence = None

# Initial values of the option widgets.
default_autoplay = True
show_spaces_between_segments = False
default_crop_vad = True

# Temporary directory whose path is handed to the upload widget.
my_samples_dir = tempfile.TemporaryDirectory()


# Create widgets

w_sample_selector = SampleSelector(samples)

w_upload_sample = UploadSample(samples, my_samples_dir.name)

w_autoplay_tick = widgets.Checkbox(
    value=default_autoplay,
    description='Autoplay',
    disabled=False,
    indent=False
)
w_show_spaces_tick = widgets.Checkbox(
    value=show_spaces_between_segments,
    description='Show spaces between sentence segments',
    disabled=False,
    indent=False
)
w_crop_vad_tick = widgets.Checkbox(
    value=default_crop_vad,
    description='Crop graphs to detected speech',
    disabled=False,
    indent=False
)
w_alignment_method_dropdown = widgets.Dropdown(
    options=[a.value for a in AlignmentMethod],
    description='Alignment method:',
    disabled=False,
)
# All options live in a single accordion section that starts collapsed.
w_options_accordion = widgets.Accordion(
    children=[widgets.VBox([
        w_autoplay_tick,
        w_show_spaces_tick,
        w_crop_vad_tick,
        w_alignment_method_dropdown,
    ])],
    selected_index=None,
)
w_options_accordion.set_title(0, "Options")

w_audio = widgets.Audio(value=b'', format='wav', autoplay=default_autoplay, loop=False)

w_sentence = widgets.HTML(value='')

# Audio-only media stream feeding the recorder widget.
camera = CameraStream(constraints={'audio': True, 'video': False})
w_recorder = AudioRecorder(stream=camera)

w_compare_btn = widgets.Button(description="Compare")

w_cmp_result = widgets.Label(value='')

fig_teacher = ViewRecordFigure(title="Teacher's recording")
fig_student = ViewRecordFigure(title="Your recording")

fig_cmp = CompareFigure()

# Callbacks


def add_uploaded_sample(change):
    """Register a freshly uploaded sample and select it.

    Args:
        change: Trait-change dict from the upload widget; ``change["new"]`` is
            the sample, read here through its ``'sentence'`` key.
    """
    sample = change["new"]
    # `samples` is only mutated, never rebound, so no `global` statement is needed.
    samples['My samples'][sample['sentence']] = sample

    # Switch to this sample
    w_sample_selector.set_selection('My samples', sample['sentence'])


w_upload_sample.observe(add_uploaded_sample, 'value')


def update_autoplay(change):
    """Mirror the "Autoplay" checkbox onto the teacher audio player."""
    w_audio.autoplay = change['new']


w_autoplay_tick.observe(update_autoplay, 'value')


def update_show_spaces(change):
    """Store the new "show spaces" setting and re-render the sentence."""
    global show_spaces_between_segments
    show_spaces_between_segments = change['new']
    update_sentence()


w_show_spaces_tick.observe(update_show_spaces, 'value')


# Link the checkbox value to each figure's `crop_vad` trait.
widgets.jslink((w_crop_vad_tick, 'value'), (fig_teacher, 'crop_vad'))
widgets.jslink((w_crop_vad_tick, 'value'), (fig_student, 'crop_vad'))


def get_sample_audio_data(sample):
    """Return the raw bytes of the audio file at ``sample['filename']``."""
    # Context manager so the file handle is closed deterministically.
    with open(sample['filename'], 'rb') as audio_file:
        return audio_file.read()


def update_sentence():
    """Render the current global ``sentence`` into the sentence widget.

    Clears the widget when no sentence is set. Spaces are stripped from the
    rendered HTML unless ``show_spaces_between_segments`` is true.
    """
    if sentence:
        sentence_html = sentence.to_html()
        if not show_spaces_between_segments:
            sentence_html = sentence_html.replace(" ", "")
        # NOTE(review): the markup that wraps {sentence_html} was not visible in
        # the reviewed source; only the visible text is reproduced here.
        # Confirm against the original notebook before applying.
        w_sentence.value = f'\n\n{sentence_html}\n\n'
    else:
        w_sentence.value = ''


def update_sample(sample):
    """Make ``sample`` the current teacher sample and reset comparison state.

    Args:
        sample: Mapping with at least the ``"sentence"`` and ``"filename"`` keys.
    """
    global teacher_rec
    global sentence

    sentence = Sentence(sample["sentence"])

    # Batch all widget updates so the front-end receives them together.
    with w_sentence.hold_sync(), \
            w_audio.hold_sync(), \
            fig_teacher.hold_sync(), \
            fig_student.hold_sync(), \
            fig_cmp.hold_sync(), \
            w_cmp_result.hold_sync():
        update_sentence()

        teacher_rec = SpeechRecord(sample['filename'], sentence, name="Teacher")

        w_audio.value = get_sample_audio_data(sample)

        fig_teacher.update_data(teacher_rec)
        fig_student.clear()
        fig_cmp.clear()

        w_cmp_result.value = ""


update_sample(w_sample_selector.selected_sample())


def sample_changed(change):
    """Observer for the sample selector: load the newly selected sample."""
    update_sample(dict(change["new"]))


w_sample_selector.observe(sample_changed, 'value')


def get_student_wav_filename():
    """Save the recorder's audio and convert it to a 16 kHz mono WAV file.

    The recording is written to ``test.webm`` and converted by ``ffmpeg`` into
    ``test.wav``, both in the current working directory.

    Returns:
        The path of the converted file, ``'test.wav'``.

    Raises:
        ValueError: Re-raised from ``w_recorder.save``; when its message starts
            with ``'No data'`` a hint is shown in the result label first.
        OSError: If the ``ffmpeg`` executable cannot be started.
        subprocess.CalledProcessError: If ``ffmpeg`` exits with a non-zero status.
    """
    import subprocess

    try:
        w_recorder.save('test.webm')
    except ValueError as exc:
        if str(exc).startswith('No data'):
            w_cmp_result.value = "Record something first !"
        raise

    try:
        # check=True turns a failed conversion into an exception instead of
        # silently continuing with a stale or missing test.wav.
        subprocess.run(
            [
                "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
                "-i", "test.webm", "-ar", "16000", "-ac", "1", "test.wav",
            ],
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        w_cmp_result.value = "FAILED !"
        raise
    return 'test.wav'


def run_compare(_):
    """Compare the student's recording against the current teacher recording.

    Used both as the "Compare" button click handler and as the observer of the
    alignment-method dropdown, hence the ignored positional argument.

    Raises:
        Exception: Any error from alignment or pitch comparison is re-raised
            after ``"FAILED !"`` has been written to the result label.
    """
    global student_rec

    student_wav_filename = get_student_wav_filename()
    # Alternatively, here is a sample:
    #student_wav_filename = "data/itsu_ga_ii_ka_wakarimasen.wav"

    student_rec = SpeechRecord(student_wav_filename, sentence, name="Student")
    fig_student.update_data(student_rec)

    alignment_method = w_alignment_method_dropdown.value

    try:
        student_rec.align_with(teacher_rec, method=alignment_method)
        mean_distance = student_rec.compare_pitch()
        w_cmp_result.value = f"Success !\nMean distance = {mean_distance:.2f}"
    except Exception:
        w_cmp_result.value = "FAILED !"
        raise

    fig_cmp.update_data(teacher_rec, student_rec)


w_compare_btn.on_click(run_compare)

# Update the comparison if we change the alignment method
w_alignment_method_dropdown.observe(run_compare, 'value')


# Layout

w_tab = widgets.Tab()
w_tab.children = [w_sample_selector, w_upload_sample]
w_tab.set_title(0, "Samples")
w_tab.set_title(1, "Upload new samples")

box = widgets.Box([
    widgets.Box([
        w_tab,
        w_options_accordion
    ]),
    w_sentence,
    widgets.Box([
        widgets.VBox([widgets.Label(value="Teacher's recording:"), w_audio], layout=widgets.Layout(width='33%')),
        widgets.VBox([widgets.Label(value="Your recording:"), w_recorder], layout=widgets.Layout(width='33%')),
        widgets.VBox([w_compare_btn, w_cmp_result], layout=widgets.Layout(width='33%')),
    ]),
    fig_cmp,
    fig_student,
    fig_teacher,
], layout=widgets.Layout(display="flex", flex_flow="column", align_items="stretch", align_content="center")
)

display(box)