{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "9ab2965f-103f-4c4f-b438-f337686d55c7",
   "metadata": {},
   "source": [
    "[Onsei: Japanese pitch accent practice tool](https://github.com/itsupera/onsei)\n",
    "================================================================================\n",
    "\n",
    "<details>\n",
    "<summary>Click here for instructions!</summary>\n",
    "<ul>\n",
    "<li>Select a sample to mimic and listen to it</li>\n",
    "<li>Record yourself mimicking using the record button below \"Your recording:\"</li>\n",
    "<li>Click on the \"Compare\" button</li>\n",
    "<li>Check out the Pitch comparison graph below to see where your mistakes are</li>\n",
    "<li>Try again to match the teacher's pitch!</li>\n",
    "<li>If this is too easy for you, disable the Autoplay in the Options and try reading the sentence without listening first :)</li>\n",
    "</ul>\n",
    "Any feedback or suggestions? Please tell me in <a href=\"https://gitter.im/itsupera-onsei/community\">this Gitter chat</a>.\n",
    "</details>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0cc01d81-5d43-4fd1-aac7-1121181ee2d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "\n",
    "import tempfile;\n",
    "\n",
    "import ipywidgets as widgets;\n",
    "from ipywebrtc import AudioRecorder, CameraStream, AudioStream;\n",
    "\n",
    "from onsei.speech_record import SpeechRecord, AlignmentMethod;\n",
    "from onsei.utils import segment_speech;\n",
    "from onsei.figures import ViewRecordFigure, CompareFigure;\n",
    "from onsei.sentence import Sentence;\n",
    "from onsei.widgets import SampleSelector, UploadSample;\n",
    "\n",
    "# Sample loading and global state\n",
    "\n",
    "\n",
    "def get_jsut_samples():\n",
    "    \"\"\"Load the JSUT sample collection from its transcript file.\n",
    "\n",
    "    Each non-blank line of ``transcript_utf8.txt`` is expected to have the\n",
    "    form ``BASENAME:SENTENCE``; the audio of a sentence is ``BASENAME.wav``\n",
    "    in the same directory as the transcript.\n",
    "\n",
    "    Returns:\n",
    "        dict: maps each sentence to ``{\"filename\": ..., \"sentence\": ...}``.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if a non-blank line contains no ``:`` separator.\n",
    "    \"\"\"\n",
    "    samples = {}\n",
    "    basepath = \"data/jsut_basic5000_sample\"\n",
    "    transcript_path = os.path.join(basepath, \"transcript_utf8.txt\")\n",
    "    # Decode explicitly as UTF-8 instead of relying on the platform default\n",
    "    with open(transcript_path, encoding=\"utf-8\") as f:\n",
    "        for line in f:\n",
    "            line = line.rstrip()\n",
    "            if not line:\n",
    "                continue  # tolerate blank lines, e.g. at the end of the file\n",
    "            # Split on the first colon only: the sentence itself may contain one\n",
    "            basename, sentence = line.split(':', 1)\n",
    "            filename = os.path.join(basepath, f\"{basename}.wav\")\n",
    "            samples[sentence] = {\n",
    "                \"filename\": filename,\n",
    "                \"sentence\": sentence,\n",
    "            }\n",
    "    return samples\n",
    "\n",
    "def get_forvo_samples():\n",
    "    \"\"\"Load the Forvo sample collection by scanning its directory.\n",
    "\n",
    "    Only files named ``pronunciation_ja_SENTENCE.wav`` are kept, where\n",
    "    SENTENCE is a non-empty run of characters containing no dot.\n",
    "\n",
    "    Returns:\n",
    "        dict: maps each sentence to ``{\"filename\": ..., \"sentence\": ...}``,\n",
    "        inserted in sorted filename order.\n",
    "    \"\"\"\n",
    "    samples = {}\n",
    "    basepath = \"data/forvo/everyday_phrases/greetings_and_apologies\"\n",
    "    for fname in sorted(os.listdir(basepath)):\n",
    "        # The dot is escaped so that only a real \".wav\" extension matches;\n",
    "        # fullmatch anchors the pattern at both ends of the filename.\n",
    "        m = re.fullmatch(r\"pronunciation_ja_([^.]+)\\.wav\", fname)\n",
    "        if m:\n",
    "            sentence = m.group(1)\n",
    "            samples[sentence] = {\n",
    "                \"filename\": os.path.join(basepath, fname),\n",
    "                \"sentence\": sentence,\n",
    "            }\n",
    "    return samples\n",
    "\n",
    "# Sample collections, keyed by the collection name\n",
    "samples = {}\n",
    "samples[\"Forvo\"] = get_forvo_samples()\n",
    "samples[\"JSUT Basic 5000 corpus\"] = get_jsut_samples()\n",
    "samples[\"My samples\"] = {}  # Special collection for user's samples\n",
    "\n",
    "# State shared between the callbacks, filled in once a sample is loaded\n",
    "teacher_rec = student_rec = sentence = None\n",
    "\n",
    "# Initial values of the options\n",
    "default_autoplay = True\n",
    "default_crop_vad = True\n",
    "show_spaces_between_segments = False\n",
    "\n",
    "# Temporary directory whose path is passed to the upload widget\n",
    "my_samples_dir = tempfile.TemporaryDirectory()\n",
    "\n",
    "\n",
    "# Create widgets\n",
    "\n",
    "# Sample selection and upload\n",
    "w_sample_selector = SampleSelector(samples)\n",
    "w_upload_sample = UploadSample(samples, my_samples_dir.name)\n",
    "\n",
    "# Option controls\n",
    "w_autoplay_tick = widgets.Checkbox(\n",
    "    description=\"Autoplay\",\n",
    "    value=default_autoplay,\n",
    "    disabled=False,\n",
    "    indent=False,\n",
    ")\n",
    "w_show_spaces_tick = widgets.Checkbox(\n",
    "    description=\"Show spaces between sentence segments\",\n",
    "    value=show_spaces_between_segments,\n",
    "    disabled=False,\n",
    "    indent=False,\n",
    ")\n",
    "w_crop_vad_tick = widgets.Checkbox(\n",
    "    description=\"Crop graphs to detected speech\",\n",
    "    value=default_crop_vad,\n",
    "    disabled=False,\n",
    "    indent=False,\n",
    ")\n",
    "w_alignment_method_dropdown = widgets.Dropdown(\n",
    "    options=[method.value for method in AlignmentMethod],\n",
    "    description=\"Alignment method:\",\n",
    "    disabled=False,\n",
    ")\n",
    "\n",
    "# All the options live in a single accordion section, collapsed at start\n",
    "w_options_accordion = widgets.Accordion(\n",
    "    children=[\n",
    "        widgets.VBox([\n",
    "            w_autoplay_tick,\n",
    "            w_show_spaces_tick,\n",
    "            w_crop_vad_tick,\n",
    "            w_alignment_method_dropdown,\n",
    "        ]),\n",
    "    ],\n",
    "    selected_index=None,\n",
    ")\n",
    "w_options_accordion.set_title(0, \"Options\")\n",
    "\n",
    "# Teacher side: audio player and the sentence being practiced\n",
    "w_audio = widgets.Audio(\n",
    "    value=b\"\",\n",
    "    format=\"wav\",\n",
    "    autoplay=default_autoplay,\n",
    "    loop=False,\n",
    ")\n",
    "w_sentence = widgets.HTML(value=\"\")\n",
    "\n",
    "# Student side: audio-only media stream feeding the recorder\n",
    "camera = CameraStream(constraints={\"audio\": True, \"video\": False})\n",
    "w_recorder = AudioRecorder(stream=camera)\n",
    "\n",
    "# Comparison trigger and its textual result\n",
    "w_compare_btn = widgets.Button(description=\"Compare\")\n",
    "w_cmp_result = widgets.Label(value=\"\")\n",
    "\n",
    "# Figures\n",
    "fig_teacher = ViewRecordFigure(title=\"Teacher's recording\")\n",
    "fig_student = ViewRecordFigure(title=\"Your recording\")\n",
    "fig_cmp = CompareFigure()\n",
    "\n",
    "# Callbacks\n",
    "\n",
    "\n",
    "def add_uploaded_sample(change):\n",
    "    \"\"\"Store a newly uploaded sample under 'My samples' and select it.\"\"\"\n",
    "    uploaded = change[\"new\"]\n",
    "    samples[\"My samples\"][uploaded[\"sentence\"]] = uploaded\n",
    "\n",
    "    # Switch to this sample\n",
    "    w_sample_selector.set_selection(\"My samples\", uploaded[\"sentence\"])\n",
    "\n",
    "w_upload_sample.observe(add_uploaded_sample, \"value\")\n",
    "\n",
    "\n",
    "def update_autoplay(change):\n",
    "    \"\"\"Copy the new state of the Autoplay checkbox onto the audio player.\"\"\"\n",
    "    autoplay_enabled = change[\"new\"]\n",
    "    w_audio.autoplay = autoplay_enabled\n",
    "\n",
    "w_autoplay_tick.observe(update_autoplay, \"value\")\n",
    "    \n",
    "def update_show_spaces(change):\n",
    "    \"\"\"Remember the new spacing option, then re-render the sentence.\"\"\"\n",
    "    global show_spaces_between_segments\n",
    "\n",
    "    show_spaces_between_segments = change[\"new\"]\n",
    "    update_sentence()\n",
    "\n",
    "w_show_spaces_tick.observe(update_show_spaces, \"value\")\n",
    "\n",
    "\n",
    "# Link the crop checkbox to the 'crop_vad' attribute of both record figures\n",
    "for _fig in (fig_teacher, fig_student):\n",
    "    widgets.jslink((w_crop_vad_tick, \"value\"), (_fig, \"crop_vad\"))\n",
    "\n",
    "\n",
    "def get_sample_audio_data(sample):\n",
    "    \"\"\"Return the raw content of a sample's audio file.\n",
    "\n",
    "    Args:\n",
    "        sample: mapping whose \"filename\" entry is the path of the audio file.\n",
    "\n",
    "    Returns:\n",
    "        bytes: the whole content of the file.\n",
    "    \"\"\"\n",
    "    # Context manager so the file handle is closed as soon as it has been read\n",
    "    with open(sample['filename'], 'rb') as f:\n",
    "        return f.read()\n",
    "\n",
    "\n",
    "def update_sentence():\n",
    "    \"\"\"Render the current sentence in the sentence widget, or empty it.\"\"\"\n",
    "    if not sentence:\n",
    "        w_sentence.value = ''\n",
    "        return\n",
    "\n",
    "    rendered = sentence.to_html()\n",
    "    if not show_spaces_between_segments:\n",
    "        # Remove every space character from the generated markup\n",
    "        rendered = rendered.replace(\" \", \"\")\n",
    "    w_sentence.value = f'<p style=\"font-size: xx-large\">{rendered}</p>'\n",
    "\n",
    "\n",
    "def update_sample(sample):\n",
    "    \"\"\"Make `sample` the current teacher sample and reset the student's results.\n",
    "\n",
    "    Args:\n",
    "        sample: mapping with a \"sentence\" and a \"filename\" entry.\n",
    "    \"\"\"\n",
    "    global teacher_rec, sentence\n",
    "\n",
    "    sentence = Sentence(sample[\"sentence\"])\n",
    "\n",
    "    # Hold the syncing of every touched widget while they are being updated\n",
    "    with w_sentence.hold_sync(), w_audio.hold_sync(), \\\n",
    "            fig_teacher.hold_sync(), fig_student.hold_sync(), \\\n",
    "            fig_cmp.hold_sync(), w_cmp_result.hold_sync():\n",
    "        update_sentence()\n",
    "\n",
    "        # Teacher side: new record, audio and figure\n",
    "        teacher_rec = SpeechRecord(sample[\"filename\"], sentence, name=\"Teacher\")\n",
    "        w_audio.value = get_sample_audio_data(sample)\n",
    "        fig_teacher.update_data(teacher_rec)\n",
    "\n",
    "        # Student side: drop anything left over from the previous sample\n",
    "        fig_student.clear()\n",
    "        fig_cmp.clear()\n",
    "        w_cmp_result.value = \"\"\n",
    "\n",
    "# Load the sample that is selected when the notebook starts\n",
    "update_sample(w_sample_selector.selected_sample())\n",
    "\n",
    "\n",
    "def sample_changed(change):\n",
    "    \"\"\"Load the sample the selector has just switched to.\"\"\"\n",
    "    new_sample = dict(change[\"new\"])\n",
    "    update_sample(new_sample)\n",
    "\n",
    "w_sample_selector.observe(sample_changed, \"value\")\n",
    "\n",
    "\n",
    "def get_student_wav_filename():\n",
    "    \"\"\"Save the student's recording and convert it to a 16 kHz mono WAV file.\n",
    "\n",
    "    The recorder widget saves the recording as a WebM file, which ffmpeg then\n",
    "    converts. Both files are written to the temporary directory held by\n",
    "    `my_samples_dir` rather than to fixed names in the working directory, so\n",
    "    that sessions sharing a working directory cannot overwrite each other.\n",
    "\n",
    "    Returns:\n",
    "        str: path of the converted WAV file.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the recorder cannot save, e.g. nothing was recorded yet.\n",
    "        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.\n",
    "    \"\"\"\n",
    "    import subprocess\n",
    "\n",
    "    webm_filename = os.path.join(my_samples_dir.name, \"student_recording.webm\")\n",
    "    wav_filename = os.path.join(my_samples_dir.name, \"student_recording.wav\")\n",
    "\n",
    "    try:\n",
    "        w_recorder.save(webm_filename)\n",
    "    except ValueError as exc:\n",
    "        if str(exc).startswith('No data'):\n",
    "            w_cmp_result.value = \"Record something first !\"\n",
    "        raise\n",
    "\n",
    "    # check=True: a failed conversion must not go unnoticed, otherwise the WAV\n",
    "    # left by a previous attempt would be compared instead of the new recording.\n",
    "    subprocess.run(\n",
    "        [\n",
    "            \"ffmpeg\", \"-hide_banner\", \"-loglevel\", \"error\", \"-y\",\n",
    "            \"-i\", webm_filename,\n",
    "            \"-ar\", \"16000\", \"-ac\", \"1\",\n",
    "            wav_filename,\n",
    "        ],\n",
    "        check=True,\n",
    "    )\n",
    "    return wav_filename\n",
    "\n",
    "\n",
    "def run_compare(_):\n",
    "    \"\"\"Compare the student's latest recording with the teacher's.\n",
    "\n",
    "    Registered both as the click handler of the Compare button and as the\n",
    "    observer of the alignment method dropdown; its argument is ignored.\n",
    "\n",
    "    The student's record and figure are refreshed first, then the record is\n",
    "    aligned with the teacher's and the pitch comparison is displayed. If the\n",
    "    alignment or the comparison fails, the result label shows \"FAILED !\",\n",
    "    the comparison figure is cleared and the exception is re-raised.\n",
    "    \"\"\"\n",
    "    global student_rec\n",
    "\n",
    "    student_wav_filename = get_student_wav_filename()\n",
    "    # Alternatively, here is a sample:\n",
    "    #student_wav_filename = \"data/itsu_ga_ii_ka_wakarimasen.wav\"\n",
    "\n",
    "    student_rec = SpeechRecord(student_wav_filename, sentence, name=\"Student\")\n",
    "    fig_student.update_data(student_rec)\n",
    "\n",
    "    alignment_method = w_alignment_method_dropdown.value\n",
    "\n",
    "    try:\n",
    "        student_rec.align_with(teacher_rec, method=alignment_method)\n",
    "        mean_distance = student_rec.compare_pitch()\n",
    "        w_cmp_result.value = f\"Success !\\nMean distance = {mean_distance:.2f}\"\n",
    "    except Exception:\n",
    "        w_cmp_result.value = \"FAILED !\"\n",
    "        # Do not leave the graph of a previous attempt next to a failure message\n",
    "        fig_cmp.clear()\n",
    "        raise\n",
    "\n",
    "    fig_cmp.update_data(teacher_rec, student_rec)\n",
    "\n",
    "\n",
    "w_compare_btn.on_click(run_compare)\n",
    "\n",
    "# Update the comparison if we change the alignment method\n",
    "w_alignment_method_dropdown.observe(run_compare, 'value')\n",
    "\n",
    "\n",
    "# Layout\n",
    "\n",
    "def _third_width_column(*children):\n",
    "    \"\"\"Stack `children` vertically in a column taking 33% of the width.\"\"\"\n",
    "    return widgets.VBox(list(children), layout=widgets.Layout(width=\"33%\"))\n",
    "\n",
    "\n",
    "# Tabs: pick one of the available samples, or upload a new one\n",
    "w_tab = widgets.Tab()\n",
    "w_tab.children = [w_sample_selector, w_upload_sample]\n",
    "for _index, _title in enumerate([\"Samples\", \"Upload new samples\"]):\n",
    "    w_tab.set_title(_index, _title)\n",
    "\n",
    "# First row: sample tabs next to the options\n",
    "w_header_row = widgets.Box([w_tab, w_options_accordion])\n",
    "\n",
    "# Third row: teacher's audio, student's recorder, comparison controls\n",
    "w_controls_row = widgets.Box([\n",
    "    _third_width_column(widgets.Label(value=\"Teacher's recording:\"), w_audio),\n",
    "    _third_width_column(widgets.Label(value=\"Your recording:\"), w_recorder),\n",
    "    _third_width_column(w_compare_btn, w_cmp_result),\n",
    "])\n",
    "\n",
    "# Everything stacked in a single column\n",
    "box = widgets.Box(\n",
    "    [\n",
    "        w_header_row,\n",
    "        w_sentence,\n",
    "        w_controls_row,\n",
    "        fig_cmp,\n",
    "        fig_student,\n",
    "        fig_teacher,\n",
    "    ],\n",
    "    layout=widgets.Layout(\n",
    "        display=\"flex\",\n",
    "        flex_flow=\"column\",\n",
    "        align_items=\"stretch\",\n",
    "        align_content=\"center\",\n",
    "    ),\n",
    ")\n",
    "\n",
    "display(box)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a13585bc-778a-4899-9bad-489d4646d299",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}