{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a748d62-0cb4-432f-bd85-85a104fc9189",
   "metadata": {},
   "outputs": [],
   "source": [
    "from src.unmask import umaskall\n",
    "import ipywidgets as widgets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a04319b5-5f27-4049-b822-7b77209d8126",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input area, pre-filled with a sample cloze sentence.\n",
    "text = widgets.Textarea(value=\"it _is a [RB] useful _\", layout=widgets.Layout(width='100%', height=\"10em\"))\n",
    "\n",
    "# Top-K is exposed twice: a slider plus a numeric box bound to the same value.\n",
    "# Both start from the same default so the kernel-side `top_k.value` read by the Go\n",
    "# handler is valid even before the browser-side link has synchronised the two\n",
    "# widgets (an IntText without an explicit value starts at 0).\n",
    "DEFAULT_TOP_K = 10\n",
    "top_k = widgets.IntText(value=DEFAULT_TOP_K, layout=widgets.Layout(width='10%',))\n",
    "top_k2 = widgets.IntSlider(min=1, max=100, value=DEFAULT_TOP_K, step=1, description=\"Top-K\", readout=False,\n",
    "                           layout=widgets.Layout(width='90%',), style={'description_width': 'auto'})\n",
    "# jslink performs the synchronisation in the front end (browser), not in the kernel.\n",
    "widgets.jslink((top_k2, 'value'), (top_k, 'value'))\n",
    "\n",
    "# Option checkboxes; their values are forwarded to umaskall under the same names.\n",
    "split_stences = widgets.Checkbox(value=True, description=\"sentence by sentence\", indent=False)\n",
    "single_mask = widgets.Checkbox(value=False, description=\"one blank at a time\", indent=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa418240-afff-4c12-aad2-77be3b24d5fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import codecs\n",
    "file = widgets.FileUpload(multiple=False, description=\"Load File\", accept=\".txt\")\n",
    "\n",
    "\n",
    "def on_upload_change(change):\n",
    "    \"\"\"Load the uploaded file into the text area and run the Go handler on it.\n",
    "\n",
    "    `change` is the traitlets change dict; its 'owner' entry is the FileUpload widget.\n",
    "    Nothing happens when the widget holds no upload or the decoded content is empty.\n",
    "    \"\"\"\n",
    "    if not change['owner'].value:\n",
    "        # Value was cleared or is still empty: there is nothing to read.\n",
    "        return\n",
    "    c = get_content(change['owner'])\n",
    "    if c:\n",
    "        text.value = c\n",
    "        on_button_clicked(button)\n",
    "\n",
    "\n",
    "# ipywidgets 7 bumps the private `_counter` trait on every upload; ipywidgets 8 no\n",
    "# longer has that trait, so an observer registered on it would silently never fire.\n",
    "# Fall back to observing `value` when `_counter` is absent.\n",
    "upload_trait = '_counter' if file.has_trait('_counter') else 'value'\n",
    "file.observe(on_upload_change, names=upload_trait)\n",
    "\n",
    "def get_content(file):\n",
    "    \"\"\"Return the first uploaded file's content decoded as UTF-8 text.\n",
    "\n",
    "    `file.value` may be a dict of per-file dicts (the first value is used) or an\n",
    "    indexable sequence of per-file dicts (element 0 is used). Each per-file dict\n",
    "    carries the raw payload under 'content'.\n",
    "\n",
    "    Returns an empty string when `file.value` is empty.\n",
    "    \"\"\"\n",
    "    uploads = file.value\n",
    "    if not uploads:\n",
    "        # Empty dict/sequence: next() or [0] below would raise.\n",
    "        return \"\"\n",
    "    if isinstance(uploads, dict):\n",
    "        first = next(iter(uploads.values()))\n",
    "    else:\n",
    "        first = uploads[0]\n",
    "    return codecs.decode(first['content'], encoding=\"utf-8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6344db76-b4ef-4543-8b2c-5c2a8e7cd08d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "class MyWriter:\n",
    "    \"\"\"Tee-style writer: every write goes to sys.stdout and to a log file.\"\"\"\n",
    "\n",
    "    def __init__(self, file):\n",
    "        # `file` is the already-open, writable log file object.\n",
    "        self.logfile = file\n",
    "\n",
    "    def write(self, text):\n",
    "        \"\"\"Write `text` to both targets; return what the log file's write() returns.\"\"\"\n",
    "        # sys.stdout is looked up on every call so a redirected stdout is honoured.\n",
    "        sys.stdout.write(text)\n",
    "        return self.logfile.write(text)\n",
    "\n",
    "    def flush(self):\n",
    "        \"\"\"Flush both targets so this object can be used like a regular text stream.\"\"\"\n",
    "        sys.stdout.flush()\n",
    "        self.logfile.flush()\n",
    "\n",
    "    def close(self):\n",
    "        \"\"\"Flush stdout and close only the log file.\n",
    "\n",
    "        sys.stdout is not owned by this object and is still needed by the rest of\n",
    "        the session, so it is deliberately left open.\n",
    "        \"\"\"\n",
    "        sys.stdout.flush()\n",
    "        self.logfile.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db43299c-373f-4f59-a44a-a0d31df0ab0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "button = widgets.Button(\n",
    "    description=\"Go\",\n",
    "    button_style=\"success\",\n",
    "    icon=\"arrow-alt-circle-right\",\n",
    "    layout=widgets.Layout(width='30%'),\n",
    ")\n",
    "out = widgets.Output()\n",
    "\n",
    "\n",
    "@out.capture()\n",
    "def on_button_clicked(b):\n",
    "    \"\"\"Run umaskall on the current text with the current widget settings.\n",
    "\n",
    "    The output widget is cleared first. The `io` argument handed to umaskall is a\n",
    "    MyWriter wrapping \"output-holly_cloze.txt\", which is reopened in write mode on\n",
    "    every click. `b` is the clicked button and is not used.\n",
    "    \"\"\"\n",
    "    out.clear_output()\n",
    "    with open(\"output-holly_cloze.txt\", 'w', encoding=\"utf-8\") as log_file:\n",
    "        tee = MyWriter(log_file)\n",
    "        umaskall(\n",
    "            text.value,\n",
    "            top_k=top_k.value,\n",
    "            split_stences=split_stences.value,\n",
    "            single_mask=single_mask.value,\n",
    "            io=tee,\n",
    "        )\n",
    "\n",
    "\n",
    "button.on_click(on_button_clicked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e2ed6e6-abf8-4ad7-8f82-a7a6bdcc6fbf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per control group: the Top-K slider with its numeric box, the two\n",
    "# option checkboxes, and the Go button next to the file-upload button.\n",
    "tpk = widgets.HBox([top_k2, top_k])\n",
    "chb = widgets.HBox([split_stences, single_mask])\n",
    "btn = widgets.HBox([button, file])\n",
    "\n",
    "# Render top to bottom: settings rows, text area, action row, then the output area.\n",
    "display(tpk, chb, text, btn, out)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fc320627-07b4-447c-9ac3-69d0b9e9b9da",
   "metadata": {
    "tags": []
   },
   "source": [
    "***\n",
    "* 挖空的词可以用数字或下划线`_`开头,如:`It's a _great tool` 或 `It's 1pretty 2useful`\n",
    "* 挖空的词也可以用中括号指定词性,`|`表示逻辑或,如:`it is a [RB] useful __.` 或 `[NN|PRP] is [adj]`\n",
    "* 默认会自动补全末尾的句号;如不希望补全,请在末尾加反斜杠`\\`。例如可对比`I am _`与`I am _\\`的结果\n",
    "* 当输入是完整句子而非短语时效果会更好,整段话或者整篇文章亦可。`split_stences`控制自动拆分成单句或段落多次输入\n",
    "\n",
    "### 词性粗标签\n",
    "\n",
    "|Tag|Description|释义|例子|\n",
    "| - | - | - | - |\n",
    "|ADJ|adjective|形容词|new, good, high, special, big, local|\n",
    "|ADP|adposition|介词|on, of, at, with, by, into, under|\n",
    "|ADV|adverb|副词|really, already, still, early, now|\n",
    "|CONJ|conjunction|连词|and, or, but, if, while, although|\n",
    "|DET|determiner, article|限定词|the, a, some, most, every, no, which|\n",
    "|NOUN|noun|名词|year, home, costs, time, Africa|\n",
    "|NUM|numeral|数词|twenty-four, fourth, 1991, 14:24|\n",
    "|PRT|particle|虚词|at, on, out, over, per, that, up, with|\n",
    "|PRON|pronoun|代词|he, their, her, its, my, I, us|\n",
    "|VERB|verb|动词|is, say, told, given, playing, would|\n",
    "|.|punctuation marks|标点符号|. , ; !|\n",
    "|X|other|其它|ersatz, esprit, dunno, gr8, univeristy|\n",
    "\n",
    "### 词性细标签\n",
    "\n",
    "|Tag|Description|释义|例子|\n",
    "| - | - | - | - |\n",
    "|CC|Coordinating conjunction|连词|and, or, but, if, while, although|\n",
    "|CD|Cardinal number|数词|twenty-four, fourth, 1991, 14:24|\n",
    "|DT|Determiner|限定词|the, a, some, most, every, no|\n",
    "|EX|Existential there|存在量词|there, there’s|\n",
    "|FW|Foreign word|外来词|dolce, ersatz, esprit, quo, maitre|\n",
    "|IN|Preposition or subordinating conjunction|介词连词|on, of, at, with, by, into, under|\n",
    "|JJ|Adjective|形容词|new, good, high, special, big, local|\n",
    "|JJR|Adjective, comparative|比较级词语|bleaker braver breezier briefer brighter brisker|\n",
    "|JJS|Adjective, superlative|最高级词语|calmest cheapest choicest classiest cleanest clearest|\n",
    "|LS|List item marker|标记|A A. B B. C C. D E F First G H I J K|\n",
    "|MD|Modal|情态动词|can cannot could couldn’t|\n",
    "|NN|Noun, singular or mass|名词|year, home, costs, time, education|\n",
    "|NNS|Noun, plural|名词复数|undergraduates scotches|\n",
    "|NNP|Proper noun, singular|专有名词|Alison, Africa, April, Washington|\n",
    "|NNPS|Proper noun, plural|专有名词复数|Americans Americas Amharas Amityvilles|\n",
    "|PDT|Predeterminer|前限定词|all both half many|\n",
    "|POS|Possessive ending|所有格标记|’ ’s|\n",
    "|PRP|Personal pronoun|人称代词|hers herself him himself hisself|\n",
    "|PRP\\$|Possessive pronoun|所有格|her his mine my our ours|\n",
    "|RB|Adverb|副词|occasionally unabatingly maddeningly|\n",
    "|RBR|Adverb, comparative|副词比较级|further gloomier grander|\n",
    "|RBS|Adverb, superlative|副词最高级|best biggest bluntest earliest|\n",
    "|RP|Particle|虚词|aboard about across along apart|\n",
    "|SYM|Symbol|符号|% & ’ ” ”. ) )|\n",
    "|TO|to|词to|to|\n",
    "|UH|Interjection|感叹词|Goodbye Goody Gosh Wow|\n",
    "|VB|Verb, base form|动词|ask assemble assess|\n",
    "|VBD|Verb, past tense|动词过去式|dipped pleaded swiped|\n",
    "|VBG|Verb, gerund or present participle|动词现在分词|telegraphing stirring focusing|\n",
    "|VBN|Verb, past participle|动词过去分词|multihulled dilapidated aerosolized|\n",
    "|VBP|Verb, non-3rd person singular present|动词现在式非第三人称时态|predominate wrap resort sue|\n",
    "|VBZ|Verb, 3rd person singular present|动词现在式第三人称时态|bases reconstructs marks|\n",
    "|WDT|Wh-determiner|Wh限定词|which, that, what, whatever, whichever|\n",
    "|WP|Wh-pronoun|WH代词|that what whatever|\n",
    "|WP\\$|Possessive wh-pronoun|WH代词所有格|whose|\n",
    "|WRB|Wh-adverb|WH副词|how, however, when, whenever, where, why|\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}