{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "9a748d62-0cb4-432f-bd85-85a104fc9189", "metadata": {}, "outputs": [], "source": [ "from src.unmask import umaskall\n", "import ipywidgets as widgets" ] }, { "cell_type": "code", "execution_count": null, "id": "a04319b5-5f27-4049-b822-7b77209d8126", "metadata": {}, "outputs": [], "source": [ "text = widgets.Textarea(value=\"it _is a [RB] useful _\", layout=widgets.Layout(width='100%', height=\"10em\"))\n", "top_k = widgets.IntText(layout=widgets.Layout(width='10%',))\n", "top_k2 = widgets.IntSlider(min=1, max=100, value=10, step=1, description=\"Top-K\", readout=False, \n", " layout=widgets.Layout(width='90%',), style = {'description_width': 'auto'})\n", "widgets.jslink((top_k2, 'value'), (top_k, 'value'))\n", "split_stences = widgets.Checkbox(value=True, description=\"stence by stence\", indent=False)\n", "single_mask = widgets.Checkbox(value=False, description=\"one blank at a time\", indent=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "aa418240-afff-4c12-aad2-77be3b24d5fc", "metadata": {}, "outputs": [], "source": [ "import codecs\n", "file = widgets.FileUpload(multiple=False, description=\"Load File\", accept=\".txt\")\n", "def on_upload_change(change):\n", " c = get_content(change['owner'])\n", " if c:\n", " text.value = c\n", " on_button_clicked(button)\n", "file.observe(on_upload_change, names='_counter')\n", "\n", "def get_content(file):\n", " file = file.value\n", " if isinstance(file, dict):\n", " c = next(iter(file.values()))['content']\n", " else:\n", " c = file[0]['content']\n", " return codecs.decode(c, encoding=\"utf-8\")" ] }, { "cell_type": "code", "execution_count": null, "id": "6344db76-b4ef-4543-8b2c-5c2a8e7cd08d", "metadata": {}, "outputs": [], "source": [ "import sys\n", "class MyWriter:\n", " def __init__(self, file):\n", " self.logfile = file\n", " def write(self, text):\n", " sys.stdout.write(text)\n", " self.logfile.write(text)\n", " def close(self):\n", " sys.stdout.close()\n", " self.logfile.close()" ] }, { "cell_type": "code", "execution_count": null, "id": "db43299c-373f-4f59-a44a-a0d31df0ab0d", "metadata": {}, "outputs": [], "source": [ "button = widgets.Button(description=\"Go\", button_style=\"success\", \n", " icon=\"arrow-alt-circle-right\", layout=widgets.Layout(width='30%'))\n", "out = widgets.Output()\n", "\n", "@out.capture()\n", "def on_button_clicked(b):\n", " out.clear_output()\n", " with open(\"output-holly_cloze.txt\", 'w', encoding=\"utf-8\") as f:\n", " umaskall(text.value, top_k=top_k.value, \n", " split_stences=split_stences.value, single_mask=single_mask.value,\n", " io=MyWriter(f))\n", " \n", " \n", "button.on_click(on_button_clicked)" ] }, { "cell_type": "code", "execution_count": null, "id": "3e2ed6e6-abf8-4ad7-8f82-a7a6bdcc6fbf", "metadata": {}, "outputs": [], "source": [ "tpk = widgets.HBox(children=[top_k2, top_k])\n", "chb = widgets.HBox(children=[split_stences, single_mask])\n", "btn = widgets.HBox(children=[button, file])\n", "display(tpk, chb, text, btn, out)" ] }, { "cell_type": "markdown", "id": "fc320627-07b4-447c-9ac3-69d0b9e9b9da", "metadata": { "tags": [] }, "source": [ "***\n", "* 挖空的词可以用数字或下划线`_`开头,如:`It's a _great tool` 或 `It's 1pretty 2useful`\n", "* 挖空的词也可以用中括号指定词性,`|`表示逻辑或,如:`it is a [RB] useful __.` 或 `[NN|PRP] is [adj]`\n", "* 默认自动补全末尾句号,如不希望补全,在末尾加反斜杠`\\`,如:`I am _`和`I am _\\`的区别\n", "* 当输入是完整句子而非短语时效果会更好,整段话或者整篇文章亦可。`split_stences`控制自动拆分成单句或段落多次输入\n", "\n", "### 词性粗标签\n", "\n", "|Tag|Description|释义|例子|\n", "| - | - | - | - |\n", "|ADJ|adjective|形容词|new, good, high, special, big, local|\n", "|ADP|adposition|介词|on, of, at, with, by, into, under|\n", "|ADV|adverb|副词|really, already, still, early, now|\n", "|CONJ|conjunction|连词|and, or, but, if, while, although|\n", "|DET|determiner, article|限定词|the, a, some, most, every, no, which|\n", "|NOUN|noun|名词|year, home, costs, time, Africa|\n", "|NUM|numeral|数词|twenty-four, fourth, 1991, 14:24|\n", "|PRT|particle|虚词|at, on, out, over per, that, up, with|\n", "|PRON|pronoun|代词|he, their, her, its, my, I, us|\n", "|VERB|verb|动词|is, say, told, given, playing, would|\n", "|.|punctuation marks|标点符号|. , ; !|\n", "|X|other|其它|ersatz, esprit, dunno, gr8, univeristy|\n", "\n", "### 词性细标签\n", "\n", "|Tag|Description|释义|例子|\n", "| - | - | - | - |\n", "|CC|Coordinating conjunction|连词|and, or,but, if, while,although|\n", "|CD|Cardinal number|数词|twenty-four, fourth, 1991,14:24|\n", "|DT|Determiner|限定词|the, a, some, most,every, no|\n", "|EX|Existential there|存在量词|there, there’s|\n", "|FW|Foreign word|外来词|dolce, ersatz, esprit, quo,maitre|\n", "|IN|Preposition or subordinating conjunction|介词连词|on, of,at, with,by,into, under|\n", "|JJ|Adjective|形容词|new,good, high, special, big, local|\n", "|JJR|Adjective, comparative|比较级词语|bleaker braver breezier briefer brighter brisker|\n", "|JJS|Adjective, superlative|最高级词语|calmest cheapest choicest classiest cleanest clearest|\n", "|LS|List item marker|标记|A A. B B. C C. D E F First G H I J K|\n", "|MD|Modal|情态动词|can cannot could couldn’t|\n", "|NN|Noun, singular or mass|名词|year,home, costs, time, education|\n", "|NNS|Noun, plural|名词复数|undergraduates scotches|\n", "|NNP|Proper noun, singular|专有名词|Alison,Africa,April,Washington|\n", "|NNPS|Proper noun, plural|专有名词复数|Americans Americas Amharas Amityvilles|\n", "|PDT|Predeterminer|前限定词|all both half many|\n", "|POS|Possessive ending|所有格标记|’ ‘s|\n", "|PRP|Personal pronoun|人称代词|hers herself him himself hisself|\n", "|PRP\\$|Possessive pronoun|所有格|her his mine my our ours|\n", "|RB|Adverb|副词|occasionally unabatingly maddeningly|\n", "|RBR|Adverb, comparative|副词比较级|further gloomier grander|\n", "|RBS|Adverb, superlative|副词最高级|best biggest bluntest earliest|\n", "|RP|Particle|虚词|aboard about across along apart|\n", "|SYM|Symbol|符号|% & ’ ” ”. ) )|\n", "|TO|to|词to|to|\n", "|UH|Interjection|感叹词|Goodbye Goody Gosh Wow|\n", "|VB|Verb, base form|动词|ask assemble assess|\n", "|VBD|Verb, past tense|动词过去式|dipped pleaded swiped|\n", "|VBG|Verb, gerund or present participle|动词现在分词|telegraphing stirring focusing|\n", "|VBN|Verb, past participle|动词过去分词|multihulled dilapidated aerosolized|\n", "|VBP|Verb, non-3rd person singular present|动词现在式非第三人称时态|predominate wrap resort sue|\n", "|VBZ|Verb, 3rd person singular present|动词现在式第三人称时态|bases reconstructs marks|\n", "|WDT|Wh-determiner|Wh限定词|who,which,when,what,where,how|\n", "|WP|Wh-pronoun|WH代词|that what whatever|\n", "|WP\\$|Possessive wh-pronoun|WH代词所有格|whose|\n", "|WRB|Wh-adverb|WH副词||\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.11" } }, "nbformat": 4, "nbformat_minor": 5 }