{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from local.torch_basics import *\n", "from local.test import *\n", "from local.core import *\n", "from local.data.all import *\n", "from local.text.core import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from local.notebook.showdoc import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#default_exp text.data\n", "#default_cls_lvl 3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Text data\n", "\n", "> Functions and transforms to help gather text data in a `DataSource`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Numericalizing" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def make_vocab(count, min_freq=3, max_vocab=60000):\n", " \"Create a vocab of `max_vocab` size from `Counter` `count` with items present more than `min_freq`\"\n", " vocab = [o for o,c in count.most_common(max_vocab) if c >= min_freq]\n", " for o in reversed(defaults.text_spec_tok): #Make sure all special tokens are in the vocab\n", " if o in vocab: vocab.remove(o)\n", " vocab.insert(0, o)\n", " vocab = vocab[:max_vocab]\n", " return vocab + [f'xxfake' for i in range(0, 8-len(vocab)%8)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "count = Counter(['a', 'a', 'a', 'a', 'b', 'b', 'c', 'c', 'd'])\n", "test_eq(set([x for x in make_vocab(count) if not x.startswith('xxfake')]), \n", " set(defaults.text_spec_tok + 'a'.split()))\n", "test_eq(len(make_vocab(count))%8, 0)\n", "test_eq(set([x for x in make_vocab(count, min_freq=1) if not x.startswith('xxfake')]), \n", " set(defaults.text_spec_tok + 'a b c d'.split()))\n", "test_eq(set([x for x in make_vocab(count,max_vocab=12, min_freq=1) if not x.startswith('xxfake')]), \n", " set(defaults.text_spec_tok + 'a b c'.split()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class TensorText(TensorBase): pass\n", "class LMTensorText(TensorText): pass" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "class Numericalize(Transform):\n", " \"Reversible transform of tokenized texts to numericalized ids\"\n", " def __init__(self, vocab=None, min_freq=3, max_vocab=60000, sep=' '):\n", " self.vocab,self.min_freq,self.max_vocab,self.sep = vocab,min_freq,max_vocab,sep\n", " self.o2i = None if vocab is None else defaultdict(int, {v:k for k,v in enumerate(vocab)})\n", "\n", " def setup(self, dsrc):\n", " if dsrc is None: return\n", " if self.vocab is None:\n", " count = Counter(p for o in dsrc for p in o)\n", " self.vocab = make_vocab(count, min_freq=self.min_freq, max_vocab=self.max_vocab)\n", " self.o2i = defaultdict(int, {v:k for k,v in enumerate(self.vocab) if v != 'xxfake'})\n", "\n", " def encodes(self, o): return TensorText(tensor([self.o2i [o_] for o_ in o]))\n", " def decodes(self, o): return Str(self.sep.join([self.vocab[o_] for o_ in o if self.vocab[o_] != PAD]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "num = Numericalize(min_freq=1, sep=' ')\n", "num.setup(L('This is an example of text'.split(), 'this is another text'.split()))\n", "test_eq(set([x for x in num.vocab if not x.startswith('xxfake')]), \n", " set(defaults.text_spec_tok + 'This is an example of text this another'.split()))\n", "test_eq(len(num.vocab)%8, 0)\n", "start = 'This is an example of text'\n", "t = num(start.split())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(t, tensor([11, 9, 12, 13, 14, 10]))\n", "test_eq(num.decode(t), start)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "num = Numericalize(min_freq=2, sep=' ')\n", "num.setup(L('This is an example of text'.split(), 'this is another text'.split()))\n", "test_eq(set([x for x in num.vocab if not x.startswith('xxfake')]), \n", " set(defaults.text_spec_tok + 'is text'.split()))\n", "test_eq(len(num.vocab)%8, 0)\n", "t = num(start.split())\n", "test_eq(t, tensor([0, 9, 0, 0, 0, 10]))\n", "test_eq(num.decode(t), f'{UNK} is {UNK} {UNK} {UNK} text')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## LM_DataLoader -" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "#TODO: add backward\n", "@delegates()\n", "class LMDataLoader(TfmdDL):\n", " def __init__(self, dataset, lens=None, cache=2, bs=64, seq_len=72, num_workers=0, **kwargs):\n", " super().__init__(dataset=dataset, bs=bs, num_workers=num_workers, **kwargs)\n", " self.items = ReindexCollection([(o[0] if isinstance(o, tuple) else o)\n", " for o in dataset], cache=cache)\n", " self.seq_len = seq_len\n", " if lens is None: lens = [len(o) for o in self.items]\n", " self.lens = ReindexCollection(lens, idxs=self.items.idxs)\n", " # The \"-1\" is to allow for final label\n", " self.m = round_multiple(sum(lens)-1, bs*seq_len, round_down=True)\n", " self.n = self.m//(seq_len)\n", " self.spb = self.n//bs\n", " self.make_chunks()\n", "\n", " def make_chunks(self): self.chunks = Chunks(self.items, self.lens)\n", " def shuffle_fn(self,idxs):\n", " self.items.shuffle()\n", " self.make_chunks()\n", " return idxs\n", "\n", " def create_item(self, seq):\n", " if seq>=self.n: raise IndexError\n", " st = ((seq%self.bs)*self.spb + (seq//self.bs)) * self.seq_len\n", " txt = self.chunks[st : st+self.seq_len+1]\n", " return LMTensorText(txt[:-1]),txt[1:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bs,sl = 4,3\n", "ints = L([0,1,2,3,4],[5,6,7,8,9,10],[11,12,13,14,15,16,17,18],[19,20],[21,22,23],[24]).map(tensor)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dl = LMDataLoader(ints, bs=bs, seq_len=sl)\n", "test_eq(list(dl),\n", " [[tensor([[0, 1, 2], [6, 7, 8], [12, 13, 14], [18, 19, 20]]),\n", " tensor([[1, 2, 3], [7, 8, 9], [13, 14, 15], [19, 20, 21]])],\n", " [tensor([[3, 4, 5], [ 9, 10, 11], [15, 16, 17], [21, 22, 23]]),\n", " tensor([[4, 5, 6], [10, 11, 12], [16, 17, 18], [22, 23, 24]])]])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#hide\n", "#Check lens work\n", "dl = LMDataLoader(ints, lens=ints.map(len), bs=bs, seq_len=sl)\n", "test_eq(list(dl),\n", " [[tensor([[0, 1, 2], [6, 7, 8], [12, 13, 14], [18, 19, 20]]),\n", " tensor([[1, 2, 3], [7, 8, 9], [13, 14, 15], [19, 20, 21]])],\n", " [tensor([[3, 4, 5], [ 9, 10, 11], [15, 16, 17], [21, 22, 23]]),\n", " tensor([[4, 5, 6], [10, 11, 12], [16, 17, 18], [22, 23, 24]])]])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dl = LMDataLoader(ints, bs=bs, seq_len=sl, shuffle=True)\n", "for x,y in dl: test_eq(x[:,1:], y[:,:-1])\n", "((x0,y0), (x1,y1)) = tuple(dl)\n", "#Second batch begins where first batch ended\n", "test_eq(y0[:,-1], x1[:,0]) \n", "test_eq(type(x0), LMTensorText)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Showing" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "@typedispatch\n", "def show_batch(x: TensorText, y, samples, ctxs=None, max_n=10, **kwargs):\n", " if ctxs is None: ctxs = get_empty_df(min(len(samples), max_n))\n", " ctxs = show_batch[object](x, y, samples, max_n=max_n, ctxs=ctxs, **kwargs)\n", " display_df(pd.DataFrame(ctxs))\n", " return ctxs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "@typedispatch\n", "def show_batch(x: LMTensorText, y, samples, ctxs=None, max_n=10, **kwargs):\n", " return show_batch[TensorText](x, None, samples, ctxs=ctxs, max_n=max_n, **kwargs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Integration example" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labeltextis_valid
0negativeUn-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!False
1positiveThis is a extremely well-made film. The acting, script and camera-work are all first-rate. The music is good, too, though it is mostly early in the film, when things are still relatively cheery. There are no really superstars in the cast, though several faces will be familiar. The entire cast does an excellent job with the script.<br /><br />But it is hard to watch, because there is no good end to a situation like the one presented. It is now fashionable to blame the British for setting Hindus and Muslims against each other, and then cruelly separating them into two countries. There is som...False
\n", "
" ], "text/plain": [ " label \\\n", "0 negative \n", "1 positive \n", "\n", " text \\\n", "0 Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff! \n", "1 This is a extremely well-made film. The acting, script and camera-work are all first-rate. The music is good, too, though it is mostly early in the film, when things are still relatively cheery. There are no really superstars in the cast, though several faces will be familiar. The entire cast does an excellent job with the script.

But it is hard to watch, because there is no good end to a situation like the one presented. It is now fashionable to blame the British for setting Hindus and Muslims against each other, and then cruelly separating them into two countries. There is som... \n", "\n", " is_valid \n", "0 False \n", "1 False " ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path = untar_data(URLs.IMDB_SAMPLE)\n", "df = pd.read_csv(path/'texts.csv')\n", "df.head(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelis_validtext
0negativeFalse[xxbos, xxmaj, un, -, bleeping, -, believable, !, xxmaj, meg, xxmaj, ryan, does, n't, even, look, her, usual, pert, lovable, self, in, this, ,, which, normally, makes, me, forgive, her, shallow, ticky, acting, schtick, ., xxmaj, hard, to, believe, she, was, the, producer, on, this, dog, ., xxmaj, plus, xxmaj, kevin, xxmaj, kline, :, what, kind, of, suicide, trip, has, his, career, been, on, ?, xxmaj, whoosh, …, xxmaj, banzai, xxrep, 3, !, xxmaj, finally, this, was, directed, by, the, guy, who, did, xxmaj, big, xxmaj, chill, ?, xxmaj, must, be, a, replay, of, xxmaj, jonestown, -, hollywood,...
1positiveFalse[xxbos, xxmaj, this, is, a, extremely, well, -, made, film, ., xxmaj, the, acting, ,, script, and, camera, -, work, are, all, first, -, rate, ., xxmaj, the, music, is, good, ,, too, ,, though, it, is, mostly, early, in, the, film, ,, when, things, are, still, relatively, cheery, ., xxmaj, there, are, no, really, superstars, in, the, cast, ,, though, several, faces, will, be, familiar, ., xxmaj, the, entire, cast, does, an, excellent, job, with, the, script, ., \\n\\n, xxmaj, but, it, is, hard, to, watch, ,, because, there, is, no, good, end, to, a, situation, like, the, one, ...]
\n", "
" ], "text/plain": [ " label is_valid \\\n", "0 negative False \n", "1 positive False \n", "\n", " text \n", "0 [xxbos, xxmaj, un, -, bleeping, -, believable, !, xxmaj, meg, xxmaj, ryan, does, n't, even, look, her, usual, pert, lovable, self, in, this, ,, which, normally, makes, me, forgive, her, shallow, ticky, acting, schtick, ., xxmaj, hard, to, believe, she, was, the, producer, on, this, dog, ., xxmaj, plus, xxmaj, kevin, xxmaj, kline, :, what, kind, of, suicide, trip, has, his, career, been, on, ?, xxmaj, whoosh, …, xxmaj, banzai, xxrep, 3, !, xxmaj, finally, this, was, directed, by, the, guy, who, did, xxmaj, big, xxmaj, chill, ?, xxmaj, must, be, a, replay, of, xxmaj, jonestown, -, hollywood,... \n", "1 [xxbos, xxmaj, this, is, a, extremely, well, -, made, film, ., xxmaj, the, acting, ,, script, and, camera, -, work, are, all, first, -, rate, ., xxmaj, the, music, is, good, ,, too, ,, though, it, is, mostly, early, in, the, film, ,, when, things, are, still, relatively, cheery, ., xxmaj, there, are, no, really, superstars, in, the, cast, ,, though, several, faces, will, be, familiar, ., xxmaj, the, entire, cast, does, an, excellent, job, with, the, script, ., \\n\\n, xxmaj, but, it, is, hard, to, watch, ,, because, there, is, no, good, end, to, a, situation, like, the, one, ...] " ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_tok,count = tokenize_df(df, 'text')\n", "df_tok.head(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "splits = RandomSplitter()(range_of((df_tok)))\n", "tfm = Numericalize(make_vocab(count))\n", "dsrc = DataSource(df_tok, [[attrgetter('text'), tfm]], splits=splits, dl_type=LMDataLoader)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "xxbos xxmaj cheap , mediocre sequel to the successful \" the xxmaj mummy 's xxmaj hand \" has presumably dead evil xxmaj professor xxunk xxmaj xxunk his predecessor xxmaj xxunk xxunk xxmaj xxunk the quest of revenge xxunk to xxmaj america using mummy xxunk xxmaj xxunk , xxmaj jr who has no reason being in the disguise .. any stunt man could do the same credible work xxunk around and choking xxunk the goal of killing the surviving members of the xxmaj banning family whose patriarch xxunk xxmaj xxunk assistant xxmaj babe xxunk xxmaj xxunk the xxunk corpse of xxmaj princess xxmaj xxunk from her xxunk in xxunk .. xxunk considers this an outlandish act of xxunk and wants the family to suffer for doing such an awful xxunk towards an ancient xxmaj xxunk xxunk . xxmaj bey and the mummy xxmaj kharis find a nice xxunk in a xxunk where the xxmaj high xxmaj priest of xxmaj xxunk can work as a xxunk in disguise . xxmaj every xxmaj full xxmaj moon , xxmaj bey will xxunk xxmaj kharis a form of liquid xxunk of several xxmaj xxunk leaves which will keep him not only alive but xxunk to his master 's wishes . xxmaj bey xxunk xxmaj kharis to kill xxmaj stephen and his sister xxunk xxmaj gordon ) , while also xxunk time for xxmaj babe to return so that he will become victim # 3 . xxmaj dr . xxmaj john xxunk xxmaj xxunk to we d xxunk xxmaj xxunk ) , but does n't know that xxmaj bey secretly xxunk his xxunk making plans to xxunk her with xxmaj kharis ' help . xxmaj john 's life is in danger because of his father .. he 's also the last remaining member of the xxmaj banning line . xxmaj if xxmaj bey has xxmaj xxunk , there 's no chance of any more xxmaj xxunk being born . xxmaj the police must find xxmaj kharis and the one responsible for his carnage .. bey . \n", "\n", " xxmaj this film is a xxunk from xxup hand set years later as members of that film , xxmaj xxunk , xxmaj ford & xxmaj xxunk all appear in \" aging \" make - up providing xxunk showing the gaps in time as xxmaj xxunk has been preparing for the deaths of the xxmaj xxunk . xxmaj the xxunk romance of xxmaj john and xxmaj xxunk seems merely in this plot so that xxmaj bey will screw up xxunk his perfect plan which was being carried out successfully before he xxunk it up . xxmaj and , xxmaj bey merely sees her xxunk with xxmaj john on the grass .. the whole \" love - at - first - sight \" rubbish really did n't xxunk for me . xxmaj plus you have the mummy being able to kill people with one arm .. is any mummy really xxup that powerful ? xxmaj this film also uses a xxunk of footage from the previous film to save budget on this sequel to it . xxmaj there really is n't that much story here and yes , typical of xxmaj universal monster pictures , even in xxmaj america a mob of people will light .. ho hum .. xxunk going after xxmaj kharis . xxmaj you know how it 'll end .. john and the super - powerful xxmaj kharis will xxunk off in some huge mansion with fire burning all around them with the evil one being xxunk in xxunk .\n" ] } ], "source": [ "show_at(dsrc.train, 0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dbunch = dsrc.databunch(bs=16, seq_len=72)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
texttext_
0xxbos xxmaj as far as cinematography goes , this film was pretty good for the mid 50 's . xxmaj there were a few times that the lighting was way too hot but the shots were generally in frame and stayed in focus . xxmaj the acting was above average for a low budget stinker but the direction was horrible . xxmaj several scenes were dragged out way too long in anxxmaj as far as cinematography goes , this film was pretty good for the mid 50 's . xxmaj there were a few times that the lighting was way too hot but the shots were generally in frame and stayed in focus . xxmaj the acting was above average for a low budget stinker but the direction was horrible . xxmaj several scenes were dragged out way too long in an attempt
1giancarlo 's ) was more successful or xxunk or more interesting . xxmaj it merely gives us a rewarding glimpse at what it must be like to search for identity when two countries and xxunk are involved . and this look is not driven by xxunk or disdain to either country , which makes it such a great film for any and every country dealing with the xxunk resulting from xxunk .'s ) was more successful or xxunk or more interesting . xxmaj it merely gives us a rewarding glimpse at what it must be like to search for identity when two countries and xxunk are involved . and this look is not driven by xxunk or disdain to either country , which makes it such a great film for any and every country dealing with the xxunk resulting from xxunk . xxmaj
2curious about xxmaj xxunk or the homosexual undertones , do yourself a favor and find a better xxmaj western . xxbos xxmaj halfway through xxmaj xxunk xxmaj koltai 's \" evening , \" a woman on her deathbed asks a figure appearing in her hallucination : \" can you tell me where my life went ? \" xxmaj the line could be embarrassingly theatrical , but the woman speaking it is xxmajabout xxmaj xxunk or the homosexual undertones , do yourself a favor and find a better xxmaj western . xxbos xxmaj halfway through xxmaj xxunk xxmaj koltai 's \" evening , \" a woman on her deathbed asks a figure appearing in her hallucination : \" can you tell me where my life went ? \" xxmaj the line could be embarrassingly theatrical , but the woman speaking it is xxmaj vanessa
3want to watch some terrible movie - then xxmaj xxunk 's xxmaj xxunk is definitely the choice , but xxunk my advice and do n't rate it by xxunk means . xxbos xxmaj as has been noted , this formula has been filmed several times , most recently as \" you 've xxmaj got xxmaj mail \" , with xxmaj tom xxmaj hanks and xxunk xxmaj xxunk \" xxmaj ryan . xxmajto watch some terrible movie - then xxmaj xxunk 's xxmaj xxunk is definitely the choice , but xxunk my advice and do n't rate it by xxunk means . xxbos xxmaj as has been noted , this formula has been filmed several times , most recently as \" you 've xxmaj got xxmaj mail \" , with xxmaj tom xxmaj hanks and xxunk xxmaj xxunk \" xxmaj ryan . xxmaj of
4they do n't even show him steal away a xxunk from his poor creators ( maybe he was trying to create the head detective from xxmaj in xxmaj living xxmaj color ) . xxmaj so after these five have been killed the teacher and the nerdy girl are all who is left , the teacher figures out that xxmaj jigsaw only needs a head to finish his masterpiece , since he isdo n't even show him steal away a xxunk from his poor creators ( maybe he was trying to create the head detective from xxmaj in xxmaj living xxmaj color ) . xxmaj so after these five have been killed the teacher and the nerdy girl are all who is left , the teacher figures out that xxmaj jigsaw only needs a head to finish his masterpiece , since he is still
5the majority of the movie going public is stupid . xxmaj they must be right because xxmaj the majority of people actually liked this film . i mean xxunk xxunk in a matter of seconds . xxmaj the secret treasure room hidden under the xxmaj manhattan xxunk ? xxmaj you 'd think with all the work that 's gone on in xxmaj new xxmaj york underground xxmaj that room would have beenmajority of the movie going public is stupid . xxmaj they must be right because xxmaj the majority of people actually liked this film . i mean xxunk xxunk in a matter of seconds . xxmaj the secret treasure room hidden under the xxmaj manhattan xxunk ? xxmaj you 'd think with all the work that 's gone on in xxmaj new xxmaj york underground xxmaj that room would have been discovered
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dbunch.show_batch(max_n=6)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "b = dbunch.one_batch()\n", "test_eq(type(x), LMTensorText)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(len(dbunch.valid_ds[0][0]), dbunch.valid_dl.lens[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classification" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def pad_input(samples, pad_idx=1, pad_fields=0, pad_first=False, backwards=False):\n", " \"Function that collect samples and adds padding. Flips token order if needed\"\n", " pad_fields = L(pad_fields)\n", " max_len_l = pad_fields.map(lambda f: max([len(s[f]) for s in samples]))\n", " if backwards: pad_first = not pad_first\n", " def _f(field_idx, x):\n", " if field_idx not in pad_fields: return x\n", " idx = pad_fields.items.index(field_idx) #TODO: remove items if L.index is fixed\n", " sl = slice(-len(x), sys.maxsize) if pad_first else slice(0, len(x))\n", " pad = x.new_zeros(max_len_l[idx]-x.shape[0])+pad_idx\n", " x1 = torch.cat([pad, x] if pad_first else [x, pad])\n", " if backwards: x1 = x1.flip(0)\n", " return retain_type(x1, x)\n", " return [tuple(map(lambda idxx: _f(*idxx), enumerate(s))) for s in samples]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(pad_input([(tensor([1,2,3]),1), (tensor([4,5]), 2), (tensor([6]), 3)], pad_idx=0), \n", " [(tensor([1,2,3]),1), (tensor([4,5,0]),2), (tensor([6,0,0]), 3)])\n", "test_eq(pad_input([(tensor([1,2,3]), (tensor([6]))), (tensor([4,5]), tensor([4,5])), (tensor([6]), (tensor([1,2,3])))], pad_idx=0, pad_fields=1), \n", " [(tensor([1,2,3]),(tensor([6,0,0]))), (tensor([4,5]),tensor([4,5,0])), ((tensor([6]),tensor([1, 2, 3])))])\n", "test_eq(pad_input([(tensor([1,2,3]),1), (tensor([4,5]), 2), (tensor([6]), 3)], pad_idx=0, pad_first=True), \n", " [(tensor([1,2,3]),1), (tensor([0,4,5]),2), (tensor([0,0,6]), 3)])\n", "test_eq(pad_input([(tensor([1,2,3]),1), (tensor([4,5]), 2), (tensor([6]), 3)], pad_idx=0, backwards=True), \n", " [(tensor([3,2,1]),1), (tensor([5,4,0]),2), (tensor([6,0,0]), 3)])\n", "x = test_eq(pad_input([(tensor([1,2,3]),1), (tensor([4,5]), 2), (tensor([6]), 3)], pad_idx=0, backwards=True), \n", " [(tensor([3,2,1]),1), (tensor([5,4,0]),2), (tensor([6,0,0]), 3)])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#hide\n", "#Check retain type\n", "x = [(TensorText([1,2,3]),1), (TensorText([4,5]), 2), (TensorText([6]), 3)]\n", "y = pad_input(x, pad_idx=0)\n", "for s in y: test_eq(type(s[0]), TensorText)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def _default_sort(x): return len(x[0])\n", "\n", "@delegates(TfmdDL)\n", "class SortedDL(TfmdDL):\n", " def __init__(self, dataset, sort_func=None, res=None, **kwargs):\n", " super().__init__(dataset, **kwargs)\n", " self.sort_func = _default_sort if sort_func is None else sort_func\n", " self.res = [self.sort_func(self.do_item(i)) for i in range_of(self.dataset)] if res is None else res\n", " self.idx_max = np.argmax(self.res)\n", "\n", " def get_idxs(self):\n", " idxs = super().get_idxs()\n", " if self.shuffle: return idxs\n", " return sorted(idxs, key=lambda i: self.res[i], reverse=True)\n", "\n", " def shuffle_fn(self,idxs):\n", " idxs = np.random.permutation(len(self.dataset))\n", " idx_max = np.extract(idxs==self.idx_max, idxs)[0]\n", " idxs[0],idxs[idx_max] = idxs[idx_max],idxs[0]\n", " sz = self.bs*50\n", " chunks = [idxs[i:i+sz] for i in range(0, len(idxs), sz)]\n", " chunks = [sorted(s, key=lambda i: self.res[i], reverse=True) for s in chunks]\n", " sort_idx = np.concatenate(chunks)\n", "\n", " sz = self.bs\n", " batches = [sort_idx[i:i+sz] for i in range(0, len(sort_idx), sz)]\n", " sort_idx = np.concatenate(np.random.permutation(batches[1:-1])) if len(batches) > 2 else np.array([],dtype=np.int)\n", " sort_idx = np.concatenate((batches[0], sort_idx) if len(batches)==1 else (batches[0], sort_idx, batches[-1]))\n", " return iter(sort_idx)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds = [(tensor([1,2]),1), (tensor([3,4,5,6]),2), (tensor([7]),3), (tensor([8,9,10]),4)]\n", "dl = SortedDL(ds, bs=2, before_batch=partial(pad_input, pad_idx=0))\n", "test_eq(list(dl), [(tensor([[ 3, 4, 5, 6], [ 8, 9, 10, 0]]), tensor([2, 4])), \n", " (tensor([[1, 2], [7, 0]]), tensor([1, 3]))])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds = [(tensor(range(random.randint(1,10))),i) for i in range(101)]\n", "dl = SortedDL(ds, bs=2, create_batch=partial(pad_input, pad_idx=-1), shuffle=True, num_workers=0)\n", "batches = list(dl)\n", "max_len = len(batches[0][0])\n", "for b in batches: \n", " assert(len(b[0])) <= max_len \n", " test_ne(b[0][-1], -1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "splits = RandomSplitter()(range_of(df_tok))\n", "dsrc = DataSource(df_tok, splits=splits, tfms=[\n", " [attrgetter(\"text\"), Numericalize(make_vocab(count))],\n", " [attrgetter(\"label\"), Categorize()]], dl_type=SortedDL)\n", "dbch = dsrc.databunch(before_batch=pad_input)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textcategory
0xxbos xxmaj raising xxmaj victor xxmaj vargas : a xxmaj review \\n\\n xxmaj you know , xxmaj raising xxmaj victor xxmaj vargas is like sticking your hands into a big , xxunk bowl of xxunk . xxmaj it 's warm and gooey , but you 're not sure if it feels right . xxmaj try as i might , no matter how warm and gooey xxmaj raising xxmaj victor xxmaj vargas became i was always aware that something did n't quite feel right . xxmaj victor xxmaj vargas suffers from a certain xxunk on the director 's part . xxmaj apparently , the director thought that the ethnic backdrop of a xxmaj latino family on the lower east side , and an xxunk storyline would make the film critic proof . xxmaj he was right , but it did n't fool me . xxmaj raising xxmaj victor xxmaj vargas is the story about a xxunk - year old boy called , you guessed it , xxmaj victor xxmaj vargas ( victor xxmaj xxunk ) who lives his teenage years chasing more xxunk than the xxmaj rolling xxmaj xxunk could do in all the years they 've xxunk . xxmaj the movie starts off in ` ugly xxmaj fat ' xxmaj donna 's bedroom where xxmaj victor is sure to seduce her , but a cry from outside xxunk his plans when his best - friend xxmaj harold ( kevin xxmaj xxunk ) comes - a - looking for him . xxmaj caught in the attempt by xxmaj harold and his sister , xxmaj victor xxmaj vargas runs off for xxunk control . xxmaj yet even with the embarrassing implication that he 's been xxunk the xxunk girl in the neighborhood , nothing xxunk young xxmaj victor from going off on the hunt for more fresh meat . xxmaj on a hot , xxmaj new xxmaj york xxmaj city day they make way to the local public swimming pool where xxmaj victor 's eyes catch a glimpse of the lovely young xxunk xxmaj judy ( judy xxmaj xxunk ) , who 's not just pretty , but a strong and independent too . xxmaj the relationship that develops between xxmaj victor and xxmaj judy becomes the focus of the film . xxmaj the story also focuses on xxmaj victor 's family that is comprised of his grandmother or xxunk ( xxunk xxmaj guzman ) , his brother xxmaj nino ( also played by real life brother to xxmaj victor , xxmaj xxunk xxmaj xxunk ) and his sister xxmaj vicky ( xxunk xxmaj xxunk ) . xxmaj the action follows xxmaj victor between scenes with xxmaj judy and scenes with his family . xxmaj victor tries to xxunk with being an oversexed pimp - daddy , his feelings for xxmaj judy and his grandmother 's conservative xxmaj catholic upbringing . \\n\\n xxmaj the problems that xxunk from xxmaj raising xxmaj victor xxmaj vargas are a few , but glaring errors . xxmaj throughout the film you get to know certain characters like xxmaj vicky , xxmaj nino , xxmaj xxunk , xxmaj judy and even xxmaj judy 's best friend xxmaj xxunk . xxmaj the problem is , we know nothing of xxmaj victor xxmaj vargas except that he is the biggest gigolo in the neighborhood . xxmaj we know that he knows how to lick his lips , and xxunk his xxunk , and carry himself for the sake of xxunk girls into the xxunk , but that 's all . xxmaj we know that xxmaj nino plays piano , and quiet well , you could see it by the awards on the family piano . xxmaj we know his sister xxmaj xxunk , is a gossip - loving girl with an xxunk interest in watching xxup tv . xxmaj we know that xxunk is a hard - working traditional xxmaj xxunk woman who 's trying to raise her kids with xxunk in a world of excess corruption . xxmaj yet where is the titular character , xxmaj victor xxmaj vargas ? xxmaj he 's in this movie somewhere , but we only know what the movie tells us . xxmaj this is by far the film 's biggest flaw . xxmaj victor xxmaj vargas is n't so much a character but a xxunk - xxunk ball , xxunk between scenes with xxmaj judy and his xxmaj grandmother , but we never get to know who xxmaj victor xxmaj vargas really is . xxmaj this is important because as xxmaj i 've mentioned the only thing we know of xxmaj victor xxmaj vargas is that he 's a sexually active teenager with a xxunk the size of xxmaj manhattan . xxmaj he 's a total xxmaj xxunk - male . xxmaj victor xxmaj vargas is not the kind of character i sympathize with at all . xxmaj why should anyone ? xxmaj so by the end of the movie , in the aftermath of the climax are we truly led to believe that somehow xxmaj victor xxmaj vargas has attained xxup any depth and learned the errors of his ways ? xxmaj how could such a two - dimensional character have any depth ? xxmaj if only the director had worried a little more about xxunk out his main character instead of worrying about getting that perfect hand - held shot . \\n\\n xxmaj raising xxmaj victor xxmaj vargas brings to life the world of the xxmaj latino inner - city neighborhood to the big screen . xxmaj something that few films have done before in the past . xxmaj the film has been xxunk for feeling so real , and i wo n't \\n\\n argue with that . i have n't seen this level of reality since xxup xxunk aired xxmaj survivor . xxmaj seriously , although the movie has some nice shots of the city , the writer / director xxmaj peter xxmaj sollett was way too xxunk on close - ups and hand - held shots . xxmaj this problem is particularly noticed in xxunk scenes that are so claustrophobic i was forced to perform deep - breathing xxunk to keep from passing out . xxmaj as the film continues , the shots get tighter and tighter with faces xxunk from xxunk to xxunk on the screen ; you can practically xxunk xxmaj victor xxmaj vargas 's cheap xxunk . xxmaj the overall effect is unrealistic in contrast . xxmaj the xxunk scenes of inner - city apartments make them look small and xxunk , which is not true . xxmaj i 've been in those type apartments ; i used to live in one . xxmaj they 're not xxunk but they have high xxunk and they 're decent living xxunk . xxmaj by the movie 's standards you 'd think that these apartments were xxunk xxunk of xxunk - and - xxunk , xxunk paint and xxunk walls . xxmaj unfortunately , xxmaj sollett 's constant use of close - ups and one particularly bad shot with a xxunk - in on one scene come off as totally amateurish . xxmaj but xxmaj raising xxmaj victor xxmaj vargas is only xxmaj sollett 's second film , and his most well known , a solid effort in filmmaking that will hopefully get better as he continues to make films . xxmaj one review i read xxunk the movie as , ` ethnicity for xxmaj ethnicity 's xxmaj sake , ' and i can not agree more . xxmaj if xxmaj victor xxmaj vargas were truly a great film and story , then the characters ' xxunk would n't matter whether they were xxmaj latino , xxmaj chinese , etc . xxmaj yet if you were to take this story and stick it in middle - class xxunk with a bunch of xxunk - xxunk white kids the results would n't be such glowing reviews , and we 'd see the film 's flaws more clearly . xxmaj indeed , some other aspects of the use of xxmaj latinos in this film bother me . xxmaj while some aspects of xxmaj victor xxmaj vargas are accurate others i have to question . xxmaj for example , xxmaj victor , xxmaj nino and xxmaj vicky all share the same room to sleep . xxmaj this set off an alarm for me because it seemed contrary to what i believe . xxmaj any self - xxunk xxmaj latino family would n't have two older brothers sharing the same room with a thirteen - year old girl . xxmaj at first i was xxunk , perhaps i was wrong , but after speaking with my grandmother i knew my problem with this was justified . xxmaj considering how conservative the grandmother is , you 'd think that xxmaj vicky would have been sleeping in her room . \\n\\n xxmaj as a xxmaj latino who grew up in a somewhat conservative xxmaj cuban household , raised by my grandmother while my mother was working full - time , i could relate to the movie in many ways , which is why my critical xxunk are xxunk because i really wanted to love this movie . xxmaj unfortunately , my lack of respect for xxmaj victor xxmaj vargas xxunk my feelings for the film . xxmaj maybe it 's because xxmaj victor xxmaj vargas reminds me of those guys who were getting laid while i was playing with my xxmaj xxunk xxmaj xxunk when i was xxunk . xxmaj maybe it 's because without any further xxunk by the film , xxmaj victor xxmaj vargas is merely a stereotypical hot - blooded xxmaj latino , who 'll just end up shouting to girls from his car , ` hey bay - xxunk , xxunk want to get into my xxunk xxunk - xxunk ? ' xxmaj either way i do n't like him , so ultimately how can i like a film about him ? xxmaj so if you 'll excuse me , xxmaj i 'm going to go stick my hands into a bowl of xxunk .negative
1xxbos xxmaj now that xxmaj che(2008 ) has finished its relatively short xxmaj australian cinema run ( extremely limited xxunk screen in xxmaj xxunk , after xxunk ) , i can xxunk join both xxunk of \" at xxmaj the xxmaj movies \" in taking xxmaj steven xxmaj soderbergh to task . \\n\\n xxmaj it 's usually satisfying to watch a film director change his style / subject , but xxmaj soderbergh 's most recent stinker , xxmaj the xxmaj girlfriend xxmaj xxunk ) , was also missing a story , so narrative ( and editing ? ) seem to suddenly be xxmaj soderbergh 's main challenge . xxmaj strange , after xxunk years in the business . xxmaj he was probably never much good at narrative , just xxunk it well inside \" edgy \" projects . \\n\\n xxmaj none of this excuses him this present , almost diabolical failure . xxmaj as xxmaj david xxmaj stratton xxunk , \" two parts of xxmaj che do n't ( even ) make a whole \" . \\n\\n xxmaj epic xxunk in name only , xxmaj che(2008 ) barely qualifies as a feature film ! xxmaj it certainly has no legs , xxunk as except for its xxunk ultimate resolution forced upon it by history , xxmaj soderbergh 's xxunk - long xxunk just goes nowhere . \\n\\n xxmaj even xxmaj margaret xxmaj xxunk , the more xxunk of xxmaj australia 's xxmaj at xxmaj the xxmaj movies duo , noted about xxmaj soderbergh 's xxunk waste of ( xxunk digital xxunk ) : \" you 're in the woods … you 're in the woods … you 're in the woods … \" . i too am surprised xxmaj soderbergh did n't give us another xxunk of xxup that somewhere between his xxunk two xxmaj parts , because he still left out massive xxunk of xxmaj che 's \" xxunk \" life ! \\n\\n xxmaj for a xxunk of an important but infamous historical figure , xxmaj soderbergh xxunk xxunk , if not deliberately insults , his audiences by \\n\\n 1 . never providing most of xxmaj che 's story ; \\n\\n 2 . xxunk xxunk film xxunk with mere xxunk xxunk ; \\n\\n 3 . xxunk both true hindsight and a narrative of events ; \\n\\n 4 . barely developing an idea , or a character ; \\n\\n 5 . remaining xxunk episodic ; \\n\\n 6 . xxunk proper context for scenes xxrep 3 - whatever we do get is xxunk in xxunk xxunk ; \\n\\n 7 . xxunk xxunk all audiences ( even xxmaj spanish - xxunk will be confused by the xxunk xxunk in xxmaj english ) ; and \\n\\n 8 . xxunk xxunk his main subject into one dimension . xxmaj why , at xxup this late stage ? xxmaj the xxmaj t - shirt franchise has been a success ! \\n\\n xxmaj our sense of xxunk is surely due to xxmaj peter xxmaj xxunk and xxmaj benjamin xxunk xxmaj xxunk xxunk their screenplay solely on xxmaj xxunk 's memoirs . xxmaj so , like a poor student who has read only xxup one of his xxunk xxunk for his xxunk , xxmaj soderbergh 's product is xxunk limited in perspective . \\n\\n xxmaj the audience is held captive within the same xxunk knowledge , scenery and circumstances of the \" revolutionaries \" , but that does n't xxunk our sympathy . xxmaj instead , it xxunk on us that \" ah , xxmaj soderbergh 's trying to xxunk his audiences the same as the xxmaj latino peasants were at the time \" . xxmaj but these are the xxup same illiterate xxmaj latino peasants who sold out the good doctor to his enemies . xxmaj why does xxmaj soderbergh feel the need to xxunk us with them , and keep us equally mentally captive ? xxmaj such audience xxunk must have a purpose . \\n\\n xxmaj part2 is more xxunk than xxmaj part1 , but it 's literally mind - numbing with its repetitive bush - bashing , misery of outlook , and lack of variety or character xxunk . deltoro 's xxmaj che has no opportunity to grow as a person while he struggles to xxunk his own ill - xxunk troops . xxmaj the only xxunk is the humour as xxmaj che deals with his sometimes deeply ignorant \" revolutionaries \" , some of whom xxunk lack self - control around local peasants or food . xxmaj we certainly get no insight into what caused the conditions , nor any xxunk xxunk of their xxunk xxunk , such as it was . \\n\\n xxmaj part2 's excruciating xxunk remains xxunk episodic : again , nothing is telegraphed or xxunk . xxmaj thus even the scenes with xxmaj xxunk xxmaj xxunk ( xxunk xxmaj xxunk ) are unexpected and disconcerting . xxmaj any xxunk events are portrayed xxunk and xxmaj latino - xxunk , with xxmaj part1 's interviews replaced by time - xxunk xxunk between the corrupt xxmaj xxunk president ( xxunk de xxmaj xxunk ) and xxup us xxmaj government xxunk promising xxup cia xxunk ( ! ) . \\n\\n xxmaj the rest of xxmaj part2 's \" woods \" and day - for - night blue xxunk just xxunk the audience until they 're xxunk the xxunk . \\n\\n xxmaj perhaps deltoro felt too xxunk the frustration of many non - american xxmaj latinos about never getting a truthful , xxunk history of xxmaj che 's exploits within their own countries . xxmaj when foreign xxunk still wo n't deliver a free press to their people -- for whatever reason -- then one can see how a popular xxmaj american indie producer might set out to entice the not - so - well - read ( \" i may not be able to read or write , but xxmaj i 'm xxup not xxunk xxmaj inspector xxmaj xxunk ) ) out to their own local cinemas . xxmaj the film 's obvious xxunk and gross over - xxunk hint very strongly that it 's aiming only at the xxunk of the less - informed xxup who xxup still xxup speak xxup little xxmaj english . xxmaj if they did , they 'd have read xxunk on the subject already , and xxunk the relevant social issues amongst themselves -- learning the lessons of history as they should . \\n\\n xxmaj such insights are precisely what societies still need -- and not just the remaining illiterate xxmaj latinos of xxmaj central and xxmaj south xxmaj america -- yet it 's what xxmaj che(2008 ) xxunk fails to deliver . xxmaj soderbergh xxunk his lead because he 's weak on narrative . i am xxunk why xxmaj xxunk deltoro deliberately chose xxmaj soderbergh for this project if he knew this . xxmaj it 's been xxunk , hindsight about xxmaj xxunk was xxunk wanted : it 's what i went to see this film for , but the director xxunk robs us of that . \\n\\n xxmaj david xxmaj stratton , writing in xxmaj the xxmaj australian ( xxunk ) observed that while xxmaj part1 was \" uneven \" , xxmaj part2 actually \" goes rapidly downhill \" from there , \" xxunk xxmaj che 's final campaign in xxmaj xxunk in excruciating detail \" , which \" … feels almost unbearably slow and turgid \" . \\n\\n che : the xxmaj xxunk aka xxmaj part2 is certainly no xxunk for xxmaj xxunk , painting it a picture of misery and xxunk . xxmaj the entire second half is only redeemed by the aforementioned humour , and the dramatic -- yet tragic -- capture and execution of the film 's subject . \\n\\n xxmaj the rest of this xxunk cinema xxunk is just confusing , irritating misery -- xxunk , for a xxmaj soderbergh film , to be avoided at all costs . xxmaj it is bound to break the hearts of all who know even just a xxunk about the xxunk / 10 )negative
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dbch.show_batch(max_n=2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TransformBlock for text" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def TextBlock(vocab=None, is_lm=False):\n", " return TransformBlock(type_tfms=Numericalize(vocab), dl_type=LMDataLoader if is_lm else SortedDL, \n", " dbunch_kwargs={} if is_lm else {'before_batch': pad_input})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TextDataBunch -" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class TextDataBunch(DataBunch):\n", " @classmethod\n", " @delegates(DataBunch.from_dblock)\n", " def from_folder(cls, path, train='train', valid='valid', valid_pct=None, seed=None, vocab=None, text_vocab=None, is_lm=False, **kwargs):\n", " \"Create from imagenet style dataset in `path` with `train`,`valid`,`test` subfolders (or provide `valid_pct`).\"\n", " splitter = GrandparentSplitter(train_name=train, valid_name=valid) if valid_pct is None else RandomSplitter(valid_pct, seed=seed)\n", " dblock = DataBlock(blocks=(TextBlock(text_vocab, is_lm), CategoryBlock(vocab=vocab)),\n", " get_items=get_text_files,\n", " splitter=splitter,\n", " get_x=read_file,\n", " get_y=parent_label)\n", " return cls.from_dblock(dblock, path, path=path, **kwargs)\n", " \n", " @classmethod\n", " @delegates(DataBunch.from_dblock)\n", " def from_df(cls, df, path='.', valid_pct=0.2, seed=None, text_col=0, label_col=1, label_delim=None, y_block=None, \n", " text_vocab=None, is_lm=False, **kwargs):\n", " if y_block is None: y_block = MultiCategoryBlock if is_listy(label_col) and len(label_col) > 1 else CategoryBlock\n", " dblock = DataBlock(blocks=(TextBlock(text_vocab, is_lm), y_block),\n", " get_x=ColReader(text_col),\n", " get_y=ColReader(label_col, label_delim=label_delim),\n", " splitter=RandomSplitter(valid_pct, seed=seed))\n", " return cls.from_dblock(dblock, df, path=path, **kwargs)\n", " \n", " @classmethod\n", " def from_csv(cls, path, csv_fname='labels.csv', header='infer', delimiter=None, **kwargs):\n", " df = pd.read_csv(Path(path)/csv_fname, header=header, delimiter=delimiter)\n", " return cls.from_df(df, path=path, **kwargs)\n", " \n", "TextDataBunch.from_csv = delegates(to=TextDataBunch.from_df)(TextDataBunch.from_csv)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Export -" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Converted 00_test.ipynb.\n", "Converted 01_core_foundation.ipynb.\n", "Converted 01a_core_utils.ipynb.\n", "Converted 01b_core_dispatch.ipynb.\n", "Converted 01c_core_transform.ipynb.\n", "Converted 02_core_script.ipynb.\n", "Converted 03_torchcore.ipynb.\n", "Converted 03a_layers.ipynb.\n", "Converted 04_data_load.ipynb.\n", "Converted 05_data_core.ipynb.\n", "Converted 06_data_transforms.ipynb.\n", "Converted 07_data_block.ipynb.\n", "Converted 08_vision_core.ipynb.\n", "Converted 09_vision_augment.ipynb.\n", "Converted 09a_vision_data.ipynb.\n", "Converted 09b_vision_utils.ipynb.\n", "Converted 10_pets_tutorial.ipynb.\n", "Converted 11_vision_models_xresnet.ipynb.\n", "Converted 12_optimizer.ipynb.\n", "Converted 13_learner.ipynb.\n", "Converted 13a_metrics.ipynb.\n", "Converted 14_callback_schedule.ipynb.\n", "Converted 14a_callback_data.ipynb.\n", "Converted 15_callback_hook.ipynb.\n", "Converted 15a_vision_models_unet.ipynb.\n", "Converted 16_callback_progress.ipynb.\n", "Converted 17_callback_tracker.ipynb.\n", "Converted 18_callback_fp16.ipynb.\n", "Converted 19_callback_mixup.ipynb.\n", "Converted 20_interpret.ipynb.\n", "Converted 20a_distributed.ipynb.\n", "Converted 21_vision_learner.ipynb.\n", "Converted 22_tutorial_imagenette.ipynb.\n", "Converted 23_tutorial_transfer_learning.ipynb.\n", "Converted 30_text_core.ipynb.\n", "Converted 31_text_data.ipynb.\n", "Converted 32_text_models_awdlstm.ipynb.\n", "Converted 33_text_models_core.ipynb.\n", "Converted 34_callback_rnn.ipynb.\n", "Converted 35_tutorial_wikitext.ipynb.\n", "Converted 36_text_models_qrnn.ipynb.\n", "Converted 37_text_learner.ipynb.\n", "Converted 38_tutorial_ulmfit.ipynb.\n", "Converted 40_tabular_core.ipynb.\n", "Converted 41_tabular_model.ipynb.\n", "Converted 42_tabular_rapids.ipynb.\n", "Converted 50_data_block_examples.ipynb.\n", "Converted 60_medical_imaging.ipynb.\n", "Converted 65_medical_text.ipynb.\n", "Converted 70_callback_wandb.ipynb.\n", "Converted 71_callback_tensorboard.ipynb.\n", "Converted 90_notebook_core.ipynb.\n", "Converted 91_notebook_export.ipynb.\n", "Converted 92_notebook_showdoc.ipynb.\n", "Converted 93_notebook_export2html.ipynb.\n", "Converted 94_notebook_test.ipynb.\n", "Converted 95_index.ipynb.\n", "Converted 96_data_external.ipynb.\n", "Converted 97_utils_test.ipynb.\n", "Converted notebook2jekyll.ipynb.\n", "Converted xse_resnext.ipynb.\n" ] } ], "source": [ "#hide\n", "from local.notebook.export import notebook2script\n", "notebook2script(all_fs=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }