Coverage for nltk.corpus.reader.switchboard : 42%
![](keybd_closed.png)
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# Natural Language Toolkit: Switchboard Corpus Reader # # Copyright (C) 2001-2012 NLTK Project # Author: Edward Loper <edloper@gradient.cis.upenn.edu> # URL: <http://www.nltk.org/> # For license information, see LICENSE.TXT
""" A specialized list object used to encode switchboard utterances. The elements of the list are the words in the utterance; and two attributes, ``speaker`` and ``id``, are provided to retrieve the spearker identifier and utterance id. Note that utterance ids are only unique within a given discourse. """ list.__init__(self, words) self.speaker = speaker self.id = int(id) if len(self) == 0: text = '' elif isinstance(self[0], tuple): text = ' '.join('%s/%s' % w for w in self) else: text = ' '.join(self) return '<%s.%s: %r>' % (self.speaker, self.id, text)
# Use the "tagged" file even for non-tagged data methods, since # it's tokenized.
return StreamBackedCorpusView(self.abspath('tagged'), self._words_block_reader)
def tagged_words_block_reader(stream): return self._tagged_words_block_reader(stream, simplify_tags) return StreamBackedCorpusView(self.abspath('tagged'), tagged_words_block_reader)
return StreamBackedCorpusView(self.abspath('tagged'), self._turns_block_reader)
def tagged_turns_block_reader(stream): return self._tagged_turns_block_reader(stream, simplify_tags) return StreamBackedCorpusView(self.abspath('tagged'), tagged_turns_block_reader)
return StreamBackedCorpusView(self.abspath('tagged'), self._discourses_block_reader)
def tagged_discourses_block_reader(stream): return self._tagged_discourses_block_reader(stream, simplify_tags) return StreamBackedCorpusView(self.abspath('tagged'), tagged_discourses_block_reader)
# returns at most 1 discourse. (The other methods depend on this.) return [[self._parse_utterance(u, include_tag=False) for b in read_blankline_block(stream) for u in b.split('\n') if u.strip()]]
# returns at most 1 discourse. (The other methods depend on this.) return [[self._parse_utterance(u, include_tag=True, simplify_tags=simplify_tags) for b in read_blankline_block(stream) for u in b.split('\n') if u.strip()]]
return self._discourses_block_reader(stream)[0]
return self._tagged_discourses_block_reader(stream, simplify_tags)[0]
return sum(self._discourses_block_reader(stream)[0], [])
return sum(self._tagged_discourses_block_reader(stream, simplify_tags)[0], [])
m = self._UTTERANCE_RE.match(utterance) if m is None: raise ValueError('Bad utterance %r' % utterance) speaker, id, text = m.groups() words = [str2tuple(s, self._SEP) for s in text.split()] if not include_tag: words = [w for (w,t) in words] elif simplify_tags: words = [(w, self._tag_mapping_function(t)) for (w,t) in words] return SwitchboardTurn(words, speaker, id)
|