Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

# Natural Language Toolkit: Some texts for exploration in chapter 1 of the book 

# 

# Copyright (C) 2001-2012 NLTK Project 

# Author: Steven Bird <sb@csse.unimelb.edu.au> 

# 

# URL: <http://www.nltk.org/> 

# For license information, see LICENSE.TXT 

from __future__ import print_function 

 

from nltk.corpus import gutenberg, genesis, inaugural,\ 

       nps_chat, webtext, treebank, wordnet 

from nltk.text import Text 

from nltk.probability import FreqDist 

from nltk.util import bigrams 

from nltk.misc import babelize_shell 

 

# Start-up banner: shown once, when the module is imported, before any of the
# corpora below are loaded.  The four messages are emitted as separate lines.
print(
    "*** Introductory Examples for the NLTK Book ***",
    "Loading text1, ..., text9 and sent1, ..., sent9",
    "Type the name of the text or sentence to view it.",
    "Type: 'texts()' or 'sents()' to list the materials.",
    sep="\n",
)

 

# Build the nine example texts.  Each one is wrapped in a Text object and its
# name is printed straight away, so the listing appears progressively while
# the module is being imported.

# text1: gutenberg file "melville-moby_dick.txt"; no name= is passed, so the
# displayed name is whatever Text assigns by itself.
text1 = Text(gutenberg.words("melville-moby_dick.txt"))
print("text1:", text1.name)

# text2: gutenberg file "austen-sense.txt" (name left to Text as well).
text2 = Text(gutenberg.words("austen-sense.txt"))
print("text2:", text2.name)

# text3: genesis file "english-kjv.txt"; every token is passed through str()
# before the Text is built, and the name is given explicitly.
text3 = Text(
    [str(w) for w in genesis.words("english-kjv.txt")],
    name="The Book of Genesis",
)
print("text3:", text3.name)

# text4: the whole inaugural corpus.
text4 = Text(
    inaugural.words(),
    name="Inaugural Address Corpus",
)
print("text4:", text4.name)

# text5: the whole nps_chat corpus.
text5 = Text(
    nps_chat.words(),
    name="Chat Corpus",
)
print("text5:", text5.name)

# text6: webtext file "grail.txt".
text6 = Text(
    webtext.words("grail.txt"),
    name="Monty Python and the Holy Grail",
)
print("text6:", text6.name)

# text7: the whole treebank corpus.
text7 = Text(
    treebank.words(),
    name="Wall Street Journal",
)
print("text7:", text7.name)

# text8: webtext file "singles.txt".
text8 = Text(
    webtext.words("singles.txt"),
    name="Personals Corpus",
)
print("text8:", text8.name)

# text9: gutenberg file "chesterton-thursday.txt" (name left to Text).
text9 = Text(gutenberg.words("chesterton-thursday.txt"))
print("text9:", text9.name)

 

def texts():
    """Print one "textN: <name>" line for each of text1 through text9.

    The module-level text objects are read when the function is called;
    nothing is returned.
    """
    catalogue = (
        ("text1:", text1),
        ("text2:", text2),
        ("text3:", text3),
        ("text4:", text4),
        ("text5:", text5),
        ("text6:", text6),
        ("text7:", text7),
        ("text8:", text8),
        ("text9:", text9),
    )
    for label, text in catalogue:
        print(label, text.name)

 

# Nine short, already-tokenized example sentences (lists of strings, with
# punctuation as separate tokens).  They are numbered in parallel with
# text1..text9; each looks like the opening of the matching text, but that
# pairing is not enforced anywhere in this module.
sent1 = ["Call", "me", "Ishmael", "."]
sent2 = [
    "The", "family", "of", "Dashwood", "had", "long", "been", "settled",
    "in", "Sussex", ".",
]
sent3 = [
    "In", "the", "beginning", "God", "created", "the", "heaven", "and",
    "the", "earth", ".",
]
sent4 = [
    "Fellow", "-", "Citizens", "of", "the", "Senate", "and", "of", "the",
    "House", "of", "Representatives", ":",
]
sent5 = [
    "I", "have", "a", "problem", "with", "people", "PMing", "me", "to",
    "lol", "JOIN",
]
sent6 = [
    "SCENE", "1", ":", "[", "wind", "]", "[", "clop", "clop", "clop", "]",
    "KING", "ARTHUR", ":", "Whoa", "there", "!",
]
sent7 = [
    "Pierre", "Vinken", ",", "61", "years", "old", ",", "will", "join",
    "the", "board", "as", "a", "nonexecutive", "director", "Nov.", "29", ".",
]
sent8 = [
    "25", "SEXY", "MALE", ",", "seeks", "attrac", "older", "single", "lady",
    ",", "for", "discreet", "encounters", ".",
]
sent9 = [
    "THE", "suburb", "of", "Saffron", "Park", "lay", "on", "the", "sunset",
    "side", "of", "London", ",", "as", "red", "and", "ragged", "as", "a",
    "cloud", "of", "sunset", ".",
]

 

def sents():
    """Print one "sentN: <words>" line for each of sent1 through sent9.

    Each sentence is a list of tokens; the tokens are joined with single
    spaces for display.  Nothing is returned.
    """
    catalogue = (
        ("sent1:", sent1),
        ("sent2:", sent2),
        ("sent3:", sent3),
        ("sent4:", sent4),
        ("sent5:", sent5),
        ("sent6:", sent6),
        ("sent7:", sent7),
        ("sent8:", sent8),
        ("sent9:", sent9),
    )
    for label, tokens in catalogue:
        print(label, " ".join(tokens))