Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

# Natural Language Toolkit (NLTK) Help 

# 

# Copyright (C) 2001-2012 NLTK Project 

# Authors: Steven Bird <sb@csse.unimelb.edu.au> 

# URL: <http://www.nltk.org/> 

# For license information, see LICENSE.TXT 

 

""" 

Provide structured access to documentation. 

""" 

from __future__ import print_function 

 

import re 

from textwrap import wrap 

 

from nltk.data import load 

 

def brown_tagset(tagpattern=None):
    """
    Print entries from the tagset resource named "brown_tagset".

    :param tagpattern: optional selector. If omitted (or otherwise falsy),
        every entry is printed; if it equals a tag exactly, only that
        entry is printed; otherwise it is treated as a regular expression
        matched against the start of each tag.
    """
    _format_tagset("brown_tagset", tagpattern=tagpattern)

 

def claws5_tagset(tagpattern=None):
    """
    Print entries from the tagset resource named "claws5_tagset".

    :param tagpattern: optional selector. If omitted (or otherwise falsy),
        every entry is printed; if it equals a tag exactly, only that
        entry is printed; otherwise it is treated as a regular expression
        matched against the start of each tag.
    """
    _format_tagset("claws5_tagset", tagpattern=tagpattern)

 

def upenn_tagset(tagpattern=None):
    """
    Print entries from the tagset resource named "upenn_tagset".

    :param tagpattern: optional selector. If omitted (or otherwise falsy),
        every entry is printed; if it equals a tag exactly, only that
        entry is printed; otherwise it is treated as a regular expression
        matched against the start of each tag.
    """
    _format_tagset("upenn_tagset", tagpattern=tagpattern)

 

##################################################################### 

# UTILITIES 

##################################################################### 

 

def _print_entries(tags, tagdict):
    """
    Print one formatted entry per tag, in the order given.

    Each entry is a heading line ``"<tag>: <entry[0]>"`` followed by
    ``entry[1]`` re-wrapped to 75 columns with a four-space indent.

    :param tags: iterable of keys to look up in ``tagdict``.
    :param tagdict: mapping from tag to an indexable entry whose first
        item is appended to the heading and whose second item is the
        text to wrap (presumably definition and examples).
    :raises KeyError: if a tag is not present in ``tagdict``.
    """
    indent = '    '
    for name in tags:
        record = tagdict[name]
        lines = [name + ": " + record[0]]
        # wrap() yields no lines for empty text, leaving just the heading.
        lines.extend(
            wrap(record[1], width=75, initial_indent=indent, subsequent_indent=indent)
        )
        print("\n".join(lines))

 

def _format_tagset(tagset, tagpattern=None):
    """
    Load a tagset resource and print the entries selected by ``tagpattern``.

    The resource is loaded from ``"help/tagsets/<tagset>.pickle"``.

    :param tagset: base name of the tagset resource (without extension).
    :param tagpattern: optional selector. Falsy prints every entry in
        sorted tag order; a value that is itself a key prints only that
        entry; anything else is compiled as a regular expression and
        matched against the start of each tag.
    """
    entries = load("help/tagsets/" + tagset + ".pickle")

    # No selector: dump the whole tagset.
    if not tagpattern:
        _print_entries(sorted(entries), entries)
        return

    # An exact tag name takes priority over regex interpretation.
    if tagpattern in entries:
        _print_entries([tagpattern], entries)
        return

    # Fall back to a regex; match() anchors at the start of the tag only.
    matcher = re.compile(tagpattern)
    selected = [name for name in sorted(entries) if matcher.match(name)]
    if not selected:
        print("No matching tags found.")
        return
    _print_entries(selected, entries)

 

if __name__ == '__main__':
    # Demo run: each pair is a tagset printer and the selector passed to it.
    # The calls happen in the order listed.
    for show_tagset, selector in (
        (brown_tagset, r'NN.*'),
        (upenn_tagset, r'.*\$'),
        # presumably not a real tag, exercising the no-match message
        (claws5_tagset, 'UNDEFINED'),
        (brown_tagset, r'NN'),
    ):
        show_tagset(selector)