Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

# Natural Language Toolkit: Dispersion Plots 

# 

# Copyright (C) 2001-2012 NLTK Project 

# Author: Steven Bird <sb@csse.unimelb.edu.au> 

# URL: <http://www.nltk.org/> 

# For license information, see LICENSE.TXT 

 

""" 

A utility for displaying lexical dispersion. 

""" 

 

def dispersion_plot(text, words, ignore_case=False):
    """
    Generate a lexical dispersion plot.

    Every occurrence of a target word in ``text`` is drawn as a vertical
    tick whose x coordinate is the token's offset in ``text`` and whose y
    coordinate is the row labelled with that target word.  The finished
    figure is displayed with ``pylab.show()``.

    :param text: The source text
    :type text: list(str) or enum(str)
    :param words: The target words; the sequence is not modified
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :raises ValueError: if ``pylab`` (matplotlib) cannot be imported
    """

    try:
        import pylab
    except ImportError:
        # ValueError (rather than ImportError) is kept for callers that
        # already catch it.
        raise ValueError('The plot function requires the matplotlib package (aka pylab). '
                         'See http://matplotlib.sourceforge.net/')

    # Rows are labelled in reverse order, so the first target word gets the
    # largest y value.  Work on a copy so the caller's sequence keeps its
    # original order.
    words = list(reversed(words))

    if ignore_case:
        words_to_comp = [word.lower() for word in words]
        text_to_comp = (token.lower() for token in text)
    else:
        words_to_comp = words
        text_to_comp = text

    # Map each target word to the row(s) it labels so that every token in
    # the text needs a single lookup instead of a scan over all targets.
    # A list of rows is kept per word because duplicate targets (or targets
    # that collide after lower-casing) each get their own row.
    rows_for_word = {}
    for row, word in enumerate(words_to_comp):
        rows_for_word.setdefault(word, []).append(row)

    points = [(offset, row)
              for offset, token in enumerate(text_to_comp)
              for row in rows_for_word.get(token, ())]

    if points:
        x, y = zip(*points)
    else:
        # No matches: plot empty sequences so the axes are still drawn.
        x = y = ()
    pylab.plot(x, y, "b|", scalex=.1)
    pylab.yticks(list(range(len(words))), words, color="b")
    # Leave one row of padding below the first and above the last label.
    pylab.ylim(-1, len(words))
    pylab.title("Lexical Dispersion Plot")
    pylab.xlabel("Word Offset")
    pylab.show()

 

if __name__ == '__main__':
    # Demo: plot where four character names occur in the Gutenberg corpus
    # file 'austen-sense.txt'.
    from nltk.corpus import gutenberg

    target_names = [
        'Elinor',
        'Marianne',
        'Edward',
        'Willoughby',
    ]
    source_tokens = gutenberg.words('austen-sense.txt')
    dispersion_plot(source_tokens, target_names)