Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

# Natural Language Toolkit: Minimal Sets 

# 

# Copyright (C) 2001-2012 NLTK Project 

# Author: Steven Bird <sb@csse.unimelb.edu.au> 

# URL: <http://www.nltk.org> 

# For license information, see LICENSE.TXT 

 

from collections import defaultdict 

 

class MinimalSet(object):
    """
    Collect (context, target, display) items and report the contexts
    in which several different targets were observed.

    For example, with word-initial letters as targets and the rest of
    the word as context, "fat" and "cat" share the context "at" and so
    form a minimal set; likewise "training" vs "draining".  With
    parts-of-speech as targets and words as contexts, a word such as
    "wind" (noun: 'air in rapid motion'; verb: 'coil, wrap') shows up
    as a context with more than one target.
    """

    def __init__(self, parameters=None):
        """
        Create a minimal set, optionally pre-populated.

        :param parameters: (context, target, display) tuples; each one
            is passed to ``add()``.  A false value (``None``, empty
            list, ...) leaves the set empty.
        :type parameters: list(tuple(str, str, str))
        """
        self._displays = {}             # (context, target) -> display form
        self._seen = defaultdict(set)   # context -> targets seen with it
        self._contexts = set()          # every context added so far
        self._targets = set()           # every target added so far

        for context, target, display in parameters or ():
            self.add(context, target, display)

    def add(self, context, target, display):
        """
        Record one item in the minimal set.

        If the same (context, target) pair is added again, the newer
        display form replaces the older one.

        :param context: The context in which the item of interest appears
        :type context: str
        :param target: The item of interest
        :type target: str
        :param display: The information to be reported for each item
        :type display: str
        """
        pair = (context, target)

        # Remember that this target occurred in this context ...
        self._seen[context].add(target)

        # ... extend the global inventories of contexts and targets ...
        self._contexts.add(context)
        self._targets.add(target)

        # ... and keep the display form for this exact pairing.
        self._displays[pair] = display

    def contexts(self, minimum=2):
        """
        List the contexts that occurred with enough distinct targets.

        :param minimum: the minimum number of distinct target forms
        :type minimum: int
        :return: contexts seen with at least ``minimum`` distinct
            targets, in the iteration order of the internal set
        :rtype: list
        """
        qualifying = []
        for ctx in self._contexts:
            if len(self._seen[ctx]) >= minimum:
                qualifying.append(ctx)
        return qualifying

    def display(self, context, target, default=""):
        """
        Look up the display form stored for a (context, target) pair.

        :param context: the context of the item
        :param target: the target of the item
        :param default: value returned when the pair was never added
        :return: the stored display form, or ``default``
        """
        return self._displays.get((context, target), default)

    def display_all(self, context):
        """
        Collect the display forms recorded for ``context``.

        Every known target is tried via ``display()``; results that are
        false (including the empty-string default for pairs that were
        never added) are left out.

        :param context: the context to report on
        :return: the non-empty display forms, ordered by iteration over
            the internal target set
        :rtype: list
        """
        candidates = (self.display(context, tgt) for tgt in self._targets)
        return [shown for shown in candidates if shown]

    def targets(self):
        """
        Return the set of all targets added so far.

        This is the internal set itself, not a copy.

        :rtype: set
        """
        return self._targets