Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

# Natural Language Toolkit: Semantic Interpretation 

# 

# Author: Ewan Klein <ewan@inf.ed.ac.uk> 

# 

# Copyright (C) 2001-2012 NLTK Project 

# URL: <http://www.nltk.org/> 

# For license information, see LICENSE.TXT 

 

""" 

Utility functions for batch-processing sentences: parsing and 

extraction of the semantic representation of the root node of the

syntax tree, followed by evaluation of the semantic representation in 

a first-order model. 

""" 

from __future__ import print_function 

 

import re 

from . import evaluate 

 

 

############################################################## 

## Utility functions for connecting parse output to semantics 

############################################################## 

 

def batch_parse(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    Each sentence is split on whitespace and handed to a feature chart
    parser; no other tokenization is performed.

    :param inputs: sentences to be parsed
    :type inputs: list of str
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :param trace: tracing level passed on to the parser
    :type trace: int
    :rtype: list
    :return: one entry per input sentence, in input order; each entry is
        the result of ``nbest_parse`` for that sentence
    """

    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        # Honour ``trace`` here as well, so that tracing behaves the same
        # whether the grammar is given as an object or by name.
        cp = FeatureChartParser(grammar, trace=trace)
    else:
        cp = load_parser(grammar, trace=trace)

    # use a tokenizer?
    return [cp.nbest_parse(sent.split()) for sent in inputs]

 

def root_semrep(syntree, semkey='SEM'):
    """
    Find the semantic representation at the root of a tree.

    :param syntree: a parse ``Tree``
    :param semkey: the feature label to use for the root semantics in the tree
    :return: the semantic representation at the root of a ``Tree``
    :rtype: sem.Expression
    :raise KeyError: if the root node has no value for ``semkey``; a
        diagnostic message is printed before the error is re-raised
    """
    from nltk.grammar import FeatStructNonterminal

    node = syntree.node
    assert isinstance(node, FeatStructNonterminal)
    try:
        return node[semkey]
    except KeyError:
        print(node, end=' ')
        print("has no specification for the feature %s" % semkey)
        # Re-raise from inside the handler.  A bare ``raise`` placed after
        # the ``except`` block has no active exception on Python 3 and
        # would fail with RuntimeError instead of the original KeyError.
        raise

 

def batch_interpret(inputs, grammar, semkey='SEM', trace=0):
    """
    Pair every parse tree of every input sentence with the semantic
    representation found at its root.

    :param inputs: a list of sentences
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :param semkey: the feature label holding the root semantics
    :param trace: tracing level handed to ``batch_parse``
    :return: one list per input sentence, each holding
        (parse-tree, semantic-representation) pairs
    :rtype: list
    """
    interpretations = []
    for syntrees in batch_parse(inputs, grammar, trace=trace):
        readings = []
        for tree in syntrees:
            readings.append((tree, root_semrep(tree, semkey)))
        interpretations.append(readings)
    return interpretations

 

def batch_evaluate(inputs, grammar, model, assignment, trace=0):
    """
    Extend every (parse-tree, semantic-representation) pair of every input
    sentence with the value of that representation in a model.

    :param inputs: a list of sentences
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :param model: object whose ``evaluate`` method is called with the
        string form of each semantic representation
    :param assignment: variable assignment handed to ``model.evaluate``
    :param trace: tracing level handed to ``model.evaluate``
    :return: one list per input sentence, each holding
        (parse-tree, semantic-representation, evaluation-in-model) triples
    :rtype: list
    """
    results = []
    for interpretations in batch_interpret(inputs, grammar):
        evaluated = []
        for tree, semrep in interpretations:
            value = model.evaluate(str(semrep), assignment, trace=trace)
            evaluated.append((tree, semrep, value))
        results.append(evaluated)
    return results

 

 

########################################## 

# REs used by the parse_valuation function 

########################################## 

# Separates a symbol from its value: '=>' with optional surrounding blanks.
_VAL_SPLIT_RE = re.compile(r'\s*=+>\s*')
# Separates the members of a set or of a tuple: a comma with optional blanks.
_ELEMENT_SPLIT_RE = re.compile(r'\s*,\s*')
# Matches one parenthesised tuple inside a set expression.
_TUPLES_RE = re.compile(r"""\s*
                                (\([^)]+\))  # tuple-expression
                                \s*""", re.VERBOSE)


def parse_valuation_line(s):
    """
    Parse a line in a valuation file.

    Lines are expected to be of the form::

      noosa => n
      girl => {g1, g2}
      chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}

    A value that starts with ``{`` is returned as a set: a set of tuples
    of strings if it contains parenthesised tuples, otherwise a set of
    strings.  ``{}`` yields the empty set, and blanks just inside braces
    or parentheses are ignored.  Any other value is returned unchanged as
    a string.

    :param s: input line
    :type s: str
    :return: a pair (symbol, value)
    :rtype: tuple
    :raise IndexError: if the line contains no ``=>`` separator
    """
    pieces = _VAL_SPLIT_RE.split(s)
    symbol = pieces[0]
    value = pieces[1]
    # check whether the value is meant to be a set
    if value.startswith('{'):
        # Drop the braces and any padding inside them so that the padding
        # does not end up as part of the first or last element.
        inner = value[1:-1].strip()
        tuple_strings = _TUPLES_RE.findall(inner)
        if tuple_strings:
            # the set elements are tuples
            set_elements = [tuple(_ELEMENT_SPLIT_RE.split(ts[1:-1].strip()))
                            for ts in tuple_strings]
        elif inner:
            set_elements = _ELEMENT_SPLIT_RE.split(inner)
        else:
            # '{}' denotes the empty set, not a set holding the empty string
            set_elements = []
        value = set(set_elements)
    return symbol, value

 

def parse_valuation(s):
    """
    Convert a valuation file into a valuation.

    Blank lines and lines starting with ``#`` are skipped; every other
    line is parsed with ``parse_valuation_line``.

    :param s: the contents of a valuation file
    :type s: str
    :return: a ``nltk.sem`` valuation
    :rtype: Valuation
    :raise ValueError: if a line cannot be parsed; the message gives the
        1-based line number and the offending line
    """
    statements = []
    # Count lines from 1 so the reported number matches what an editor shows.
    for linenum, line in enumerate(s.splitlines(), 1):
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        try:
            statements.append(parse_valuation_line(line))
        except (ValueError, IndexError):
            # A line without '=>' fails in the line parser with IndexError;
            # report it the same way as any other unparsable line.
            raise ValueError('Unable to parse line %s: %s' % (linenum, line))
    return evaluate.Valuation(statements)

 

 

def demo_model0():
    """
    Build a small demonstration model and variable assignment.

    The results are stored in the module-level names ``m0`` (the model)
    and ``g0`` (the assignment); nothing is returned.
    """
    global m0, g0

    # Individual constants: each name denotes a single entity.
    individuals = [
        ('john', 'b1'),
        ('mary', 'g1'),
        ('suzie', 'g2'),
        ('fido', 'd1'),
        ('tess', 'd2'),
        ('noosa', 'n'),
    ]
    # One-place predicates: each name denotes a set of entities.
    properties = [
        ('girl', set(['g1', 'g2'])),
        ('boy', set(['b1', 'b2'])),
        ('dog', set(['d1', 'd2'])),
        ('bark', set(['d1', 'd2'])),
        ('walk', set(['b1', 'g2', 'd1'])),
    ]
    # Two-place predicates: each name denotes a set of pairs of entities.
    relations = [
        ('chase', set([('b1', 'g1'), ('b2', 'g1'), ('g1', 'd1'), ('g2', 'd2')])),
        ('see', set([('b1', 'g1'), ('b2', 'd2'), ('g1', 'b1'), ('d2', 'b1'), ('g2', 'n')])),
        ('in', set([('b1', 'n'), ('b2', 'n'), ('d2', 'n')])),
        ('with', set([('b1', 'g1'), ('g1', 'b1'), ('d1', 'b1'), ('b1', 'd1')])),
    ]

    # Read the pairs into a valuation; its ``domain`` property is used for
    # both the model and the assignment.
    valuation = evaluate.Valuation(individuals + properties + relations)
    domain = valuation.domain
    m0 = evaluate.Model(domain, valuation)
    g0 = evaluate.Assignment(domain)

 

 

def read_sents(file):
    """
    Read test sentences from a file, one sentence per line.

    Trailing whitespace is removed from every line.  Lines that are empty
    after that, and lines whose first character is ``#``, are dropped.

    :param file: path of the file to read
    :type file: str
    :return: the remaining lines, in file order
    :rtype: list of str
    """
    # Use a context manager so the file is closed even if reading fails.
    with open(file) as infile:
        lines = [line.rstrip() for line in infile]
    return [line for line in lines if line and not line.startswith('#')]

 

def demo_legacy_grammar():
    """
    Check that batch_interpret() is compatible with legacy grammars that use
    a lowercase 'sem' feature.

    A one-rule grammar is built inline, the sentence 'hello' is interpreted
    with ``semkey='sem'``, and the semantics of its first reading is printed.
    """
    from nltk.grammar import parse_fcfg

    legacy_grammar = parse_fcfg("""
    % start S
    S[sem=<hello>] -> 'hello'
    """)
    print("Reading grammar: %s" % legacy_grammar)
    print("*" * 20)
    interpretations = batch_interpret(['hello'], legacy_grammar, semkey='sem')
    for readings in interpretations:
        # Only the first (tree, semantics) pair of each sentence is shown.
        _, semrep = readings[0]
        print()
        print("output: ", semrep)

 

def demo():
    """
    Command-line demo: parse a set of sentences and print their semantic
    representations, by default together with their value in the model
    built by ``demo_model0``.

    Options are read from the command line with ``optparse``.  A built-in
    list of sentences and the grammar 'grammars/sample_grammars/sem2.fcfg'
    are used unless ``--sentences`` / ``--gram`` say otherwise.
    """
    from optparse import OptionParser
    description = \
    """
    Parse and evaluate some sentences.
    """

    opts = OptionParser(description=description)

    opts.set_defaults(evaluate=True, beta=True, syntrace=0,
                      semtrace=0, demo='default', grammar='', sentences='')

    opts.add_option("-d", "--demo", dest="demo",
                    help="choose demo D; omit this for the default demo, or specify 'chat80'", metavar="D")
    opts.add_option("-g", "--gram", dest="grammar",
                    help="read in grammar G", metavar="G")
    opts.add_option("-m", "--model", dest="model",
                        help="import model M (omit '.py' suffix)", metavar="M")
    opts.add_option("-s", "--sentences", dest="sentences",
                        help="read in a file of test sentences S", metavar="S")
    opts.add_option("-e", "--no-eval", action="store_false", dest="evaluate",
                    help="just do a syntactic analysis")
    opts.add_option("-b", "--no-beta-reduction", action="store_false",
                    dest="beta", help="don't carry out beta-reduction")
    opts.add_option("-t", "--syntrace", action="count", dest="syntrace",
                    help="set syntactic tracing on; requires '-e' option")
    opts.add_option("-T", "--semtrace", action="count", dest="semtrace",
                    help="set semantic tracing on")

    (options, args) = opts.parse_args()

    SPACER = '-' * 30

    demo_model0()

    # Default sentences and grammar, used when no file is given.
    sents = [
    'Fido sees a boy with Mary',
    'John sees Mary',
    'every girl chases a dog',
    'every boy chases a girl',
    'John walks with a girl in Noosa',
    'who walks']

    gramfile = 'grammars/sample_grammars/sem2.fcfg'

    if options.sentences:
        # Replace the built-in sentences with the contents of the file.
        # (Previously the file name was recorded but never read, because
        # the built-in list is never None.)
        sents = read_sents(options.sentences)
    if options.grammar:
        gramfile = options.grammar
    if options.model:
        # NOTE(review): this only triggers the import of the named module;
        # the model actually used below is always ``m0``.  The module name
        # comes straight from the command line and is executed via exec().
        exec("import %s as model" % options.model)

    # Set model and assignment
    model = m0
    g = g0

    if options.evaluate:
        evaluations = \
            batch_evaluate(sents, gramfile, model, g, trace=options.semtrace)
        for sent, readings in zip(sents, evaluations):
            print('\nSentence: %s' % sent)
            print(SPACER)
            for n, (syntree, semrep, value) in enumerate(readings, 1):
                if isinstance(value, dict):
                    value = set(value.keys())
                print('%d:  %s' % (n, semrep))
                print(value)
    else:
        semreps = \
            batch_interpret(sents, gramfile, trace=options.syntrace)
        for sent, readings in zip(sents, semreps):
            print('\nSentence: %s' % sent)
            print(SPACER)
            for n, (syntree, semrep) in enumerate(readings, 1):
                print('%d:  %s' % (n, semrep))

 

if __name__ == "__main__":
    # When run as a script, only the legacy-grammar check is executed;
    # the full command-line demo below is left disabled.
    #demo()
    demo_legacy_grammar()