Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

# Natural Language Toolkit: Shift-Reduce Parser 

# 

# Copyright (C) 2001-2012 NLTK Project 

# Author: Edward Loper <edloper@gradient.cis.upenn.edu> 

#         Steven Bird <sb@csse.unimelb.edu.au> 

# URL: <http://www.nltk.org/> 

# For license information, see LICENSE.TXT 

from __future__ import print_function 

 

from nltk.grammar import Nonterminal 

from nltk.tree import Tree 

 

from nltk.parse.api import ParserI 

 

##////////////////////////////////////////////////////// 

##  Shift/Reduce Parser 

##////////////////////////////////////////////////////// 

class ShiftReduceParser(ParserI): 

    """ 

    A simple bottom-up CFG parser that uses two operations, "shift" 

    and "reduce", to find a single parse for a text. 

 

    ``ShiftReduceParser`` maintains a stack, which records the 

    structure of a portion of the text.  This stack is a list of 

    strings and Trees that collectively cover a portion of 

    the text.  For example, while parsing the sentence "the dog saw 

    the man" with a typical grammar, ``ShiftReduceParser`` will produce 

    the following stack, which covers "the dog saw":: 

 

       [(NP: (Det: 'the') (N: 'dog')), (V: 'saw')] 

 

    ``ShiftReduceParser`` attempts to extend the stack to cover the 

    entire text, and to combine the stack elements into a single tree, 

    producing a complete parse for the sentence. 

 

    Initially, the stack is empty.  It is extended to cover the text, 

    from left to right, by repeatedly applying two operations: 

 

      - "shift" moves a token from the beginning of the text to the 

        end of the stack. 

      - "reduce" uses a CFG production to combine the rightmost stack 

        elements into a single Tree. 

 

    Often, more than one operation can be performed on a given stack. 

    In this case, ``ShiftReduceParser`` uses the following heuristics 

    to decide which operation to perform: 

 

      - Only shift if no reductions are available. 

      - If multiple reductions are available, then apply the reduction 

        whose CFG production is listed earliest in the grammar. 

 

    Note that these heuristics are not guaranteed to choose an 

    operation that leads to a parse of the text.  Also, if multiple 

    parses exists, ``ShiftReduceParser`` will return at most one of 

    them. 

 

    :see: ``nltk.grammar`` 

    """ 

    def __init__(self, grammar, trace=0): 

        """ 

        Create a new ``ShiftReduceParser``, that uses ``grammar`` to 

        parse texts. 

 

        :type grammar: Grammar 

        :param grammar: The grammar used to parse texts. 

        :type trace: int 

        :param trace: The level of tracing that should be used when 

            parsing a text.  ``0`` will generate no tracing output; 

            and higher numbers will produce more verbose tracing 

            output. 

        """ 

        self._grammar = grammar 

        self._trace = trace 

        self._check_grammar() 

 

    def grammar(self): 

        return self._grammar 

 

    def parse(self, tokens): 

        tokens = list(tokens) 

        self._grammar.check_coverage(tokens) 

 

        # initialize the stack. 

        stack = [] 

        remaining_text = tokens 

 

        # Trace output. 

        if self._trace: 

            print('Parsing %r' % " ".join(tokens)) 

            self._trace_stack(stack, remaining_text) 

 

        # iterate through the text, pushing the token onto 

        # the stack, then reducing the stack. 

        while len(remaining_text) > 0: 

            self._shift(stack, remaining_text) 

            while self._reduce(stack, remaining_text): pass 

 

        # Did we reduce everything? 

        if len(stack) != 1: return None 

 

        # Did we end up with the right category? 

        if stack[0].node != self._grammar.start().symbol(): 

            return None 

 

        # We parsed successfully! 

        return stack[0] 

 

    def _shift(self, stack, remaining_text): 

        """ 

        Move a token from the beginning of ``remaining_text`` to the 

        end of ``stack``. 

 

        :type stack: list(str and Tree) 

        :param stack: A list of strings and Trees, encoding 

            the structure of the text that has been parsed so far. 

        :type remaining_text: list(str) 

        :param remaining_text: The portion of the text that is not yet 

            covered by ``stack``. 

        :rtype: None 

        """ 

        stack.append(remaining_text[0]) 

        remaining_text.remove(remaining_text[0]) 

        if self._trace: self._trace_shift(stack, remaining_text) 

 

    def _match_rhs(self, rhs, rightmost_stack): 

        """ 

        :rtype: bool 

        :return: true if the right hand side of a CFG production 

            matches the rightmost elements of the stack.  ``rhs`` 

            matches ``rightmost_stack`` if they are the same length, 

            and each element of ``rhs`` matches the corresponding 

            element of ``rightmost_stack``.  A nonterminal element of 

            ``rhs`` matches any Tree whose node value is equal 

            to the nonterminal's symbol.  A terminal element of ``rhs`` 

            matches any string whose type is equal to the terminal. 

        :type rhs: list(terminal and Nonterminal) 

        :param rhs: The right hand side of a CFG production. 

        :type rightmost_stack: list(string and Tree) 

        :param rightmost_stack: The rightmost elements of the parser's 

            stack. 

        """ 

 

        if len(rightmost_stack) != len(rhs): return 0 

        for i in range(len(rightmost_stack)): 

            if isinstance(rightmost_stack[i], Tree): 

                if not isinstance(rhs[i], Nonterminal): return 0 

                if rightmost_stack[i].node != rhs[i].symbol(): return 0 

            else: 

                if isinstance(rhs[i], Nonterminal): return 0 

                if rightmost_stack[i] != rhs[i]: return 0 

        return 1 

 

    def _reduce(self, stack, remaining_text, production=None): 

        """ 

        Find a CFG production whose right hand side matches the 

        rightmost stack elements; and combine those stack elements 

        into a single Tree, with the node specified by the 

        production's left-hand side.  If more than one CFG production 

        matches the stack, then use the production that is listed 

        earliest in the grammar.  The new Tree replaces the 

        elements in the stack. 

 

        :rtype: Production or None 

        :return: If a reduction is performed, then return the CFG 

            production that the reduction is based on; otherwise, 

            return false. 

        :type stack: list(string and Tree) 

        :param stack: A list of strings and Trees, encoding 

            the structure of the text that has been parsed so far. 

        :type remaining_text: list(str) 

        :param remaining_text: The portion of the text that is not yet 

            covered by ``stack``. 

        """ 

        if production is None: productions = self._grammar.productions() 

        else: productions = [production] 

 

        # Try each production, in order. 

        for production in productions: 

            rhslen = len(production.rhs()) 

 

            # check if the RHS of a production matches the top of the stack 

            if self._match_rhs(production.rhs(), stack[-rhslen:]): 

 

                # combine the tree to reflect the reduction 

                tree = Tree(production.lhs().symbol(), stack[-rhslen:]) 

                stack[-rhslen:] = [tree] 

 

                # We reduced something 

                if self._trace: 

                    self._trace_reduce(stack, production, remaining_text) 

                return production 

 

        # We didn't reduce anything 

        return None 

 

    def trace(self, trace=2): 

        """ 

        Set the level of tracing output that should be generated when 

        parsing a text. 

 

        :type trace: int 

        :param trace: The trace level.  A trace level of ``0`` will 

            generate no tracing output; and higher trace levels will 

            produce more verbose tracing output. 

        :rtype: None 

        """ 

        # 1: just show shifts. 

        # 2: show shifts & reduces 

        # 3: display which tokens & productions are shifed/reduced 

        self._trace = trace 

 

    def _trace_stack(self, stack, remaining_text, marker=' '): 

        """ 

        Print trace output displaying the given stack and text. 

 

        :rtype: None 

        :param marker: A character that is printed to the left of the 

            stack.  This is used with trace level 2 to print 'S' 

            before shifted stacks and 'R' before reduced stacks. 

        """ 

        s = '  '+marker+' [ ' 

        for elt in stack: 

            if isinstance(elt, Tree): 

                s += repr(Nonterminal(elt.node)) + ' ' 

            else: 

                s += repr(elt) + ' ' 

        s += '* ' + ' '.join(remaining_text) + ']' 

        print(s) 

 

    def _trace_shift(self, stack, remaining_text): 

        """ 

        Print trace output displaying that a token has been shifted. 

 

        :rtype: None 

        """ 

        if self._trace > 2: print('Shift %r:' % stack[-1]) 

        if self._trace == 2: self._trace_stack(stack, remaining_text, 'S') 

        elif self._trace > 0: self._trace_stack(stack, remaining_text) 

 

    def _trace_reduce(self, stack, production, remaining_text): 

        """ 

        Print trace output displaying that ``production`` was used to 

        reduce ``stack``. 

 

        :rtype: None 

        """ 

        if self._trace > 2: 

            rhs = " ".join(production.rhs()) 

            print('Reduce %r <- %s' % (production.lhs(), rhs)) 

        if self._trace == 2: self._trace_stack(stack, remaining_text, 'R') 

        elif self._trace > 1: self._trace_stack(stack, remaining_text) 

 

    def _check_grammar(self): 

        """ 

        Check to make sure that all of the CFG productions are 

        potentially useful.  If any productions can never be used, 

        then print a warning. 

 

        :rtype: None 

        """ 

        productions = self._grammar.productions() 

 

        # Any production whose RHS is an extension of another production's RHS 

        # will never be used. 

        for i in range(len(productions)): 

            for j in range(i+1, len(productions)): 

                rhs1 = productions[i].rhs() 

                rhs2 = productions[j].rhs() 

                if rhs1[:len(rhs2)] == rhs2: 

                    print('Warning: %r will never be used' % productions[i]) 

 

##////////////////////////////////////////////////////// 

##  Stepping Shift/Reduce Parser 

##////////////////////////////////////////////////////// 

class SteppingShiftReduceParser(ShiftReduceParser): 

    """ 

    A ``ShiftReduceParser`` that allows you to setp through the parsing 

    process, performing a single operation at a time.  It also allows 

    you to change the parser's grammar midway through parsing a text. 

 

    The ``initialize`` method is used to start parsing a text. 

    ``shift`` performs a single shift operation, and ``reduce`` performs 

    a single reduce operation.  ``step`` will perform a single reduce 

    operation if possible; otherwise, it will perform a single shift 

    operation.  ``parses`` returns the set of parses that have been 

    found by the parser. 

 

    :ivar _history: A list of ``(stack, remaining_text)`` pairs, 

        containing all of the previous states of the parser.  This 

        history is used to implement the ``undo`` operation. 

    :see: ``nltk.grammar`` 

    """ 

    def __init__(self, grammar, trace=0): 

        self._grammar = grammar 

        self._trace = trace 

        self._stack = None 

        self._remaining_text = None 

        self._history = [] 

 

    def nbest_parse(self, tokens, n=None): 

        tokens = list(tokens) 

        self.initialize(tokens) 

        while self.step(): pass 

 

        return self.parses()[:n] 

 

    def stack(self): 

        """ 

        :return: The parser's stack. 

        :rtype: list(str and Tree) 

        """ 

        return self._stack 

 

    def remaining_text(self): 

        """ 

        :return: The portion of the text that is not yet covered by the 

            stack. 

        :rtype: list(str) 

        """ 

        return self._remaining_text 

 

    def initialize(self, tokens): 

        """ 

        Start parsing a given text.  This sets the parser's stack to 

        ``[]`` and sets its remaining text to ``tokens``. 

        """ 

        self._stack = [] 

        self._remaining_text = tokens 

        self._history = [] 

 

    def step(self): 

        """ 

        Perform a single parsing operation.  If a reduction is 

        possible, then perform that reduction, and return the 

        production that it is based on.  Otherwise, if a shift is 

        possible, then perform it, and return 1.  Otherwise, 

        return 0. 

 

        :return: 0 if no operation was performed; 1 if a shift was 

            performed; and the CFG production used to reduce if a 

            reduction was performed. 

        :rtype: Production or bool 

        """ 

        return self.reduce() or self.shift() 

 

    def shift(self): 

        """ 

        Move a token from the beginning of the remaining text to the 

        end of the stack.  If there are no more tokens in the 

        remaining text, then do nothing. 

 

        :return: True if the shift operation was successful. 

        :rtype: bool 

        """ 

        if len(self._remaining_text) == 0: return 0 

        self._history.append( (self._stack[:], self._remaining_text[:]) ) 

        self._shift(self._stack, self._remaining_text) 

        return 1 

 

    def reduce(self, production=None): 

        """ 

        Use ``production`` to combine the rightmost stack elements into 

        a single Tree.  If ``production`` does not match the 

        rightmost stack elements, then do nothing. 

 

        :return: The production used to reduce the stack, if a 

            reduction was performed.  If no reduction was performed, 

            return None. 

 

        :rtype: Production or None 

        """ 

        self._history.append( (self._stack[:], self._remaining_text[:]) ) 

        return_val = self._reduce(self._stack, self._remaining_text, 

                                  production) 

 

        if not return_val: self._history.pop() 

        return return_val 

 

    def undo(self): 

        """ 

        Return the parser to its state before the most recent 

        shift or reduce operation.  Calling ``undo`` repeatedly return 

        the parser to successively earlier states.  If no shift or 

        reduce operations have been performed, ``undo`` will make no 

        changes. 

 

        :return: true if an operation was successfully undone. 

        :rtype: bool 

        """ 

        if len(self._history) == 0: return 0 

        (self._stack, self._remaining_text) = self._history.pop() 

        return 1 

 

    def reducible_productions(self): 

        """ 

        :return: A list of the productions for which reductions are 

            available for the current parser state. 

        :rtype: list(Production) 

        """ 

        productions = [] 

        for production in self._grammar.productions(): 

            rhslen = len(production.rhs()) 

            if self._match_rhs(production.rhs(), self._stack[-rhslen:]): 

                productions.append(production) 

        return productions 

 

    def parses(self): 

        """ 

        :return: A list of the parses that have been found by this 

            parser so far. 

        :rtype: list of Tree 

        """ 

        if len(self._remaining_text) != 0: return [] 

        if len(self._stack) != 1: return [] 

        if self._stack[0].node != self._grammar.start().symbol(): 

            return [] 

        return self._stack 

 

# copied from nltk.parser 

 

    def set_grammar(self, grammar): 

        """ 

        Change the grammar used to parse texts. 

 

        :param grammar: The new grammar. 

        :type grammar: CFG 

        """ 

        self._grammar = grammar 

 

##////////////////////////////////////////////////////// 

##  Demonstration Code 

##////////////////////////////////////////////////////// 

 

def demo(): 

    """ 

    A demonstration of the shift-reduce parser. 

    """ 

 

    from nltk import parse, parse_cfg 

 

    grammar = parse_cfg(""" 

    S -> NP VP 

    NP -> Det N | Det N PP 

    VP -> V NP | V NP PP 

    PP -> P NP 

    NP -> 'I' 

    N -> 'man' | 'park' | 'telescope' | 'dog' 

    Det -> 'the' | 'a' 

    P -> 'in' | 'with' 

    V -> 'saw' 

    """) 

 

    sent = 'I saw a man in the park'.split() 

 

    parser = parse.ShiftReduceParser(grammar, trace=2) 

    for p in parser.nbest_parse(sent): 

        print(p) 

 

if __name__ == '__main__': 

    demo()