# Natural Language Toolkit: Dependency Grammars 

# 

# Copyright (C) 2001-2012 NLTK Project 

# Author: Jason Narad <jason.narad@gmail.com> 

# 

# URL: <http://www.nltk.org/> 

# For license information, see LICENSE.TXT 

# 

from __future__ import print_function 

 

import math 

 

from nltk.compat import xrange 

from nltk.grammar import parse_dependency_grammar 

 

from .dependencygraph import DependencyGraph, conll_data2 

 

################################################################# 

# DependencyScorerI - Interface for Graph-Edge Weight Calculation 

################################################################# 

 

class DependencyScorerI(object): 

    """ 

    A scorer for calculating the weights on the edges of a weighted 

    dependency graph.  This is used by a 

    ``ProbabilisticNonprojectiveParser`` to initialize the edge 

    weights of a ``DependencyGraph``.  While typically this would be done 

    by training a binary classifier, any class that can return a 

    multidimensional list representation of the edge weights can 

    implement this interface.  As such, it has no necessary 

    fields. 

    """ 

 

    def __init__(self): 

        if self.__class__ == DependencyScorerI: 

            raise TypeError('DependencyScorerI is an abstract interface') 

 

    def train(self, graphs): 

        """ 

        :type graphs: list(DependencyGraph) 

        :param graphs: A list of dependency graphs to train the scorer. 

        Typically the edges present in the graphs can be used as 

        positive training examples, and the edges not present as negative 

        examples. 

        """ 

        raise NotImplementedError() 

 

    def score(self, graph): 

        """ 

        :type graph: DependencyGraph 

        :param graph: A dependency graph whose set of edges need to be 

        scored. 

        :rtype: A three-dimensional list of numbers. 

        :return: The scores are returned in a three-dimensional list, such 

        that the outer-dimension refers to the head, and the 

        inner-dimension refers to the dependents.  For instance, 

        scores[0][1] would reference the list of scores corresponding to 

        arcs from node 0 to node 1.  The node's 'address' field can be used 

        to determine its number identification. 

 

        For further illustration, a score list corresponding to Fig.2 of 

        Keith Hall's 'K-best Spanning Tree Parsing' paper: 

      scores = [[[], [5],  [1],  [1]], 

                [[], [],   [11], [4]], 

                [[], [10], [],   [5]], 

                [[], [8],  [8],  []]] 

        When used in conjunction with a MaxEntClassifier, each score would 

        correspond to the confidence of a particular edge being classified 

        as belonging to the positive class. 

        """ 

        raise NotImplementedError() 

 

    #//////////////////////////////////////////////////////////// 

    # Comparisons 

    #//////////////////////////////////////////////////////////// 

 

    def __cmp__(self, other): 

        raise NotImplementedError() 

 

    def __hash__(self): 

        raise NotImplementedError() 

 

 

 

################################################################# 

# NaiveBayesDependencyScorer 

################################################################# 

 

class NaiveBayesDependencyScorer(DependencyScorerI): 

    """ 

    A dependency scorer built around a probabilistic classifier.  In this 

    particular class that classifier is a ``NaiveBayesClassifier``. 

    It uses head-word, head-tag, child-word, and child-tag features 

    for classification. 

    """ 

 

    def __init__(self): 

        pass  # No state is needed until train() is called. 

 

    def train(self, graphs): 

        """ 

        Trains a ``NaiveBayesClassifier`` using the edges present in 

        graphs list as positive examples, the edges not present as 

        negative examples.  Uses a feature vector of head-word, 

        head-tag, child-word, and child-tag. 

 

        :type graphs: list(DependencyGraph) 

        :param graphs: A list of dependency graphs to train the scorer. 

        """ 

 

        # Create labeled training examples 

        labeled_examples = [] 

        for graph in graphs: 

            for head_node in graph.nodelist: 

                for child_index in range(len(graph.nodelist)): 

                    child_node = graph.get_by_address(child_index) 

                    if child_index in head_node['deps']: 

                        label = "T" 

                    else: 

                        label = "F" 

                    labeled_examples.append((dict(a=head_node['word'], b=head_node['tag'], c=child_node['word'], d=child_node['tag']), label)) 

        # Train the classifier 

        import nltk 

        self.classifier = nltk.classify.NaiveBayesClassifier.train(labeled_examples) 

 

    def score(self, graph): 

        """ 

        Converts the graph into a feature-based representation of 

        each edge, and then assigns a score to each based on the 

        confidence of the classifier in assigning it to the 

        positive label.  Scores are returned in a multidimensional list. 

 

        :type graph: DependencyGraph 

        :param graph: A dependency graph to score. 

        :rtype: A three-dimensional list of numbers. 

        :return: Edge scores for the graph parameter. 

        """ 

        # Convert graph to feature representation 

        edges = [] 

        for i in range(len(graph.nodelist)): 

            for j in range(len(graph.nodelist)): 

                head_node = graph.get_by_address(i) 

                child_node = graph.get_by_address(j) 

                edges.append(dict(a=head_node['word'], b=head_node['tag'], c=child_node['word'], d=child_node['tag'])) 

        # Score edges 

        edge_scores = [] 

        row = [] 

        count = 0 

        for pdist in self.classifier.batch_prob_classify(edges): 

            row.append([math.log(pdist.prob("T"))]) 

            count += 1 

            if count == len(graph.nodelist): 

                edge_scores.append(row) 

                row = [] 

                count = 0 
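
        # Note: batch_prob_classify() returns one probability distribution
        # per input edge, in input order, so edge_scores[i][j] ends up
        # holding [log P(arc from head address i to child address j)].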

        return edge_scores 

 

 

################################################################# 

# A Scorer for Demo Purposes 

################################################################# 

# A short class used to show the parsing example from the paper 
class DemoScorer(DependencyScorerI): 

 

    def train(self, graphs): 

        print('Training...') 

 

    def score(self, graph): 

        # scores for Keith Hall 'K-best Spanning Tree Parsing' paper 

        return [[[], [5],  [1],  [1]], 

                [[], [],   [11], [4]], 

                [[], [10], [],   [5]], 

                [[], [8],  [8],  []]] 
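
# Usage sketch: hall_demo() below exercises this scorer.  No training data
# is needed, since score() ignores its argument:
#
#     npp = ProbabilisticNonprojectiveParser()
#     npp.train([], DemoScorer())
#     npp.parse(['v1', 'v2', 'v3'], [None, None, None])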

 

################################################################# 

# Non-Projective Probabilistic Parsing 

################################################################# 

 

class ProbabilisticNonprojectiveParser(object): 

    """ 

    A probabilistic non-projective dependency parser.  Nonprojective 

    dependencies allow for "crossing branches" in the parse tree, 

    which is necessary for representing particular linguistic 

    phenomena, or even typical parses in some languages.  This parser 

    follows the MST parsing algorithm outlined in McDonald (2005), 

    which likens the search for the best non-projective parse to 

    finding the maximum spanning tree in a weighted directed graph. 

    """ 

    def __init__(self): 

        """ 

        Creates a new non-projective parser. 

        """ 

        print('initializing prob. nonprojective...') 

 

    def train(self, graphs, dependency_scorer): 

        """ 

        Trains a ``DependencyScorerI`` from a set of ``DependencyGraph`` objects, 

        and establishes this as the parser's scorer.  This is used to 

        initialize the scores on a ``DependencyGraph`` during the parsing 

        procedure. 

 

        :type graphs: list(DependencyGraph) 

        :param graphs: A list of dependency graphs to train the scorer. 

        :type dependency_scorer: DependencyScorerI 

        :param dependency_scorer: A scorer which implements the 

            ``DependencyScorerI`` interface. 

        """ 

        self._scorer = dependency_scorer 

        self._scorer.train(graphs) 

 

    def initialize_edge_scores(self, graph): 

        """ 

        Assigns a score to every edge in the ``DependencyGraph`` graph. 

        These scores are generated via the parser's scorer which 

        was assigned during the training process. 

 

        :type graph: DependencyGraph 

        :param graph: A dependency graph to assign scores to. 

        """ 

        self.scores = self._scorer.score(graph) 

 

    def collapse_nodes(self, new_node, cycle_path, g_graph, b_graph, c_graph): 

        """ 

        Takes a list of nodes that have been identified to belong to a cycle, 

        and collapses them into one larger node.  The arcs of all nodes in 

        the graph must be updated to account for this. 

 

        :type new_node: Node. 

        :param new_node: A Node (Dictionary) to collapse the cycle nodes into. 

        :type cycle_path: A list of integers. 

        :param cycle_path: A list of node addresses, each of which is in the cycle. 

        :type g_graph, b_graph, c_graph: DependencyGraph 

        :param g_graph, b_graph, c_graph: Graphs which need to be updated. 

        """ 

        print('Collapsing nodes...') 

        # Collapse all cycle nodes into v_n+1 in G_Graph 

        for cycle_node_index in cycle_path: 

            g_graph.remove_by_address(cycle_node_index) 

        g_graph.nodelist.append(new_node) 

        g_graph.redirect_arcs(cycle_path, new_node['address']) 

 

    def update_edge_scores(self, new_node, cycle_path): 

        """ 

        Updates the edge scores to reflect a collapse operation into 

        new_node. 

 

        :type new_node: A Node. 

        :param new_node: The node which cycle nodes are collapsed into. 

        :type cycle_path: A list of integers. 

        :param cycle_path: A list of node addresses that belong to the cycle. 

        """ 

        print('cycle', cycle_path) 

        cycle_path = self.compute_original_indexes(cycle_path) 

        print('old cycle ', cycle_path) 

        print('Prior to update:\n', self.scores) 

        for i, row in enumerate(self.scores): 

            for j, column in enumerate(self.scores[i]): 

                print(self.scores[i][j]) 

                if j in cycle_path and i not in cycle_path and len(self.scores[i][j]) > 0: 

                    new_vals = [] 

                    subtract_val = self.compute_max_subtract_score(j, cycle_path) 

                    print(self.scores[i][j], ' - ', subtract_val) 

                    for cur_val in self.scores[i][j]: 

                        new_vals.append(cur_val - subtract_val) 

                    self.scores[i][j] = new_vals 

        for i, row in enumerate(self.scores): 

            for j, cell in enumerate(self.scores[i]): 

                if i in cycle_path and j in cycle_path: 

                    self.scores[i][j] = [] 

        print('After update:\n', self.scores) 
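
    # Worked example (values from the DemoScorer matrix above, assuming no
    # earlier collapses): for a cycle over nodes 1 and 2, the best in-cycle
    # incoming score is subtracted from every arc entering the cycle.
    # Column 1 loses 10 (scores[2][1]) and column 2 loses 11 (scores[1][2]),
    # and the within-cycle cells are emptied, giving:
    #
    #     [[[], [-5], [-10], [1]],
    #      [[], [],   [],    [4]],
    #      [[], [],   [],    [5]],
    #      [[], [-2], [-3],  []]]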

 

    def compute_original_indexes(self, new_indexes): 

        """ 

        As nodes are collapsed into others, they are replaced 

        by the new node in the graph, but it's still necessary 

        to keep track of what these original nodes were.  This 

        takes a list of node addresses and replaces any collapsed 

        node addresses with their original addresses. 

 

        :type new_indexes: A list of integers. 

        :param new_indexes: A list of node addresses to check for 

        subsumed nodes. 

        """ 

        swapped = True 

        while swapped: 

            originals = [] 

            swapped = False 

            for new_index in new_indexes: 

                if new_index in self.inner_nodes: 

                    for old_val in self.inner_nodes[new_index]: 

                        if old_val not in originals: 

                            originals.append(old_val) 

                            swapped = True 

                else: 

                    originals.append(new_index) 

            new_indexes = originals 

        return new_indexes 
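
    # Illustration (hypothetical addresses): if a cycle over nodes 1 and 2
    # was collapsed into a new node 4, so that self.inner_nodes == {4: [1, 2]},
    # then compute_original_indexes([4, 3]) expands the collapsed address
    # and returns [1, 2, 3].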

 

    def compute_max_subtract_score(self, column_index, cycle_indexes): 

        """ 

        When updating scores the score of the highest-weighted incoming 

        arc is subtracted upon collapse.  This returns the correct 

        amount to subtract from that edge. 

 

        :type column_index: integer. 

        :param column_index: An index representing the column of incoming arcs 

        to a particular node being updated 

        :type cycle_indexes: A list of integers. 

        :param cycle_indexes: Only arcs from cycle nodes are considered.  This 

        is a list of such node addresses. 

        """ 

        max_score = -float('inf')  # lower than any possible log-score 

        for row_index in cycle_indexes: 

            for subtract_val in self.scores[row_index][column_index]: 

                if subtract_val > max_score: 

                    max_score = subtract_val 

        return max_score 
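
    # Illustration: with the DemoScorer matrix above and a collapsed cycle
    # over nodes 1 and 2, compute_max_subtract_score(1, [1, 2]) inspects
    # scores[1][1] == [] and scores[2][1] == [10] and returns 10.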

 

 

    def best_incoming_arc(self, node_index): 

        """ 

        Returns the source of the best incoming arc to the 

        node with address: node_index 

 

        :type node_index: integer. 

        :param node_index: The address of the 'destination' node, 

        the node that is arced to. 

        """ 

        originals = self.compute_original_indexes([node_index]) 

        print('originals:', originals) 

        max_arc = None 

        max_score = []  # lists compare lexicographically; [] sorts below any non-empty score 

        for row_index in range(len(self.scores)): 

            for col_index in range(len(self.scores[row_index])): 

                if col_index in originals and self.scores[row_index][col_index] > max_score: 

                    max_score = self.scores[row_index][col_index] 

                    max_arc = row_index 

                    print(row_index, ',', col_index) 

        print(max_score) 

        for key in self.inner_nodes: 

            replaced_nodes = self.inner_nodes[key] 

            if max_arc in replaced_nodes: 

                return key 

        return max_arc 
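
    # Illustration: with the DemoScorer matrix above and no collapsed nodes
    # (self.inner_nodes == {}), best_incoming_arc(1) finds the maximum
    # column-1 score, [10] in row 2, and returns 2.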

 

    def original_best_arc(self, node_index): 

        """ 

        ??? 

        """ 

        originals = self.compute_original_indexes([node_index]) 

        max_arc = None 

        max_score = []  # see best_incoming_arc() above 

        max_orig = None 

        for row_index in range(len(self.scores)): 

            for col_index in range(len(self.scores[row_index])): 

                if col_index in originals and self.scores[row_index][col_index] > max_score: 

                    max_score = self.scores[row_index][col_index] 

                    max_arc = row_index 

                    max_orig = col_index 

        return [max_arc, max_orig] 

 

 

    def parse(self, tokens, tags): 

        """ 

        Parses a list of tokens in accordance with the MST parsing algorithm 

        for non-projective dependency parses.  Assumes that the tokens to 

        be parsed have already been tagged and those tags are provided.  Various 

        scoring methods can be used by implementing the ``DependencyScorerI`` 

        interface and passing it to the training algorithm. 

 

        :type tokens: list(str) 

        :param tokens: A list of words or punctuation to be parsed. 

        :type tags: list(str) 

        :param tags: A list of tags corresponding by index to the words in the tokens list. 

        :return: An MST parse of the tokens. 

        :rtype: DependencyGraph 

        """ 

        self.inner_nodes = {} 

        # Initialize g_graph 

        g_graph = DependencyGraph() 

        for index, token in enumerate(tokens): 

            g_graph.nodelist.append({'word':token, 'tag':tags[index], 'deps':[], 'rel':'NTOP', 'address':index+1}) 

        # Fully connect non-root nodes in g_graph 

        g_graph.connect_graph() 

        original_graph = DependencyGraph() 

        for index, token in enumerate(tokens): 

            original_graph.nodelist.append({'word':token, 'tag':tags[index], 'deps':[], 'rel':'NTOP', 'address':index+1}) 

 

        # Initialize b_graph 

        b_graph = DependencyGraph() 

        b_graph.nodelist = [] 

        # Initialize c_graph 

        c_graph = DependencyGraph() 

        c_graph.nodelist = [{'word':token, 'tag':tags[index], 'deps':[], 

                             'rel':'NTOP', 'address':index+1} 

                            for index, token in enumerate(tokens)] 

        # Assign initial scores to g_graph edges 

        self.initialize_edge_scores(g_graph) 

        print(self.scores) 

        # Initialize a list of unvisited vertices (by node address) 

        unvisited_vertices = [vertex['address'] for vertex in c_graph.nodelist] 

        # Iterate over unvisited vertices 

        nr_vertices = len(tokens) 

        betas = {} 

        while unvisited_vertices: 

            # Mark current node as visited 

            current_vertex = unvisited_vertices.pop(0) 

            print('current_vertex:', current_vertex) 

            # Get corresponding node n_i to vertex v_i 

            current_node = g_graph.get_by_address(current_vertex) 

            print('current_node:', current_node) 

            # Get best in-edge node b for current node 

            best_in_edge = self.best_incoming_arc(current_vertex) 

            betas[current_vertex] = self.original_best_arc(current_vertex) 

            print('best in arc: ', best_in_edge, ' --> ', current_vertex) 

            # b_graph = Union(b_graph, b) 

            for new_vertex in [current_vertex, best_in_edge]: 

                b_graph.add_node({'word':'TEMP', 'deps':[], 'rel': 'NTOP', 'address': new_vertex}) 

            b_graph.add_arc(best_in_edge, current_vertex) 

            # Beta(current node) = b  - stored for parse recovery 

            # If b_graph contains a cycle, collapse it 

            cycle_path = b_graph.contains_cycle() 

            if cycle_path: 

                # Create a new node v_n+1 with address = len(nodes) + 1 
                new_node = {'word': 'NONE', 'deps':[], 'rel': 'NTOP', 'address': nr_vertices + 1} 

                # c_graph = Union(c_graph, v_n+1) 
                c_graph.add_node(new_node) 

                # Collapse all nodes in cycle C into v_n+1 
                self.update_edge_scores(new_node, cycle_path) 

                self.collapse_nodes(new_node, cycle_path, g_graph, b_graph, c_graph) 

                for cycle_index in cycle_path: 

                    c_graph.add_arc(new_node['address'], cycle_index) 

                self.inner_nodes[new_node['address']] = cycle_path 

                # Add v_n+1 to the list of unvisited vertices 
                unvisited_vertices.insert(0, nr_vertices + 1) 

                # Increment the node counter 
                nr_vertices += 1 

                # Remove cycle nodes from b_graph; B = B - cycle c 
                for cycle_node_address in cycle_path: 

                    b_graph.remove_by_address(cycle_node_address) 

            print('g_graph:\n', g_graph) 

            print() 

            print('b_graph:\n', b_graph) 

            print() 

            print('c_graph:\n', c_graph) 

            print() 

            print('Betas:\n', betas) 

            print('replaced nodes', self.inner_nodes) 

            print() 

        # Recover parse tree 

        print('Final scores:\n', self.scores) 

        print('Recovering parse...') 

        for i in range(len(tokens) + 1, nr_vertices + 1): 

            betas[betas[i][1]] = betas[i] 

        print('Betas: ', betas) 

        for node in original_graph.nodelist: 

            node['deps'] = [] 

        for i in range(1, len(tokens) + 1): 

            original_graph.add_arc(betas[i][0], betas[i][1]) 

        return original_graph 


 

 

 

################################################################# 

# Rule-based Non-Projective Parser 

################################################################# 

 

class NonprojectiveDependencyParser(object): 

    """ 

    A non-projective, rule-based, dependency parser.  This parser 

    will return the set of all possible non-projective parses based on 

    the word-to-word relations defined in the parser's dependency 

    grammar, and will allow the branches of the parse tree to cross 

    in order to capture a variety of linguistic phenomena that a 

    projective parser will not. 

    """ 

 

    def __init__(self, dependency_grammar): 

        """ 

        Creates a new ``NonprojectiveDependencyParser``. 

 

        :param dependency_grammar: a grammar of word-to-word relations. 

        :type dependency_grammar: DependencyGrammar 

        """ 

        self._grammar = dependency_grammar 

 

    def parse(self, tokens): 

        """ 

        Parses the input tokens with respect to the parser's grammar.  Parsing 

        is accomplished by representing the search-space of possible parses as 

        a fully-connected directed graph.  Arcs that would lead to ungrammatical 

        parses are removed and a lattice is constructed of length n, where n is 

        the number of input tokens, to represent all possible grammatical 

        traversals.  All possible paths through the lattice are then enumerated 

        to produce the set of non-projective parses. 

 

        :param tokens: A list of tokens to parse. 

        :type tokens: list(str) 

        :return: A set of non-projective parses. 

        :rtype: list(DependencyGraph) 

        """ 

        # Create graph representation of tokens 

        self._graph = DependencyGraph() 

        self._graph.nodelist = []  # Remove the default root 

        for index, token in enumerate(tokens): 

            self._graph.nodelist.append({'word':token, 'deps':[], 'rel':'NTOP', 'address':index}) 

        for head_node in self._graph.nodelist: 

            deps = [] 

            for dep_node in self._graph.nodelist: 

                if self._grammar.contains(head_node['word'], dep_node['word']) and head_node['word'] != dep_node['word']: 

                    deps.append(dep_node['address']) 

            head_node['deps'] = deps 

        # Create lattice of possible heads 

        roots = [] 

        possible_heads = [] 

        for i, word in enumerate(tokens): 

            heads = [] 

            for j, head in enumerate(tokens): 

                if (i != j) and self._grammar.contains(head, word): 

                    heads.append(j) 

            if len(heads) == 0: 

                roots.append(i) 

            possible_heads.append(heads) 

        # Set roots to attempt 

        if len(roots) > 1: 

            print("No parses found.") 

            return []  # More than one root candidate: no single-rooted parse exists. 

        elif len(roots) == 0: 

            for i in range(len(tokens)): 

                roots.append(i) 

        # Traverse lattice 

        analyses = [] 

        for root in roots: 

            stack = [] 

            analysis = [[] for i in range(len(possible_heads))] 

            i = 0 

            forward = True 

            while i >= 0: 

                if forward: 

                    if len(possible_heads[i]) == 1: 

                        analysis[i] = possible_heads[i][0] 

                    elif len(possible_heads[i]) == 0: 

                        analysis[i] = -1 

                    else: 

                        head = possible_heads[i].pop() 

                        analysis[i] = head 

                        stack.append([i, head]) 

                if not forward: 

                    index_on_stack = False 

                    for stack_item in stack: 

                        if stack_item[0] == i: 

                            index_on_stack = True 

                    orig_length = len(possible_heads[i]) 

                    if index_on_stack and orig_length == 0: 

                        for j in xrange(len(stack) - 1, -1, -1): 

                            stack_item = stack[j] 

                            if stack_item[0] == i: 

                                possible_heads[i].append(stack.pop(j)[1]) 

                    elif index_on_stack and orig_length > 0: 

                        head = possible_heads[i].pop() 

                        analysis[i] = head 

                        stack.append([i, head]) 

                        forward = True 

 

                if i + 1 == len(possible_heads): 

                    analyses.append(analysis[:]) 

                    forward = False 

                if forward: 

                    i += 1 

                else: 

                    i -= 1 

        # Filter parses 

        graphs = [] 

        # Ensure exactly one root and that every word has exactly one head 

        for analysis in analyses: 

            root_count = 0 

            root = [] 

            for i, cell in enumerate(analysis): 

                if cell == -1: 

                    root_count += 1 

                    root = i 

            if root_count == 1: 

                graph = DependencyGraph() 

                graph.nodelist[0]['deps'] = [root + 1]  # keep 'deps' a list, consistent with the other nodes 

                for i in range(len(tokens)): 

                    node = {'word':tokens[i], 'address':i+1} 

                    node['deps'] = [j+1 for j in range(len(tokens)) if analysis[j] == i] 

                    graph.nodelist.append(node) 

#                cycle = graph.contains_cycle() 

#                if not cycle: 

                graphs.append(graph) 

        return graphs 
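
    # Usage sketch: rule_based_demo() below builds a grammar with
    # parse_dependency_grammar() and calls parse() on an eleven-word
    # sentence; the result is a list of DependencyGraph objects, one per
    # analysis in which exactly one word is left headless (the root).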

 

 

################################################################# 

# Demos 

################################################################# 

 

def demo(): 

#   hall_demo() 

    nonprojective_conll_parse_demo() 

    rule_based_demo() 

 

 

def hall_demo(): 

    npp = ProbabilisticNonprojectiveParser() 

    npp.train([], DemoScorer()) 

    parse_graph = npp.parse(['v1', 'v2', 'v3'], [None, None, None]) 

    print(parse_graph) 

 

def nonprojective_conll_parse_demo(): 

    graphs = [DependencyGraph(entry) 

              for entry in conll_data2.split('\n\n') if entry] 

    npp = ProbabilisticNonprojectiveParser() 

    npp.train(graphs, NaiveBayesDependencyScorer()) 

    parse_graph = npp.parse(['Cathy', 'zag', 'hen', 'wild', 'zwaaien', '.'], ['N', 'V', 'Pron', 'Adj', 'N', 'Punc']) 

    print(parse_graph) 

 

def rule_based_demo(): 

    grammar = parse_dependency_grammar(""" 

    'taught' -> 'play' | 'man' 

    'man' -> 'the' | 'in' 

    'in' -> 'corner' 

    'corner' -> 'the' 

    'play' -> 'golf' | 'dachshund' | 'to' 

    'dachshund' -> 'his' 

    """) 

    print(grammar) 

    ndp = NonprojectiveDependencyParser(grammar) 

    graphs = ndp.parse(['the', 'man', 'in', 'the', 'corner', 'taught', 'his', 'dachshund', 'to', 'play', 'golf']) 

    print('Graphs:') 

    for graph in graphs: 

        print(graph) 

 

if __name__ == '__main__': 

    demo()