Coverage for nltk.parse.featurechart : 91%
![](keybd_closed.png)
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# -*- coding: utf-8 -*- # Natural Language Toolkit: Chart Parser for Feature-Based Grammars # # Copyright (C) 2001-2012 NLTK Project # Author: Rob Speer <rspeer@mit.edu> # Peter Ljunglöf <peter.ljunglof@heatherleaf.se> # URL: <http://www.nltk.org/> # For license information, see LICENSE.TXT
Extension of chart parsing implementation to handle grammars with feature structures as nodes. """
FeatStructNonterminal, is_nonterminal, is_terminal) FundamentalRule, LeafInitRule, EmptyPredictRule, BottomUpPredictRule, SingleEdgeFundamentalRule, BottomUpPredictCombineRule, CachedTopDownPredictRule, TopDownInitRule)
#//////////////////////////////////////////////////////////// # Tree Edge #////////////////////////////////////////////////////////////
""" A specialized tree edge that allows shared variable bindings between nonterminals on the left-hand side and right-hand side.
Each ``FeatureTreeEdge`` contains a set of ``bindings``, i.e., a dictionary mapping from variables to values. If the edge is not complete, then these bindings are simply stored. However, if the edge is complete, then the constructor applies these bindings to every nonterminal in the edge whose symbol implements the interface ``SubstituteBindingsI``. """ """ Construct a new edge. If the edge is incomplete (i.e., if ``dot<len(rhs)``), then store the bindings as-is. If the edge is complete (i.e., if ``dot==len(rhs)``), then apply the bindings to all nonterminals in ``lhs`` and ``rhs``, and then clear the bindings. See ``TreeEdge`` for a description of the other arguments. """
# If the edge is complete, then substitute in the bindings, # and then throw them away. (If we didn't throw them away, we # might think that 2 complete edges are different just because # they have different bindings, even though all bindings have # already been applied.)
# Initialize the edge.
def from_production(production, index): """ :return: A new ``TreeEdge`` formed from the given production. The new edge's left-hand side and right-hand side will be taken from ``production``; its span will be ``(index,index)``; and its dot position will be ``0``. :rtype: TreeEdge """ rhs=production.rhs(), dot=0)
""" :return: A new ``FeatureTreeEdge`` formed from this edge. The new edge's dot position is increased by ``1``, and its end index will be replaced by ``new_end``. :rtype: FeatureTreeEdge :param new_end: The new end index. :type new_end: int :param bindings: Bindings for the new edge. :type bindings: dict """ lhs=self._lhs, rhs=self._rhs, dot=self._dot+1, bindings=bindings)
""" Return a copy of this edge's bindings dictionary. """
""" :return: The set of variables used by this edge. :rtype: set(Variable) """ self._bindings.keys() + self._bindings.values(), fs_class=FeatStruct)
else: sorted(self._bindings.items()))
# two edges with different bindings are not equal. self._dot, self._bindings), (other._span, other._lhs, other._rhs, other._dot, other._bindings))
# cache this:? tuple(sorted(self._bindings))))
#//////////////////////////////////////////////////////////// # A specialized Chart for feature grammars #////////////////////////////////////////////////////////////
# TODO: subsumes check when adding new edges
""" A Chart for feature grammars. :see: ``Chart`` for more information. """
""" Returns an iterator over the edges in this chart. See ``Chart.select`` for more information about the ``restrictions`` on the edges. """ # If there are no restrictions, then return all edges.
# Find the index corresponding to the given restrictions.
# If it doesn't exist, then create it.
for key in restr_keys)
""" A helper function for ``select``, which creates a new index for a given set of attributes (aka restriction keys). """ # Make sure it's a valid index. raise ValueError('Bad restriction: %s' % key)
# Create the index.
# Add all existing edges to the index. for key in restr_keys)
""" A helper function for ``insert``, which registers the new edge with all existing indexes. """ for key in restr_keys)
""" Helper function which returns the ``TYPE`` feature of the ``item``, if it exists, otherwise it returns the ``item`` itself """ else:
(edge.lhs()[TYPE] == start[TYPE]) and (unify(edge.lhs(), start, rename_vars=True)) ):
#//////////////////////////////////////////////////////////// # Fundamental Rule #////////////////////////////////////////////////////////////
""" A specialized version of the fundamental rule that operates on nonterminals whose symbols are ``FeatStructNonterminal``s. Rather tha simply comparing the nonterminals for equality, they are unified. Variable bindings from these unifications are collected and stored in the chart using a ``FeatureTreeEdge``. When a complete edge is generated, these bindings are applied to all nonterminals in the edge.
The fundamental rule states that:
- ``[A -> alpha \* B1 beta][i:j]`` - ``[B2 -> gamma \*][j:k]``
licenses the edge:
- ``[A -> alpha B3 \* beta][i:j]``
assuming that B1 and B2 can be unified to generate B3. """ # Make sure the rule is applicable. left_edge.is_incomplete() and right_edge.is_complete() and isinstance(left_edge, FeatureTreeEdge)): return # Create a copy of the bindings. # We rename vars here, because we don't want variables # from the two different productions to match. # Unify B1 (left_edge.next) with B2 (right_edge.lhs) to # generate B3 (result). else: # Create a copy of the bindings.
# Construct the new edge.
# Add it to the chart, with appropriate child pointers.
""" A specialized version of the completer / single edge fundamental rule that operates on nonterminals whose symbols are ``FeatStructNonterminal``s. Rather than simply comparing the nonterminals for equality, they are unified. """
is_complete=False, next=right_edge.lhs()):
is_complete=True, lhs=next(left_edge)):
#//////////////////////////////////////////////////////////// # Top-Down Prediction #////////////////////////////////////////////////////////////
""" A specialized version of the (cached) top down predict rule that operates on nonterminals whose symbols are ``FeatStructNonterminal``s. Rather than simply comparing the nonterminals for equality, they are unified.
The top down expand rule states that:
- ``[A -> alpha \* B1 beta][i:j]``
licenses the edge:
- ``[B2 -> \* gamma][j:j]``
for each grammar production ``B2 -> gamma``, assuming that B1 and B2 can be unified. """
# If we've already applied this rule to an edge with the same # next & end, and the chart & grammar have not changed, then # just return (no new edges to add).
# If the left corner in the predicted production is # leaf, it must match with the input.
# We rename vars here, because we don't want variables # from the two different productions to match.
# Record the fact that we've applied this rule.
#//////////////////////////////////////////////////////////// # Bottom-Up Prediction #////////////////////////////////////////////////////////////
# We rename vars here, because we don't want variables # from the two different productions to match. fs_class=FeatStruct)
.move_dot_forward(edge.end(), bindings))
#//////////////////////////////////////////////////////////// # Feature Chart Parser #////////////////////////////////////////////////////////////
FeatureTopDownInitRule(), FeatureTopDownPredictRule(), FeatureSingleEdgeFundamentalRule()] FeatureEmptyPredictRule(), FeatureBottomUpPredictRule(), FeatureSingleEdgeFundamentalRule()] FeatureEmptyPredictRule(), FeatureBottomUpPredictCombineRule(), FeatureSingleEdgeFundamentalRule()]
strategy=BU_LC_FEATURE_STRATEGY, trace_chart_width=20, chart_class=FeatureChart, **parser_args): strategy=strategy, trace_chart_width=trace_chart_width, chart_class=chart_class, **parser_args)
#//////////////////////////////////////////////////////////// # Instantiate Variable Chart #////////////////////////////////////////////////////////////
""" A specialized chart that 'instantiates' variables whose names start with '@', by replacing them with unique new variables. In particular, whenever a complete edge is added to the chart, any variables in the edge's ``lhs`` whose names start with '@' will be replaced by unique new ``Variable``s. """
""" If the edge is a ``FeatureTreeEdge``, and it is complete, then instantiate all variables whose names start with '@', by replacing them with unique new variables.
Note that instantiation is done in-place, since the parsing algorithms might already hold a reference to the edge for future use. """ # If the edge is a leaf, or is not complete, or is # already in the chart, then just return it as-is.
# Get a list of variables that need to be instantiated. # If there are none, then return as-is.
# Instantiate the edge!
for var in edge.lhs().variables() if var.name.startswith('@'))
#//////////////////////////////////////////////////////////// # Demo #////////////////////////////////////////////////////////////
S -> NP VP PP -> Prep NP NP -> NP PP VP -> VP PP VP -> Verb NP VP -> Verb NP -> Det[pl=?x] Noun[pl=?x] NP -> "John" NP -> "I" Det -> "the" Det -> "my" Det[-pl] -> "a" Noun[-pl] -> "dog" Noun[-pl] -> "cookie" Verb -> "ate" Verb -> "saw" Prep -> "with" Prep -> "under" """)
should_print_trees=True, should_print_sentence=True, trace=1, parser=FeatureChartParser, sent='I saw John with a dog with my cookie'): print("Time: %s" % (time.clock() - t)) else: print("Nr trees:", len(trees))
import profile profile.run('for i in range(1): demo()', '/tmp/profile.out') import pstats p = pstats.Stats('/tmp/profile.out') p.strip_dirs().sort_stats('time', 'cum').print_stats(60) p.strip_dirs().sort_stats('cum', 'time').print_stats(60)
from nltk.data import load demo() print() grammar = load('grammars/book_grammars/feat0.fcfg') cp = FeatureChartParser(grammar, trace=2) sent = 'Kim likes children' tokens = sent.split() trees = cp.nbest_parse(tokens) for tree in trees: print(tree) |