Coverage for nltk.classify.tadm : 46%
![Keyboard shortcuts](keybd_closed.png)
Hot-keys on this page
- `r` `m` `x` `p` — toggle line displays
- `j` `k` — next/previous highlighted chunk
- `0` (zero) — top of page
- `1` (one) — first highlighted chunk
# Natural Language Toolkit: Interface to TADM Classifier
#
# Copyright (C) 2001-2012 NLTK Project
# Author: Joseph Frazee <jfrazee@mail.utexas.edu>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT
except ImportError: numpy = None
global _tadm_bin 'tadm', bin, env_vars=['TADM_DIR'], binary_names=['tadm'], url='http://tadm.sf.net')
""" Generate an input file for ``tadm`` based on the given corpus of classified tokens.
:type train_toks: list(tuple(dict, str)) :param train_toks: Training data, represented as a list of pairs, the first member of which is a feature dictionary, and the second of which is a classification label. :type encoding: TadmEventMaxentFeatureEncoding :param encoding: A feature encoding, used to convert featuresets into feature vectors. :type stream: stream :param stream: The stream to which the ``tadm`` input file should be written. """ # See the following for a file format description: # # http://sf.net/forum/forum.php?thread_id=1391502&forum_id=473054 # http://sf.net/forum/forum.php?thread_id=1675097&forum_id=473054 ' '.join('%d %d' % u for u in v)))
""" Given the stdout output generated by ``tadm`` when training a model, return a ``numpy`` array containing the corresponding weight vector. """ weights = [] for line in paramfile: weights.append(float(line.strip())) return numpy.array(weights, 'd')
""" Call the ``tadm`` binary with the given arguments. """ raise TypeError('args should be a list of strings')
# Call tadm via a subprocess cmd = [_tadm_bin] + args p = subprocess.Popen(cmd, stdout=sys.stdout) (stdout, stderr) = p.communicate()
# Check the return code. if p.returncode != 0: print() print(stderr) raise OSError('tadm command failed!')
from nltk.classify.util import names_demo from nltk.classify.maxent import TadmMaxentClassifier classifier = names_demo(TadmMaxentClassifier.train)
import sys from nltk.classify.maxent import TadmEventMaxentFeatureEncoding from nltk.classify.tadm import write_tadm_file tokens = [({'f0':1, 'f1':1, 'f3':1}, 'A'), ({'f0':1, 'f2':1, 'f4':1}, 'B'), ({'f0':2, 'f2':1, 'f3':1, 'f4':1}, 'A')] encoding = TadmEventMaxentFeatureEncoding.train(tokens) write_tadm_file(tokens, encoding, sys.stdout) print() for i in range(encoding.length()): print('%s --> %d' % (encoding.describe(i), i)) print()
encoding_demo() names_demo() |