#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 06 20:00:00 2015
@author: tastyminerals@gmail.com
"""
from __future__ import division
from collections import Counter
import re
import os
import sys
import threading as thr
import multiprocessing as mproc
import ttk
import Tkinter as tk
import tkFileDialog as tkf
import tkMessageBox
import Queue
import shutil
from string import punctuation as punct
from PIL import ImageTk as itk
import webbrowser
import model
import query
import pos_tagger
# Directory that contains this module; used as the resources directory.
RESDIR = os.path.dirname(os.path.realpath(__file__))
# Interface fonts.
# Other candidates: Helvetica, Times, Arial, Georgia, Tahoma, Verdana
TKFONT = 'TkDefaultFont 10'
TKTEXT_FONT = 'Sans 10'
# string.punctuation without the underscore
PUNCT = punct.replace('_', '')
def set_win_icon(window, icon_path):
    """
    Use the image stored at *icon_path* as the icon of *window*.

    Args:
        | *window* -- Tk window whose icon is replaced
        | *icon_path* (str) -- path of the image file to load
    """
    icon = itk.PhotoImage(file=icon_path)
    # issue the Tk command "wm iconphoto" for this window
    window.tk.call('wm', 'iconphoto', window._w, icon)
def fnode(*args):
    """
    Build a pair of Tkinter text indices of the form "line.column".

    Args:
        *args* -- at least three values: line, start column, end column
        (anything after the third value is ignored)

    Returns:
        (tuple) -- ("line.start", "line.end") as strings
    """
    line = str(args[0])
    return line + '.' + str(args[1]), line + '.' + str(args[2])
def handle_punct(matched_str):
    """
    Turn a matched token into a regular expression that finds it in text.

    Every character listed in PUNCT is backslash-escaped. Word-boundary
    anchors are added around the token unless it starts or ends with a
    punctuation sign (a boundary anchor next to punctuation would prevent
    the match).
    This will not protect from incorrect highlighting in all cases, but it
    reduces them significantly.

    Args:
        *matched_str* (str) -- string matched by a query

    Returns:
        (str) -- regular expression for the token; an empty string is
        returned unchanged
    """
    if not matched_str:
        # nothing to escape and no first/last character to inspect
        return ''
    escaped = ''.join('\\' + char if char in PUNCT else char
                      for char in matched_str)
    if matched_str[0] in PUNCT or matched_str[-1] in PUNCT:
        # The token is already escaped above. Escaping it a second time
        # would also escape the backslashes, and the pattern would then
        # only match a literal backslash in the text.
        return escaped
    return r'\b' + escaped + r'\b'
[docs]class NNSearch(ttk.Frame):
"""UI class that handles all user interactions."""
def __init__(self, master):
    """
    Initialise the application state and build the main window.

    Args:
        *master* -- parent Tk widget; the frame is created inside it and
        the minimum window size is set on it
    """
    # current user query
    self.query = ''
    # statistics state; the *_ready flags are used to avoid recalculation
    self.fully_tagged_sents = {}  # fully POS-tagged sentences
    self.textstats = {}
    self.sstats = {}
    self.stats_ready = self.graphs_ready = self.sstats_ready = False
    # background workers and the queue they deliver results through
    self.processed = False  # True once 'Process!' has been run
    self.model_queue = Queue.PriorityQueue()
    self.process_thread = self.stats_thread = None
    self.graphs_thread = self.sstats_thread = None
    self.pos_tagger_proc = None
    # text and file state
    self.loaded_text = None
    self.is_file_loaded = False
    self.current_fname = 'text_field'  # currently processed file
    # search results and per-view texts
    self.model_results = self.process_results = self.matches = None
    self.view1_text_pos = ''
    self.view2_text = self.view2_text_pos = ''
    self.view3_text = self.view3_text_pos = ''
    # right label template for the Number stats pop-up window
    self.num_rlabl = '{0}\n{1}\n{2}\n---------\n{3}\n{4}\n{5}\n{6}'
    # right label template for the search stats pop-up window
    self.ss_rlabl = '{0}\n{1}\n{2}\n'
    # standalone pos-tagger state
    self.pos_fpath = self.pos_loaded_text = self.pos_out_dir_path = ''
    self.pos_dir_path = RESDIR
    # build the UI; clean_up() runs before the frame itself is created
    self.clean_up()
    ttk.Frame.__init__(self, master)
    # let the main frame stretch with the window
    self.grid(sticky='nsew')
    for column in (0, 1):
        self.columnconfigure(column, weight=1)
    for row in (0, 1, 2, 3):
        self.rowconfigure(row, weight=1)
    master.minsize(height=300, width=400)
    self._build_gui()
    # Penn Treebank tag descriptions
    self.short_treebank, self.longer_treebank = model.get_penn_treebank()
    # found strings and their cache for the find window
    self.ffound = ''
    self.prev_found_cache = []
def press_return(self, *args):
    """
    Validate the query from the Entry widget, search the processed text
    and display the result.

    Intended for the Enter key and the "Search" button. Positional
    arguments are accepted and ignored.
    With an empty query only the selected view is rebuilt and shown.
    With a non-empty query all three views are prepared, the search
    statistics flag is reset and the matches are highlighted.
    """
    # the text has to be processed before it can be searched
    if not self.processed:
        if self.Text.edit_modified() or self.is_file_loaded:
            self.show_message('Please click "Process!" button',
                              'warning.png')
        else:
            self.show_message('No text data provided!', 'error.png')
        return
    self.query = self.Entry.get().strip()  # get query from entry widget
    valid = query.preprocess_query(self.query, self.short_treebank)
    # a result starting with 1 or 2 is an error report whose second item
    # is the offending part of the query
    if valid and valid[0] == 1:
        self.show_message('Incorrect query syntax in: %s' % valid[1],
                          'error.png')
        return
    if valid and valid[0] == 2:
        self.show_message('Incorrect POS-tag used: %s' % valid[1],
                          'error.png')
        return
    # find query matches
    matches = query.find_matches(valid, self.fully_tagged_sents)
    self.matches = matches
    # only the view number matters here; the POS option is not consulted
    _pos, view = self.get_opts()
    if matches and not any(matches.values()):
        self.show_message('No matches found \n revise you query', 'sad.png')
        # reset highlighting
        self.Text.tag_delete('style')
        return
    elif not self.query and view == 1:
        # The source had two identical branches for view 1 (POS-tags on
        # and off). Only one of them returned, unlike every other
        # empty-query branch. They are merged here and always return.
        self.Text.tag_delete('style')
        self.prepare_view1()
        self.highlighter(matches)
        return
    elif not self.query and view == 2:
        self.Text.tag_delete('style')
        self.prepare_view2(matches)
        self.highlighter(matches)
        return
    elif not self.query and view == 3:
        self.Text.tag_delete('style')
        self.prepare_view3(matches)
        self.highlighter(matches)
        return
    elif matches:
        # prepare results and add pos-tags to results
        self.prepare_view1()
        self.prepare_view2(matches)
        self.prepare_view3(matches)
    # NOTE(review): the original indentation of the two statements below
    # could not be recovered; they are assumed to follow the whole
    # if/elif chain -- confirm against the upstream source.
    # reset search stats
    self.set_search_stats_ready(False)
    # insert the results
    self.highlighter(matches)
def ctrl_a(self, callback=False):
    """
    Select everything in the Entry or Text widget, whichever has focus.

    Args:
        *callback* -- unused

    Returns:
        (str) -- always 'break', which tells Tkinter to stop handling the
        event (so the default key binding does not run)
    """
    focused = self.focus_get()
    if focused is self.Entry:
        self.Entry.select_range(0, 'end')
    elif focused is self.Text:
        self.Text.tag_add('sel', '1.0', 'end')
    return 'break'
def ctrl_c(self, callback=False):
    """
    Copy the selected text of the focused Entry or Text widget to the
    clipboard.

    The clipboard is only replaced once the selection has been read
    successfully, so a failed copy leaves the previous content in place.

    Args:
        *callback* -- unused

    Returns:
        (str) -- always 'break', which tells Tkinter to stop handling the
        event (so the default key binding does not run)
    """
    focused = self.focus_get()
    try:
        if focused is self.Entry:
            # Entry.get() accepts no index arguments, so the selection is
            # cut out of the full value using the selection indices
            first = self.Entry.index(tk.SEL_FIRST)
            last = self.Entry.index(tk.SEL_LAST)
            text = self.Entry.get()[first:last]
        elif focused is self.Text:
            text = self.Text.get(tk.SEL_FIRST, tk.SEL_LAST)
        else:
            return 'break'
    except (tk.TclError, TypeError):
        # Tk raises TclError when the widget has no selection
        print('Nothing to copy.')
        return 'break'
    self.clipboard_clear()
    self.clipboard_append(text)
    return 'break'
def ctrl_d(self, callback=False):
    """
    Delete the selected text of the focused Entry or Text widget.

    Nothing happens when the focused widget has no selection.

    Args:
        *callback* -- unused

    Returns:
        (str) -- always 'break', which tells Tkinter to stop handling the
        event (so the default key binding does not run)
    """
    focused = self.focus_get()
    if focused is self.Entry:
        target = self.Entry
    elif focused is self.Text:
        target = self.Text
    else:
        return 'break'
    try:
        target.delete(tk.SEL_FIRST, tk.SEL_LAST)
    except tk.TclError:
        # Tk rejects the selection indices when nothing is selected;
        # there is nothing to delete in that case
        pass
    return 'break'
def ctrl_s(self, callback=False):
    """
    Save the contents of the Text widget when it has focus.

    Args:
        *callback* -- unused

    Returns:
        (str) -- always 'break', which tells Tkinter to stop handling the
        event
    """
    text_has_focus = self.focus_get() is self.Text
    if text_has_focus:
        self.save_data()
    return 'break'
def ctrl_x(self, callback=False):
    """
    Cut the selected text: copy it with ctrl_c(), then remove it with
    ctrl_d(). Only acts when the Entry or the Text widget has focus.

    Args:
        *callback* -- unused

    Returns:
        (str) -- always 'break', which tells Tkinter to stop handling the
        event
    """
    focused = self.focus_get()
    if focused is self.Entry or focused is self.Text:
        self.ctrl_c()
        self.ctrl_d()
    return 'break'
def ctrl_v(self, callback=False):
    """
    Insert the clipboard contents at the cursor of the focused Entry or
    Text widget.

    Args:
        *callback* -- unused

    Returns:
        'break' after a paste attempt; None when Tk raised an error, in
        which case a "Nothing to paste." dialog is shown
    """
    try:
        focused = self.focus_get()
        if focused is self.Entry:
            target = self.Entry
        elif focused is self.Text:
            target = self.Text
        else:
            return 'break'
        target.insert(tk.INSERT, self.Main.clipboard_get())
    except tk.TclError:
        tkMessageBox.showinfo('nn-search 2.0', 'Nothing to paste.')
        return
    return 'break'
def ctrl_z(self, callback=False):
    """
    Undo the last modification in the focused Entry or Text widget.

    Args:
        *callback* -- unused

    Returns:
        'break' after the undo; None when the widget raised TclError or
        AttributeError, in which case a "Nothing to undo." dialog is shown
    """
    try:
        focused = self.focus_get()
        if focused is self.Entry:
            target = self.Entry
        elif focused is self.Text:
            target = self.Text
        else:
            return 'break'
        target.edit_undo()
    except (tk.TclError, AttributeError):
        tkMessageBox.showinfo('nn-search 2.0', 'Nothing to undo.')
        return
    return 'break'
def ctrl_u(self, callback=False):
    """
    Redo the last undone modification in the focused Entry or Text widget.

    Args:
        *callback* -- unused

    Returns:
        'break' after the redo; None when the widget raised TclError or
        AttributeError, in which case a "Nothing to redo." dialog is shown
    """
    try:
        focused = self.focus_get()
        if focused is self.Entry:
            target = self.Entry
        elif focused is self.Text:
            target = self.Text
        else:
            return 'break'
        target.edit_redo()
    except (tk.TclError, AttributeError):
        tkMessageBox.showinfo('nn-search 2.0', 'Nothing to redo.')
        return
    return 'break'
def ctrl_r(self, callback=False):
    """
    Start text processing when the Entry or the Text widget has focus.

    Args:
        *callback* -- unused

    Returns:
        'break' normally; None when TclError or AttributeError was raised,
        in which case a 'Can not run "Process".' dialog is shown
    """
    try:
        focused = self.focus_get()
        if focused is self.Entry or focused is self.Text:
            self.process_command()
    except (tk.TclError, AttributeError):
        tkMessageBox.showinfo('nn-search 2.0', 'Can not run "Process".')
        return
    return 'break'
def ctrl_f(self, callback=False):
    """
    Open the find window when the Text widget has focus.

    Args:
        *callback* -- unused

    Returns:
        (str) -- always 'break', which tells Tkinter to stop handling the
        event
    """
    text_has_focus = self.focus_get() is self.Text
    if text_has_focus:
        self.show_find()
    return 'break'
def insert_text(self, text, plain=False):
    """
    Append the given text to the Text widget, paragraph by paragraph.

    The text is split on blank lines; every paragraph is inserted at the
    end of the widget, followed by a single newline.

    Args:
        | *text* -- text to be inserted
        | *plain* -- accepted but not used by this method
    """
    append = self.Text.insert
    for paragraph in text.split('\n\n'):
        append(tk.END, paragraph)
        append(tk.END, '\n')
def pos_tagger_load_file(self):
    """
    Ask the user for an input file for the pos-tagger tool and read it.

    On success the text is stored, the "Input directory" button is
    disabled, the Process and Stop buttons are enabled and the input file
    label switches to "OK".
    """
    file_kinds = (("txt file", "*.txt"),
                  ("Microsoft Word file", ("*.doc", "*.docx")),
                  ("PDF file", "*.pdf"),
                  ("All files", "*.*"))
    self.pos_fpath = tkf.askopenfilename(filetypes=file_kinds)
    try:
        text = model.read_input_file(self.pos_fpath)
    except TypeError:
        # treated as "Load was clicked but no file was chosen"
        return
    except (OSError, IOError):
        self.show_message("Can not open the specified file!", 'warning.png')
        return
    self.pos_loaded_text = text
    # a single input file replaces directory input
    self.pos_indir_butt.config(state='disabled')
    # unlock Process and Stop buttons
    for button in (self.pos_run_butt, self.pos_stop_butt):
        button.config(state='normal')
    self.pos_dir_path = ''
    # switch the label to the "set" icon
    self.pos_icon1 = itk.PhotoImage(file=self.img_path('set.png'))
    self.pos_infile_labl.configure(image=self.pos_icon1, text='OK',
                                   compound='left')
    self.pos_infile_labl.grid(sticky='we', row=0, column=1)
def load_output_dir(self):
    """
    Ask the user for the pos-tagger output directory.

    When a directory is chosen it is stored in self.pos_out_dir_path and
    the output directory label switches to "OK". When the dialog is
    dismissed the previous directory and label are kept.
    """
    chosen = tkf.askdirectory()
    if not chosen:
        # an empty result means no directory was picked; reporting "OK"
        # for it would be misleading
        return
    self.pos_out_dir_path = chosen
    # update icon
    self.pos_icon3 = itk.PhotoImage(file=self.img_path('set.png'))
    self.pos_outdir_labl.configure(image=self.pos_icon3, text='OK',
                                   compound='left')
def check_pos_tagger_save_results(self):
    """
    Poll the pos-tagger process and report the outcome to the user.

    While the process is alive the check is rescheduled in 100 ms. Once it
    has stopped, the Process and Close buttons are restored and a message
    is shown: "terminated" for a non-zero exit code, "complete" otherwise.
    """
    proc = self.pos_tagger_proc
    if proc.is_alive():
        self.after(100, self.check_pos_tagger_save_results)
        return
    succeeded = proc.exitcode == 0
    if succeeded:
        proc.join()
    # restore the buttons in both cases
    self.pos_run_butt.config(text='Process', image=self.pos_runic,
                             state='normal')
    self.pos_butt.config(state='normal')
    if succeeded:
        msg = ('POS-tagging complete!\n'
               'Check the results in the "output" directory\n'
               'or the directory you specified.')
        icon = 'pos_done.png'
    else:
        msg = 'POS-tagging terminated!'
        icon = 'thunder.png'
    self.show_message(msg, icon)
def kill_pos_proc(self):
    """
    Terminate the spawned pos-tagger process, if there is one, and restore
    the Process and Close buttons.
    """
    proc = self.pos_tagger_proc
    # The Stop button is enabled as soon as an input file is loaded, so
    # this can run before any process has been created.
    if proc is not None:
        proc.terminate()
    self.pos_run_butt.config(text='Process', image=self.pos_runic,
                             state='normal')
    self.pos_butt.config(state='normal')
def pos_tagger_run(self):
    """
    Run the pos-tagger in a separate process on the selected input.

    A loaded input file takes precedence over the input directory. The
    results go to the chosen output directory, or to "output" inside
    RESDIR when none was chosen. While the process runs, the Process and
    Close buttons are disabled and check_pos_tagger_save_results() polls
    for completion.
    """
    out_dir = self.pos_out_dir_path or os.path.join(RESDIR, 'output')
    # Gather the input before touching the buttons: missing input or a
    # read error must not leave the window locked in "Working...".
    if self.pos_loaded_text:
        in_file_data = {self.pos_fpath: self.pos_loaded_text}
        args = [in_file_data, None, out_dir]
    elif self.pos_dir_path:
        in_dir_data = {}
        for fname in os.listdir(self.pos_dir_path):
            text_file = os.path.join(self.pos_dir_path, fname)
            in_dir_data[fname] = model.read_input_file(text_file)
        args = [None, in_dir_data, out_dir]
    else:
        self.show_message('No text data provided!', 'error.png')
        return
    # lock and change the Process button
    self.thunder = itk.PhotoImage(file=self.img_path('thunder.png'))
    self.pos_run_butt.config(text='Working...', image=self.thunder,
                             state='disabled')
    self.pos_butt.config(state='disabled')
    # spawn a new process
    self.pos_tagger_proc = mproc.Process(target=pos_tagger.main,
                                         args=(args, True))
    self.pos_tagger_proc.start()
    self.after(100, self.check_pos_tagger_save_results)
def pos_tagger_win(self):
    """
    Build and display the standalone POS-tagger window.

    The window holds three selection rows (input file, input directory,
    output directory), each with a status label that starts as "Not set",
    followed by the Process and Stop buttons (both initially disabled)
    and a Close button.
    """
    def load_icon(name):
        # the result is kept on self by the caller (presumably so that Tk
        # keeps displaying the image)
        return itk.PhotoImage(file=self.img_path(name))

    def status_label(parent, image):
        return ttk.Label(parent, image=image, compound='left',
                         text='Not set', font=TKFONT)

    window = self.tagger_win = tk.Toplevel()
    window.title('POS-tagger')
    # set custom window icon
    set_win_icon(window, self.img_path('wand.png'))
    window.wm_attributes('-topmost', True)
    window.resizable(0, 0)
    outer = ttk.Frame(window, borderwidth=2, relief='groove')
    outer.grid(sticky='nsew')
    # header
    header = ttk.Label(outer, font='TkDefaultFont 10 bold',
                       text='Standalone POS-tagger')
    header.grid(row=0)
    # input/output selection rows
    io_frame = ttk.Frame(outer, borderwidth=2, relief='groove')
    io_frame.grid(row=1, column=0, sticky='nsew')
    # input file button and label
    self.ifile = load_icon('input_file.png')
    self.pos_infile_butt = ttk.Button(io_frame, padding=(2, 2),
                                      compound='left', image=self.ifile,
                                      text='Input file',
                                      command=self.pos_tagger_load_file)
    self.pos_infile_butt.grid(row=0, column=0, sticky='we', pady=2, padx=2)
    self.pos_icon1 = load_icon('unset.png')
    self.pos_infile_labl = status_label(io_frame, self.pos_icon1)
    self.pos_infile_labl.grid(row=0, column=1, sticky='we')
    # input directory button and label
    self.idir = load_icon('input_dir.png')
    self.pos_indir_butt = ttk.Button(io_frame, padding=(2, 2),
                                     compound='left', image=self.idir,
                                     text='Input directory',
                                     command=self.load_input_dir)
    self.pos_indir_butt.grid(row=2, column=0, sticky='we', padx=2)
    self.pos_icon2 = load_icon('unset.png')
    self.pos_indir_labl = status_label(io_frame, self.pos_icon2)
    self.pos_indir_labl.grid(row=2, column=1, sticky='we')
    # output directory button and label
    self.odir = load_icon('out_dir.png')
    self.pos_outdir_butt = ttk.Button(io_frame, padding=(2, 2),
                                      compound='left', image=self.odir,
                                      text='Output directory',
                                      command=self.load_output_dir)
    self.pos_outdir_butt.grid(row=3, column=0, sticky='we', pady=2, padx=2)
    self.pos_icon3 = load_icon('unset.png')
    self.pos_outdir_labl = status_label(io_frame, self.pos_icon3)
    self.pos_outdir_labl.grid(row=3, column=1, sticky='we')
    # Process and Stop buttons, disabled until input is chosen
    run_frame = ttk.Frame(outer, borderwidth=2, relief='groove')
    run_frame.grid(row=2, column=0, sticky='nsew')
    self.pos_runic = load_icon('run_tagger.png')
    self.pos_run_butt = ttk.Button(run_frame, padding=(3, 3),
                                   compound='left', image=self.pos_runic,
                                   text='Process',
                                   command=self.pos_tagger_run)
    self.pos_run_butt.grid(row=0, column=0, sticky='we', pady=2)
    self.pos_run_butt.config(state='disabled')
    self.pos_stopic = load_icon('stop_tagger.png')
    self.pos_stop_butt = ttk.Button(run_frame, padding=(3, 3),
                                    compound='left', image=self.pos_stopic,
                                    text='Stop',
                                    command=self.kill_pos_proc)
    self.pos_stop_butt.grid(row=0, column=1, sticky='we', pady=2)
    self.pos_stop_butt.config(state='disabled')
    # empty spacer frame above the Close button
    spacer = ttk.Frame(outer, borderwidth=2, height=15, relief='flat')
    spacer.grid(row=3, column=0, sticky='nsew')
    # Close button
    self.pos_butt = ttk.Button(outer, padding=(0, 0), text='Close',
                               command=window.destroy)
    self.pos_butt.grid(sticky='ns')
    self.centrify_widget(window)
def show_message(self, msg, icon, top=True):
    """
    Show a small pop-up window with a message, an image and an OK button.

    Args:
        | *msg* (str) -- a message to display
        | *icon* (str) -- name of the image shown under the message
        | *top* (bool) -- keep the message window on top of other windows
    """
    popup = tk.Toplevel()
    # the window icon itself is always 'info.png'
    set_win_icon(popup, self.img_path('info.png'))
    popup.title('')
    if top:
        popup.wm_attributes('-topmost', 1)
    popup.resizable(0, 0)
    border = ttk.Frame(popup, borderwidth=2, relief='groove')
    border.grid(sticky='nsew')
    content = ttk.Frame(border, borderwidth=2, relief='flat')
    content.grid(sticky='nsew')
    text_label = ttk.Label(content, font='TkDefaultFont 11',
                           anchor=tk.CENTER, text=msg)
    text_label.grid()
    self.err_img = itk.PhotoImage(file=self.img_path(icon))
    image_label = ttk.Label(content, image=self.err_img)
    image_label.grid()
    ok_button = ttk.Button(border, padding=(0, 2), text='OK',
                           command=popup.destroy, takefocus=True)
    ok_button.grid()
    self.centrify_widget(popup)
def save_data(self):
    """
    Ask for a target file and write the Text widget contents to it.

    The text is written UTF-8 encoded. Nothing is written when the dialog
    is dismissed. Failures to open or write the file are reported to the
    user with show_message().
    """
    types = (("txt file", "*.txt"),
             ("All files", "*.*"))
    try:
        opened_file = tkf.asksaveasfile(mode='w', filetypes=types)
    except (IOError, OSError):
        msg = 'Can not open the specified file!'
        self.show_message(msg, 'error.png')
        return
    if opened_file is None:
        # the dialog was dismissed, there is no file to write to
        return
    try:
        # the with-block closes the file even when the write fails, so
        # the data is flushed before this method returns
        with opened_file:
            text = self.Text.get('1.0', tk.END).encode('utf-8')
            opened_file.write(text)
    except (IOError, OSError):
        msg = 'Can not write the specified file!\n' + \
              'Make sure there is enough free space on disk'
        self.show_message(msg, 'error.png')
        return
def load_data(self):
    """
    Let the user pick a file and load its text into the Text widget.

    Files larger than 10 MB are rejected with a warning. On success the
    Name and Size labels are updated, the text is appended to the Text
    widget, the cached statistics and graphs are invalidated and the
    widget's "modified" flag is cleared.

    Returns:
        None in every case; the text itself is stored in self.loaded_text
    """
    types = (("txt file", "*.txt"),
             ("Microsoft Word file", ("*.doc", "*.docx")),
             ("PDF file", "*.pdf"),
             ("All files", "*.*"))
    fpath = tkf.askopenfilename(filetypes=types)
    if not fpath:
        # the dialog was dismissed; an empty path must not be reported as
        # a file that can not be opened
        return
    try:
        # limit the file size to 10 mb
        fsize = os.path.getsize(fpath) / (1024.0 * 1024)
        if fsize > 10:
            self.show_message("The file is too big!", 'warning.png')
            return None
        self.loaded_text = model.read_input_file(fpath)
    except TypeError:  # when clicked Load and didn't choose any file
        return
    except (OSError, IOError):
        msg = "Can not open the specified file!"
        self.show_message(msg, 'warning.png')
        return
    full_name = os.path.basename(fpath)
    # only the name shown in the UI is shortened
    if len(full_name) > 20:
        shown_name = full_name[:17] + '...'
    else:
        shown_name = full_name
    # Derive the current file name from the full name: taking it from the
    # shortened one would leave stray dots at the end of the name.
    self.current_fname = os.path.splitext(full_name)[0]
    # update the file stats
    self.set_file_loaded(True)
    self.stats0.config(text="Name: {0}".format(shown_name))
    self.stats1.config(text="Size: {0}kb".format(round(fsize * 1024, 1)))
    # insert read text into Text widget
    self.insert_text(self.loaded_text)
    # reset text statistics after we loaded a new file
    self.set_stats_ready(False)
    self.set_graphs_ready(False)
    self.Text.edit_modified(False)
def check_process_thread_save_results(self):
    """
    Poll the text processing thread and collect its results.

    While the thread is alive the check is rescheduled in 50 ms. Once it
    has finished, the results are taken from the model queue, the second
    item is stored as the fully tagged sentences, the progress window is
    closed and the UI is unlocked.
    """
    worker = self.process_thread
    if worker.is_alive():
        self.after(50, self.check_process_thread_save_results)
        return
    worker.join()
    # get the results of model processing
    results = self.model_queue.get()
    self.process_results = results
    self.fully_tagged_sents = results[1]
    self.progress_bar.stop()
    self.prog_win.destroy()
    self.lock_ui(False)
def run_progressbar(self):
    """
    Open the "Processing" window and start its indeterminate progress bar.

    The window, its frame, the image and the progress bar are stored on
    self (prog_win, progFr, prog_img, progress_bar).
    """
    window = self.prog_win = tk.Toplevel()
    # set custom window icon
    set_win_icon(window, self.img_path('proc.png'))
    window.wm_attributes('-topmost', 1)  # keep window topmost
    window.title('Processing')
    window.resizable(0, 0)
    frame = self.progFr = ttk.Frame(window, borderwidth=2, relief='flat')
    frame.grid(sticky='nsew')
    notice = ttk.Label(frame, font=TKFONT, text="Exercise some patience...")
    notice.grid()
    self.prog_img = itk.PhotoImage(file=self.img_path('cup.png'))
    picture = ttk.Label(frame, image=self.prog_img)
    picture.grid()
    self.progress_bar = ttk.Progressbar(frame, orient=tk.HORIZONTAL,
                                        length=200, mode='indeterminate',
                                        takefocus=True)
    self.progress_bar.grid()
    self.centrify_widget(window)
    self.progress_bar.start()
def process_command(self):
    """
    Process the contents of the Text widget in a background thread.

    When there is no text (widget unmodified and no file loaded) an error
    message is shown and the "processed" flag is cleared. Otherwise the
    Name and Size labels are updated, the UI is locked, the progress
    window is shown and model.process_text is started in a thread, with
    check_process_thread_save_results() polling for its completion. The
    cached statistics, graphs and search statistics are invalidated.
    """
    has_text = self.Text.edit_modified() or self.is_file_loaded
    if not has_text:
        self.show_message('No text data provided!', 'error.png')
        self.set_processed(False)
        return
    # update the file stats from the widget contents
    loaded_text = self.Text.get("1.0", 'end-1c')
    self.stats0.config(text="Name: {0}".format('Text field'))
    acc_size = round(len(loaded_text) / 1024, 1)
    self.stats1.config(text="Size: {0}kb".format(acc_size))
    self.lock_ui(True)
    self.run_progressbar()
    # run the model in its own thread
    worker = thr.Thread(target=model.process_text,
                        args=(self.model_queue, loaded_text))
    self.process_thread = worker
    worker.start()
    # check if the thread finished
    self.after(50, self.check_process_thread_save_results)
    self.set_processed(True)
    for invalidate in (self.set_stats_ready, self.set_graphs_ready,
                       self.set_search_stats_ready):
        invalidate(False)
def check_nums_thread_save_results(self):
    """
    Poll the statistics thread and fill in the Statistics window.

    The Close button of the window and the stats button are kept disabled
    while the thread runs; the check is rescheduled every 10 ms. Once the
    thread has finished, the statistics are read from the model queue,
    the right-hand label is updated and the buttons are enabled again.
    """
    self.lock_toplevel(self.stats_win_butt, True)
    self.stats_butt1.config(state='disabled')
    if self.stats_thread.is_alive():
        self.after(10, self.check_nums_thread_save_results)
        return
    self.stats_thread.join()
    # get the results of model processing
    self.textstats = self.model_queue.get()
    self.stats_win.title("Statistics")
    self.stats_win.resizable(0, 0)
    # build the right-hand label from the calculated stats
    keys = ('tokens', 'words', 'sents', 'diversity', 'subj', 'polar',
            'corr')
    values = [self.textstats.get(key) for key in keys]
    stats_text = self.num_rlabl.format(*values)
    self.update_idletasks()
    self.stats_win.geometry("")
    self.set_stats_ready(True)
    self.lock_toplevel(self.stats_win_butt, False)
    self.stats_butt1.config(state='normal')
    self.rtext.config(text=stats_text)
def show_nums_win(self):
    """
    Open the Statistics window.

    When the text is processed but the statistics are not available yet,
    model.get_stats is started in a thread, the values are shown as
    "Wait..." and check_nums_thread_save_results() fills them in later.
    When the statistics are already available they are shown directly.
    When the text is not processed a message is shown and no window is
    opened.
    """
    stat_keys = ('tokens', 'words', 'sents', 'diversity', 'subj', 'polar',
                 'corr')
    if self.processed and not self.stats_ready:
        # calculate in a separate thread to keep the UI responsive
        self.model_queue = Queue.PriorityQueue()
        self.stats_thread = thr.Thread(target=model.get_stats,
                                       args=(self.model_queue,
                                             self.process_results[0]))
        self.stats_thread.start()
        # placeholders until the thread delivers the numbers
        self.textstats = dict.fromkeys(stat_keys, 'Wait...')
        self.after(10, self.check_nums_thread_save_results)
    elif not self.processed:
        if self.is_file_loaded or self.Text.edit_modified():
            self.show_message('Please click "Process!" button',
                              'warning.png')
        else:
            self.show_message('No text data provided!', 'error.png')
        return
    # right-hand label: current values (or the placeholders)
    values = [self.textstats.get(key) for key in stat_keys]
    stats_text = self.num_rlabl.format(*values)
    # the widgets are stored on self because they are updated later
    window = self.stats_win = tk.Toplevel()
    # set custom window icon
    set_win_icon(window, self.img_path('stats.png'))
    window.title("Statistics")
    window.resizable(0, 0)
    self.statsFr = ttk.Frame(window, borderwidth=2, relief='groove')
    self.statsFr.grid()
    self.statsFrInn1 = ttk.Frame(self.statsFr, borderwidth=2,
                                 relief='groove')
    self.statsFrInn1.grid(row=0, column=0, sticky='ns')
    self.statsFrInn2 = ttk.Frame(self.statsFr, borderwidth=2,
                                 relief='groove')
    self.statsFrInn2.grid(row=0, column=1, sticky='ns')
    # left-hand label: captions
    num_llabl = ('Tokens count:\nWords count:\nSentences count: \n'
                 '-------------------------------\n'
                 'Lexical diversity [0,1]:\nSubjectivity [0,1]: \n'
                 'Polarity [-1,1]: \nCorrectness [0,1]: \n')
    self.ltext = ttk.Label(self.statsFrInn1, font=TKFONT, text=num_llabl)
    self.ltext.grid()
    self.rtext = ttk.Label(self.statsFrInn2, font='TkDefaultFont 10 bold',
                           text=stats_text)
    self.rtext.grid()
    self.stats_win_butt = ttk.Button(self.statsFr, text='Close',
                                     padding=(0, 0),
                                     command=window.destroy)
    self.stats_win_butt.grid(sticky='ns')
    self.centrify_widget(window)
def _tags_help(self, postags):
    """
    Build and display a pop-up table with POS-tag descriptions.
    <The first item of every column is shown as its header, the remaining
    items are rendered as one multi-line label per column.>
    Args:
        *postags* (list) -- three parallel sequences: ids, tags, descriptions
    """
    bold = 'TkDefaultFont 10 bold'
    ids, tags, desc = postags
    columns = (ids, tags, desc)
    titles = [column[0] for column in columns]
    win = tk.Toplevel()
    # set custom window icon
    set_win_icon(win, self.img_path('info.png'))
    win.title('Penn Treebank POS-tags')
    win.resizable(0, 0)
    outer = ttk.Frame(win, borderwidth=2, relief='groove')
    outer.grid(sticky='nsew')
    # row 0 holds the header cells, row 1 the column bodies
    head_cells = []
    for col in range(3):
        cell = ttk.Frame(outer, borderwidth=2, relief='flat')
        cell.grid(row=0, column=col)
        head_cells.append(cell)
    body_cells = []
    for col in range(3):
        cell = ttk.Frame(outer, borderwidth=2, relief='groove')
        cell.grid(row=1, column=col)
        body_cells.append(cell)
    for cell, title in zip(head_cells, titles):
        ttk.Label(cell, font=bold, text=title).grid()
    # only the middle (tags) column is printed in bold
    body_fonts = (TKFONT, bold, TKFONT)
    for cell, font, column in zip(body_cells, body_fonts, columns):
        ttk.Label(cell, font=font, text='\n'.join(column[1:])).grid()
    # create Close button
    close_butt = ttk.Button(outer, padding=(0, 0), text='Close',
                            command=win.destroy)
    close_butt.grid(column=2)
    self.centrify_widget(win)
def check_graphs_thread_save_results(self):
    """
    Poll the graphs worker thread and finish the Graphs window.
    <While the thread is alive the check re-schedules itself every 10ms.
    When the thread is done, two results are read from the model queue
    (stored as self.srt_tags and self.ngrams, in this order), the graphs
    are marked as ready and the "Graphs" button is unlocked.>
    <If the Graphs window was closed while the worker was running, the
    results are kept but nothing is drawn, because widgets can not be
    created inside a destroyed Toplevel.>
    """
    if self.graphs_thread.is_alive():
        self.after(10, self.check_graphs_thread_save_results)
        return
    self.graphs_thread.join()
    # get the results of model processing
    self.srt_tags = self.model_queue.get()
    self.ngrams = self.model_queue.get()
    self.set_graphs_ready(True)
    self.stats_butt2.config(state='normal')
    # a user could have closed the "Wait" window in the meantime
    if not self.graphs_win.winfo_exists():
        return
    # remove "Wait" message
    self.waitFr.destroy()
    self.finish_graphs_window()
def finish_graphs_window(self):
    """
    Finish building Graphs window when the results are ready.
    <Plotting, word/ngrams calculation etc. takes time. We first show
    'Wait...' Toplevel window and then fill it with the elements>.
    <Reads self.srt_tags (POS-tag -> count mapping), self.ngrams (three
    sequences of (ngram, count) rows: words, 2-grams, 3-grams) and the
    plot images stored in RESDIR/_graphs for self.current_fname.>
    <An empty ngram sequence is rendered as an empty column instead of
    raising an error.>
    """
    bold = 'TkDefaultFont 10 bold'

    def groove(parent):
        """Create a Frame with the common groove border."""
        return ttk.Frame(parent, borderwidth=2, relief='groove')

    def as_columns(rows, join_words):
        """
        Convert (ngram, count) rows into two multi-line strings.
        Unlike zip(*rows) unpacking this also works for no rows at all.
        """
        names = []
        counts = []
        for row in rows:
            ngram = ' '.join(row[0]) if join_words else row[0]
            names.append('"' + ngram + '"')
            counts.append(str(row[1]))
        return '\n'.join(names), '\n'.join(counts)

    # create Frames for Toplevel window
    graphFr = groove(self.graphs_win)
    graphFr.grid(row=0, column=0, sticky='nsew')
    graphFrInn = groove(graphFr)
    graphFrInn.grid(row=1, column=0, sticky='nsew')
    graphFr0 = groove(graphFrInn)
    graphFr0.grid(row=0, column=0, sticky='nsew')
    graphFr0Inn = groove(graphFr0)
    graphFr0Inn.grid(row=1, column=0, sticky='nsew')
    graphFr1 = groove(graphFrInn)
    graphFr1.grid(row=0, column=1, sticky='nsew')
    # graphFr2 will contain two inner Frames for pie charts and ngram cnts
    graphFr2 = groove(graphFrInn)
    graphFr2.grid(row=0, column=2, sticky='nsew')
    graphFrInn0 = groove(graphFr2)
    graphFrInn0.grid(row=0, column=0, sticky='nsew')
    graphFrInn1 = groove(graphFr2)
    graphFrInn1.grid(row=1, column=0, sticky='nsew')
    ngramsFr = groove(graphFrInn0)
    ngramsFr.grid(row=1, column=0, sticky="nsew")
    # add buttons
    self.pos_img = itk.PhotoImage(file=self.img_path('info.png'))
    tag_help = ttk.Button(graphFr0, padding=(0, 0),
                          text='POS-tags help', image=self.pos_img,
                          compound='left', command=self.pos_tags_short)
    tag_help.grid(row=0, sticky='we')
    # add Close button
    close_butt = ttk.Button(graphFr, padding=(0, 0), text='Close',
                            command=self.graphs_win.destroy)
    close_butt.grid(sticky='ns')
    # extract POS-tags, occurences, calculate ratio
    tag_counts = list(self.srt_tags.values())
    total_cnt = sum(tag_counts)
    self.tgs = '\n'.join([k for k in self.srt_tags])
    self.tgs_cnts = '\n'.join([str(v) for v in tag_counts])
    self.ratios = '\n'.join([str(round(v/total_cnt*100, 1)) + '%'
                             for v in tag_counts])
    # create inner Frames for POS-tags, counts and ratios
    tag_cells = []
    for col in range(3):
        cell = groove(graphFr0Inn)
        cell.grid(row=0, column=col, sticky='nsew')
        tag_cells.append(cell)
    # insert POS-tags, counts and ratios
    ttk.Label(tag_cells[0], font=TKFONT, text=self.tgs).grid()
    ttk.Label(tag_cells[1], font=bold, text=self.tgs_cnts).grid()
    ttk.Label(tag_cells[2], font=TKFONT, text=self.ratios).grid()
    # insert POS-tags plot
    plot1_path = os.path.join(RESDIR, '_graphs',
                              self.current_fname+'.png')
    image = itk.Image.open(plot1_path)
    image = image.resize((550, 500), itk.Image.ANTIALIAS)
    # keep the images on self, the labels below display them
    self.plot1 = itk.PhotoImage(image)
    plot1Label = ttk.Label(graphFr1, image=self.plot1)
    plot1Label.grid(row=0, column=0, sticky="nsew")
    # insert functional/content words pie chart
    pie_path = os.path.join(RESDIR, '_graphs',
                            self.current_fname+'_pie.png')
    pie_image = itk.Image.open(pie_path)
    pie_image = pie_image.resize((400, 300), itk.Image.ANTIALIAS)
    self.pie_plot = itk.PhotoImage(pie_image)
    pie_header = "Functional vs content words ratio"
    ttk.Label(graphFrInn1, font=bold, text=pie_header).grid(row=0)
    plot2Label = ttk.Label(graphFrInn1, image=self.pie_plot)
    plot2Label.grid(row=1, column=0, sticky="nsew")
    # insert ngrams stats: single words are plain strings, 2-grams and
    # 3-grams are sequences of words that have to be joined first
    top5, top5_cnts = as_columns(self.ngrams[0], False)
    ngram2, ngram2_cnts = as_columns(self.ngrams[1], True)
    ngram3, ngram3_cnts = as_columns(self.ngrams[2], True)
    # create inner Frames for ngrams header
    headerFr = ttk.Frame(graphFrInn0, borderwidth=0, relief='flat')
    headerFr.grid(row=0, sticky='we')
    head_msg = "Top 10 words, 2-grams, 3-grams and their counts"
    ttk.Label(headerFr, font=bold, text=head_msg).grid(row=0, column=1)
    # six cells: (text, counts) pairs for words, 2-grams and 3-grams
    ngram_cells = []
    for col in range(6):
        cell = groove(ngramsFr)
        cell.grid(row=1, column=col, sticky='nsew')
        ngram_cells.append(cell)
    # inserting ngram counts
    ttk.Label(ngram_cells[0], font=TKFONT, text=top5).grid()
    ttk.Label(ngram_cells[1], font=bold, text=top5_cnts).grid()
    ttk.Label(ngram_cells[2], font=TKFONT, text=ngram2).grid()
    ttk.Label(ngram_cells[3], font=bold, text=ngram2_cnts).grid()
    ttk.Label(ngram_cells[4], font=TKFONT, text=ngram3).grid()
    ttk.Label(ngram_cells[5], font=bold, text=ngram3_cnts).grid()
    # update and reset window size, tkinter will adjust
    self.graphs_win.update_idletasks()
    self.graphs_win.geometry('')
    self.graphs_win.minsize(120, 300)
    self.centrify_widget(self.graphs_win)
    self.graphs_win.update()
    # limit max size using the current "WIDTHxHEIGHT+X+Y" geometry
    x, y = self.graphs_win.winfo_geometry().split('+')[0].split('x')
    self.graphs_win.maxsize(int(x), int(y))
def mk_graphs_win(self):
    """
    Open the Graphs window.
    <If the text has not been processed yet, only a message is shown and
    no window is created.>
    <If the graphs are already calculated the window is filled at once.
    Otherwise a "Wait..." placeholder is displayed, the "Graphs" button is
    locked and a separate thread is started to create plots and calculate
    word/ngram counts.>
    """
    if not self.processed:
        # decide which hint to show before any window gets created
        if self.is_file_loaded or self.Text.edit_modified():
            self.show_message('Please click "Process!" button',
                              'warning.png')
        else:
            self.show_message('No text data provided!', 'error.png')
        return
    # create a Toplevel first, we will update it later
    self.graphs_win = tk.Toplevel()
    # set custom window icon
    set_win_icon(self.graphs_win, self.img_path('stats2.png'))
    self.graphs_win.title('Graphs')
    if self.graphs_ready:
        self.graphs_win.minsize(120, 300)
        self.finish_graphs_window()
        return
    self.stats_butt2.config(state='disabled')
    self.waitFr = ttk.Frame(self.graphs_win, borderwidth=2,
                            relief='groove')
    self.waitFr.grid(sticky='nsew')
    ttk.Label(self.waitFr,
              font='TkDefaultFont 12',
              text='Wait... Creating plots...').grid()
    self.wait_img = itk.PhotoImage(file=self.img_path('cup.png'))
    ttk.Label(self.waitFr, image=self.wait_img).grid()
    self.centrify_widget(self.graphs_win)
    self.model_queue = Queue.PriorityQueue()
    # count how often every POS-tag occurs in the processed text
    tags_dic = Counter((tup[1] for tup
                        in self.process_results[0].tags))
    self.graphs_thread = thr.Thread(target=model.get_graphs_data,
                                    args=(self.model_queue, tags_dic,
                                          self.current_fname,
                                          self.process_results))
    self.graphs_thread.start()
    # check if the graphs thread finished
    self.after(100, self.check_graphs_thread_save_results)
def check_search_stats_thread_save_results(self):
    """
    Poll the search stats worker thread and publish its results.
    <On every call the Close button of the stats window and the "Graphs"
    button are locked. While the thread is alive the check re-schedules
    itself every 10ms. When it is done, the stats are read from the model
    queue, the widgets are unlocked and the values label is updated.>
    """
    self.lock_toplevel(self.sstats_win_butt, True)
    self.stats_butt2.config(state='disabled')
    if self.sstats_thread.is_alive():
        self.after(10, self.check_search_stats_thread_save_results)
        return
    self.sstats_thread.join()
    # get the results of model processing
    self.sstats = self.model_queue.get()
    # fill the label template with the calculated stats
    stat_keys = ('Tokens matched', 'Matched length',
                 'Matched length ratio')
    stat_values = [self.sstats.get(key) for key in stat_keys]
    sstats_text = self.ss_rlabl.format(*stat_values)
    # let tkinter recalculate the window size
    self.update_idletasks()
    self.sstats_win.geometry("")
    self.set_search_stats_ready(True)
    self.lock_toplevel(self.sstats_win_butt, False)
    self.stats_butt2.config(state='normal')
    self.ss_rtext.config(text=sstats_text)
def show_search_stats_win(self):
    """
    Show a window with statistics for a query search.
    Search stats:
        number of matched terms
        length of all matched strings
        % of matched data to all search corpus
    <If the stats are not ready yet, a worker thread is started, the
    values are set to 'Wait...' and a periodic check of the thread is
    scheduled.>
    """
    if not self.matches:
        self.show_message('Please provide a search query!', 'warning.png')
        return
    stat_keys = ('Tokens matched', 'Matched length',
                 'Matched length ratio')
    if not self.sstats_ready:
        self.model_queue = Queue.PriorityQueue()
        corpus = self.Text.get('1.0', tk.END)
        self.sstats_thread = thr.Thread(target=model.get_search_stats,
                                        args=(self.model_queue,
                                              self.matches, corpus))
        self.sstats_thread.start()
        # placeholders displayed until the thread delivers real values
        self.sstats = dict.fromkeys(stat_keys, 'Wait...')
        # check if the stats thread finished
        self.after(10, self.check_search_stats_thread_save_results)
    elif not self.processed:
        if self.Text.edit_modified() or self.is_file_loaded:
            self.show_message('Please click "Process!" button',
                              'warning.png')
        else:
            self.show_message('No text data provided!', 'error.png')
        return
    ss_text = self.ss_rlabl.format(*[self.sstats.get(key)
                                     for key in stat_keys])
    # build a Toplevel window
    self.sstats_win = tk.Toplevel()
    # set custom window icon
    set_win_icon(self.sstats_win, self.img_path('stats3.png'))
    self.sstats_win.resizable(0, 0)
    self.sstats_win.title("Search statistics")
    frame_opts = dict(borderwidth=2, relief='groove')
    self.sstatsFr = ttk.Frame(self.sstats_win, **frame_opts)
    self.sstatsFr.grid()
    # left cell holds the captions, right cell the values
    self.sstatsFrInn1 = ttk.Frame(self.sstatsFr, **frame_opts)
    self.sstatsFrInn1.grid(row=0, column=0, sticky='ns')
    self.sstatsFrInn2 = ttk.Frame(self.sstatsFr, **frame_opts)
    self.sstatsFrInn2.grid(row=0, column=1, sticky='ns')
    ss_llabl = ('Tokens matched:\nMatched length:\n'
                'Matched length / full text [0,1]:\n')
    self.ss_ltext = ttk.Label(self.sstatsFrInn1, font=TKFONT,
                              text=ss_llabl)
    self.ss_ltext.grid()
    self.ss_rtext = ttk.Label(self.sstatsFrInn2,
                              font='TkDefaultFont 10 bold',
                              text=ss_text)
    self.ss_rtext.grid()
    self.sstats_win_butt = ttk.Button(self.sstatsFr, text='Close',
                                      padding=(0, 0),
                                      command=self.sstats_win.destroy)
    self.sstats_win_butt.grid(sticky='ns')
    self.centrify_widget(self.sstats_win)
def highlight_find(self):
    """Configure the look of the 'find' tag used for found strings."""
    find_style = dict(foreground='#000000',
                      background="#FFD417",
                      font='TkDefaultFont 10')
    self.Text.tag_configure('find', **find_style)
def find_query(self):
    """
    Find the search query. Highlight and auto scroll to the matched string.
    <The query is a regular expression. The search starts at the insert
    cursor and is repeated from the beginning of the text if nothing was
    found after the cursor.>
    <The cache of found positions and its index are reset here.>
    """
    # set cache for keeping the indeces of matched strings
    self.prev_found_cache = []
    # index to access self.prev_found_cache
    self.last = -1
    # drop the tag completely, it is created again by tag_add below
    self.Text.tag_delete('find')
    pattern = self.findEnt.get().strip()
    # a regexp match is not as long as the query itself, so let the Text
    # widget report the real length of the match
    match_len = tk.IntVar(self)
    # get current mouse cursor position in text field
    start = self.Text.index(tk.INSERT)
    self.ffound = self.Text.search(pattern, start, stopindex=tk.END,
                                   regexp=True, count=match_len)
    # in case a user put mouse cursor somewhere in the end
    if not self.ffound:
        self.ffound = self.Text.search(pattern, '1.0', stopindex=tk.END,
                                       regexp=True, count=match_len)
    if not self.ffound:
        self.show_message("Nothing found!", 'info.png', True)
        return
    end_mark = '%s+%dc' % (self.ffound, match_len.get())
    self.Text.tag_add('find', self.ffound, end_mark)
    self.highlight_find()
    # auto scroll to found string
    self.Text.see(end_mark)
    self.prev_found_cache.append((self.ffound, end_mark))
def find_next(self, dummy_arg=''):
    """
    Find next matching string if exists.
    <The search continues from the end of the cached match selected by
    self.last. Without a previous match a fresh search is started via
    find_query.>
    Args:
        *dummy_arg* -- unused, accepts the event passed by key bindings
    """
    if not self.ffound:
        self.find_query()
        return
    if not self.prev_found_cache:
        return
    # drop the tag completely, it is created again by tag_add below
    self.Text.tag_delete('find')
    pattern = self.findEnt.get().strip()
    _, prev_end = self.prev_found_cache[self.last]
    # a regexp match is not as long as the query itself, so let the Text
    # widget report the real length of the match
    match_len = tk.IntVar(self)
    self.ffound = self.Text.search(pattern, prev_end, stopindex=tk.END,
                                   regexp=True, count=match_len)
    if not self.ffound:
        self.show_message(" Nothing found! ", 'info.png', True)
        return
    end_mark = '%s+%dc' % (self.ffound, match_len.get())
    self.Text.tag_add('find', self.ffound, end_mark)
    self.highlight_find()
    # auto scroll to found string
    self.Text.see(end_mark)
    self.prev_found_cache.append((self.ffound, end_mark))
def find_prev(self):
    """
    Highlight the previously found string if exists.
    <Moves self.last one step back through the cache of found positions,
    but never further than its first element.>
    """
    found_cache = self.prev_found_cache
    if not found_cache:
        return
    self.Text.tag_delete('find', '1.0', tk.END)
    # step back only while there is an older cached match
    if abs(self.last - 1) <= len(found_cache):
        self.last -= 1
    start_found, prev_found = found_cache[self.last]
    if not self.ffound:
        self.show_message(" Nothing found! ", 'info.png', True)
        return
    self.Text.tag_add('find', start_found, prev_found)
    self.highlight_find()
    # auto scroll to found string
    self.Text.see(prev_found)
def show_find(self):
    """
    Display a simple text search toplevel window.
    <The window is kept on top of the others, <Return> is bound to
    "find next". It contains the query entry and Close, Previous, Next,
    Find buttons.>
    """
    win = tk.Toplevel()
    win.wm_attributes('-topmost', True)
    win.resizable(0, 0)
    # bind <Return>
    win.bind('<Return>', self.find_next, '+')
    # set custom window icon
    set_win_icon(win, self.img_path('nn-search.png'))
    win.title('')
    entry_frame = ttk.Frame(win, borderwidth=2, relief='groove')
    entry_frame.grid(row=0, sticky='nsew')
    buttons_frame = ttk.Frame(win, borderwidth=2, relief='groove')
    buttons_frame.grid(row=1, sticky='nsew')
    self.findEnt = ttk.Entry(entry_frame, font='TkDefaultFont 11',
                             width=30)
    self.findEnt.grid()
    self.findEnt.focus_set()
    # all buttons share one row and the same paddings
    cell = dict(row=1, padx=1, pady=1)
    close_butt = ttk.Button(buttons_frame, padding=(-10, 0),
                            text='Close', command=win.destroy)
    close_butt.grid(column=0, sticky='w', **cell)
    self.prev = itk.PhotoImage(file=self.img_path('find_prev.png'))
    self.next = itk.PhotoImage(file=self.img_path('find_next.png'))
    # (text, image, command, padding) for columns 1, 2 and 3
    image_buttons = (
        ('Previous', self.prev, self.find_prev, (-10, 0)),
        ('Next', self.next, self.find_next, (-10, 0)),
        ('Find', self.find, self.find_query, (0, 0)),
    )
    for column, spec in enumerate(image_buttons, 1):
        text, image, command, padding = spec
        butt = ttk.Button(buttons_frame, padding=padding, text=text,
                          image=image, compound='left', command=command)
        butt.grid(column=column, sticky='nwe', **cell)
    self.centrify_widget(win)
def prepare_view1(self):
    """
    Prepare the texts displayed in view 1.
    <self.view1_text is the normalized raw text, self.view1_text_pos is
    all sentences joined by a space with every token written as
    token_POS-tag.>
    """
    # plain text for view1
    self.view1_text = model.normalize_text(self.process_results[0].raw)
    # text with POS-tags for view1
    tagged_sents = []
    for sent_tokens in self.process_results[1].values():
        tagged = ['_'.join([tok[0], tok[1]]) for tok in sent_tokens]
        tagged_sents.append(' '.join(tagged))
    self.view1_text_pos = ' '.join(tagged_sents).lstrip(' ')
def prepare_view2(self, matched):
    """
    Prepare the texts displayed in view 2.
    <View 2 lists only the sentences which contain query matches, one
    sentence per paragraph, each prefixed with "<number>: ".>
    <Sets self.view2_text (plain tokens) and self.view2_text_pos (tokens
    written as token_POS-tag). Shows a warning and sets nothing if there
    are no matches.>
    Args:
        | *matched* -- dict of matched tokens, sentence id -> list of
          matched token lists
    """
    # show a warning because view2 depends on matched terms
    if not matched:
        msg = 'Please provide a search query!'
        self.show_message(msg, 'warning.png', True)
        return
    # first see which sent has query matches and include only those,
    # a set keeps the lookups below cheap
    matched_ids = set(sent_id for sent_id, sent_matches in matched.items()
                      if any(sent_matches))
    plain_parts = []
    pos_parts = []
    shown = 0
    for sent_id, sent_lst in self.process_results[1].items():
        if sent_id not in matched_ids:
            continue
        shown += 1
        words = ' '.join([token[0] for token in sent_lst])
        tagged = ' '.join(['_'.join([token[0], token[1]])
                           for token in sent_lst])
        # NOTE(review): plain view is numbered with a running counter
        # while POS view uses the sentence id -- confirm it is intended
        plain_parts.append(': '.join([str(shown), words]))
        pos_parts.append(': '.join([str(sent_id), tagged]))
    self.view2_text = '\n\n'.join(plain_parts)
    self.view2_text_pos = '\n\n'.join(pos_parts)
def prepare_view3(self, matched):
    """
    Prepare the texts displayed in view 3.
    <View 3 lists the matches only, one per line, each prefixed with its
    running number. Sets self.view3_text (plain tokens) and
    self.view3_text_pos (tokens written as token_POS-tag).>
    Args:
        | *matched* -- Ordereddict of matched tokens
    """
    # show a warning because view3 depends on matched terms
    if not matched:
        self.show_message('Please provide a search query!', 'warning.png',
                          True)
        return
    # flatten the matches of all sentences keeping their order
    all_matches = [tokens for sent_lst in matched.values()
                   for tokens in sent_lst]
    plain_lines = []
    pos_lines = []
    for number, tokens in enumerate(all_matches, 1):
        prefix = str(number)
        words = ' '.join([token[0] for token in tokens])
        tagged = ' '.join(['_'.join([token[0], token[1]])
                           for token in tokens])
        plain_lines.append(': '.join([prefix, words]))
        pos_lines.append(': '.join([prefix, tagged]))
    self.view3_text = '\n'.join(plain_lines)
    self.view3_text_pos = '\n'.join(pos_lines)
def highlighter(self, matched):
    """
    Reload Text field view and highlight the matches.
    <The text of the selected view (plain or with POS-tags) replaces the
    Text field contents. If there are matches, marker tags them with
    'style' and the tag is configured according to view type.>
    <Nothing is done for views 2 and 3 when there are no matches.>
    Args:
        *matched* -- dict of matched results
    """
    pos_tags, text_view = self.get_opts()
    # if no matched results and view is 2 or 3, don't do anything
    if text_view != 1 and not matched:
        return
    # reset highighting
    self.Text.tag_delete('style')
    # remove text
    self.Text.delete('1.0', 'end')
    # view type -> (plain text attribute, POS-tags text attribute)
    sources = {
        1: ('view1_text', 'view1_text_pos'),
        2: ('view2_text', 'view2_text_pos'),
        3: ('view3_text', 'view3_text_pos'),
    }
    # view type -> options of the highlighting tag
    styles = {
        1: dict(foreground='#000000', background='#C0FA82'),
        2: dict(foreground='#000000', background="#BCFC77",
                font='TkDefaultFont 10 bold'),
        3: dict(font='TkDefaultFont 11 bold'),
    }
    if text_view not in sources:
        return
    plain_attr, pos_attr = sources[text_view]
    content = getattr(self, pos_attr if pos_tags else plain_attr)
    # reload text
    if text_view == 1:
        self.insert_text(content)
    else:
        self.Text.insert('1.0', content)
    if not matched:
        return
    # find matched token indeces for Tkinter to tag
    self.marker(matched, pos_tags)
    # highlight
    self.Text.tag_configure('style', **styles[text_view])
def marker(self, matched, pos):
    """
    Find all matches occurences in the text and tag them with 'style'.
    <Python re is used instead of Text.search. Every line of the Text
    field is scanned for every matched string, a line is scanned further
    from the end of the last match found in it. Found (start, end)
    offsets are converted to Tkinter "row.column" indeces.>
    Args:
        | *matched* -- dict of matched results
        | *pos* -- True if add POS-tags
    """
    # The house is black and the house is green. There is a house in the city.
    # build the patterns once, they do not depend on the scanned line
    patterns = []
    for sent_lst in matched.values():
        for tokens in sent_lst:
            if not tokens:
                continue
            if pos:
                matched_str = ' '.join(['_'.join([tok[0], tok[1]])
                                        for tok in tokens])
            else:
                matched_str = ' '.join([tok[0] for tok in tokens])
            patterns.append(re.compile(handle_punct(matched_str)))
    # NOTE(review): offsets are calculated on UTF-8 encoded text, Tkinter
    # counts characters -- non-ASCII text may shift the tags, confirm
    text = self.Text.get('1.0', tk.END).encode('utf-8')
    marks = []
    for row, sent in enumerate(text.split('\n'), 1):
        start = 0
        for pattern in patterns:
            # search in a slice, so that \b also matches at its beginning
            found = pattern.search(sent[start:])
            if not found:
                continue
            marks.append(fnode(row, found.start() + start,
                               found.end() + start))
            start += found.end()
    for start_mark, end_mark in marks:
        self.Text.tag_add('style', start_mark, end_mark)
def show_about(self):
    """
    Display About window.
    <Shows the program name and version, its icon, the libraries it is
    built with and a read-only entry with the contact e-mail.>
    """
    title = 'nn-search v.2.0.0'
    built_with = 'Built with nltk, TextBlob and matplotlib'
    email = 'tastyminerals@gmail.com '
    win = tk.Toplevel()
    win.title('About')
    # set custom window icon
    set_win_icon(win, self.img_path('info.png'))
    win.resizable(0, 0)
    outer = ttk.Frame(win, borderwidth=2, relief='groove')
    outer.grid(sticky='nsew')
    inner = ttk.Frame(outer, borderwidth=2, relief='groove')
    inner.grid(row=0, column=0)
    # inserting Labels
    ttk.Label(inner, font='TkDefaultFont 10 bold', text=title).grid()
    self.nn_icon = itk.PhotoImage(file=self.img_path('nn-search.png'))
    ttk.Label(inner, image=self.nn_icon).grid()
    ttk.Label(inner, font=TKFONT, text=built_with).grid(sticky='we')
    # e-mail is put into a read-only Entry
    email_var = tk.StringVar()
    email_var.set(email)
    contact = tk.Entry(inner, state='readonly', relief='flat',
                       fg='#0000FF', width=25, textvariable=email_var)
    contact.grid()
    close_butt = ttk.Button(outer, padding=(0, 0), text='Close',
                            command=win.destroy)
    close_butt.grid()
    self.centrify_widget(win)
def _open_html(self):
    """
    Open docs index.html with the default browser.
    <The path is converted to a proper file URL, so that special
    characters in the installation path (spaces, '#', '%') and Windows
    path separators do not break the link.>
    """
    # used only here, Python 2 module names
    import urllib
    import urlparse
    index = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         'docs', 'html', 'index.html')
    webbrowser.open(urlparse.urljoin('file:', urllib.pathname2url(index)))
def _build_gui(self):
    """
    Create user interface including all necessary components like Frames,
    Buttons, Labels etc.
    <Layout: toolbar menus (File, Edit, Tools, Help) on top, the query
    entry with the Search button below, the text area with a scrollbar
    and the right panel with file operations, view modes, statistics
    buttons and data source labels.>
    <Every icon is kept as an instance attribute (presumably to keep a
    reference for Tkinter).>
    """
    bold = 'TkDefaultFont 10 bold'
    options = dict(sticky='nsew', pady=1, padx=1)

    def put_resizable(elem, row, col, colspan, rowspan, stick):
        """
        Place the element and make it resizable.
        Args:
            | *elem* (ttk Object) -- ttk Object instance
            | *row* (int) -- row on which the element is placed
            | *col* (int) -- col on which the elemnt is placed
            | *colspan* (int) -- how many columns are allowed to span
            | *rowspan* (int) -- how many rows are allowed to span
            | *stick* (str) -- element alignment within the cell
        """
        elem.grid(row=row, column=col, rowspan=rowspan,
                  columnspan=colspan, sticky=stick)
        elem.grid_columnconfigure(col, weight=1)
        elem.grid_rowconfigure(row, weight=1)

    def icon(fname):
        """Create a PhotoImage from the path given by self.img_path."""
        return itk.PhotoImage(file=self.img_path(fname))

    def groove_frame(parent):
        """Create a Frame with the common groove border."""
        return ttk.Frame(parent, borderwidth=2, relief='groove')

    def toolbar_menu(title):
        """Create a toolbar Menu and the Menubutton which opens it."""
        menu = tk.Menu(self.MenuFrm, tearoff=False)
        button = ttk.Menubutton(self.MenuFrm, text=title,
                                direction='below', menu=menu)
        return menu, button

    def fill_menu(menu, entries):
        """Add (label, image, command) entries to the menu."""
        for label, image, command in entries:
            menu.add_command(label=label, image=image, compound='left',
                             command=command)

    def panel_button(parent, text, image, command, row, padding=(0, 0)):
        """Create an image Button and put it into column 0 of the row."""
        button = ttk.Button(parent, padding=padding, text=text,
                            image=image, compound='left', command=command)
        button.grid(row=row, column=0, sticky='nwe', padx=1, pady=1)
        return button

    def view_radio(image, value):
        """Create a view mode Radiobutton, its row equals its value."""
        radio = ttk.Radiobutton(self.InnerRightFrm1, image=image,
                                variable=self.view_opts, value=value)
        radio.grid(row=value)
        return radio

    # making main frame which shall contain all widgets and subframes
    self.Main = ttk.Frame(self, borderwidth='2', relief='groove')
    self.Main.grid(columnspan=2, rowspan=4, **options)
    # only column 0 and row 2 (the text area) take the extra space
    for column, weight in ((0, 1), (1, 0)):
        self.Main.grid_columnconfigure(column, weight=weight)
    for row, weight in ((0, 0), (1, 0), (2, 1)):
        self.Main.grid_rowconfigure(row, weight=weight)

    # make a toolbar menu
    self.MenuFrm = ttk.Frame(self.Main, borderwidth='1', relief='flat')
    put_resizable(self.MenuFrm, 0, 0, 2, 1, 'w')
    # make "File" menu
    self.Menu0, self.MenuButton0 = toolbar_menu('File')
    self.load = icon('load.png')
    self.save = icon('disk.png')
    self.exit = icon('exit.png')
    fill_menu(self.Menu0, (
        ("Load", self.load, self.load_data),
        ("Save", self.save, self.save_data),
        ("Exit", self.exit, self.quit),
    ))
    put_resizable(self.MenuButton0, 0, 0, 1, 1, 'n')
    # make "Edit" menu
    self.Menu1, self.MenuButton1 = toolbar_menu('Edit')
    self.find = icon('find.png')
    self.copy = icon('copy.png')
    self.cut = icon('cut.png')
    self.paste = icon('paste.png')
    self.undo = icon('undo.png')
    self.redo = icon('redo.png')
    fill_menu(self.Menu1, (
        ("Find (Ctrl-f)", self.find, self.show_find),
        ("Copy (Ctrl-c)", self.copy, self.ctrl_c),
        ("Cut (Ctrl-x)", self.cut, self.ctrl_x),
        ("Paste (Ctrl-v)", self.paste, self.ctrl_v),
        ("Undo (Ctrl-z)", self.undo, self.ctrl_z),
        ("Redo (Ctrl-u)", self.redo, self.ctrl_u),
    ))
    put_resizable(self.MenuButton1, 0, 1, 1, 1, 'n')
    # make "Tools" menu
    self.Menu2, self.MenuButton2 = toolbar_menu('Tools')
    self.tagger = icon('wand.png')
    fill_menu(self.Menu2, (
        ("POS-tagger", self.tagger, self.pos_tagger_win),
    ))
    put_resizable(self.MenuButton2, 0, 2, 1, 1, 'n')
    # make "Help" menu
    self.Menu3, self.MenuButton3 = toolbar_menu('Help')
    self.help = icon('help.png')
    self.about = icon('info.png')
    fill_menu(self.Menu3, (
        ("Help", self.help, self._open_html),
        ("POS-tags", self.about, self.pos_tags_long),
        ("About", self.about, self.show_about),
    ))
    put_resizable(self.MenuButton3, 0, 3, 1, 1, 'n')

    # make a frame for query input widget
    self.EntryFrm = ttk.Frame(self.Main, borderwidth='2', relief='groove')
    put_resizable(self.EntryFrm, 1, 0, 1, 1, 'we')
    # make entry widget inside entry frame
    self.Entry = ttk.Entry(self.EntryFrm, font='TkDefaultFont 12')
    self.Entry.grid(row=1, column=0, columnspan=1, **options)
    entry_keys = (
        ('<Control-a>', self.ctrl_a),
        ('<Control-d>', self.ctrl_d),
        ('<Control-z>', self.ctrl_z),
        ('<Control-u>', self.ctrl_u),
        ('<Control-r>', self.ctrl_r),
    )
    for sequence, handler in entry_keys:
        self.Entry.bind(sequence, handler)
    # <Return> is added to the already existing bindings
    self.Entry.bind('<Return>', self.press_return, '+')
    self.Entry.focus()  # <Return> enable when entry widget in focus
    # make search button
    self.search = icon('search.png')
    self.search_butt = ttk.Button(self.EntryFrm, padding=(-5, 0),
                                  text='Search', image=self.search,
                                  compound='left',
                                  command=self.press_return)
    self.search_butt.grid(row=1, column=1, **options)

    # make text frame
    self.TextFrm = groove_frame(self.Main)
    put_resizable(self.TextFrm, 2, 0, 1, 1, 'nsew')
    # make text widget
    self.Text = tk.Text(self.TextFrm, font=TKTEXT_FONT, height=35,
                        width=100, undo=True, takefocus=0)
    put_resizable(self.Text, 2, 0, 1, 1, 'nsew')
    text_keys = (
        ('<Control-a>', self.ctrl_a),
        ('<Control-d>', self.ctrl_d),
        ('<Control-s>', self.ctrl_s),
        ('<Control-z>', self.ctrl_z),
        ('<Control-u>', self.ctrl_u),
        ('<Control-r>', self.ctrl_r),
        ('<Control-f>', self.ctrl_f),
    )
    for sequence, handler in text_keys:
        self.Text.bind(sequence, handler)
    self.Text.edit_modified(False)  # set Text widget -- not modified
    # make a scrollbar for text widget
    self.scroll = ttk.Scrollbar(self.TextFrm, command=self.Text.yview)
    self.Text.config(yscrollcommand=self.scroll.set)
    self.scroll.grid(row=2, column=1, sticky='ens')

    # make the right frame
    self.RightFrm = groove_frame(self.Main)
    put_resizable(self.RightFrm, 1, 1, 2, 2, 'new')
    # make inner frame that will contain "Load", "Save" buttons.
    self.InnerRightFrm0 = groove_frame(self.RightFrm)
    put_resizable(self.InnerRightFrm0, 0, 0, 2, 1, 'new')
    # add a label for "Load", "Save" frame
    self.flab = ttk.Label(self.InnerRightFrm0, font=bold,
                          text='File operations')
    self.flab.grid(row=0)
    # make "Load", "Process" and "Save" buttons for right frame
    self.load_butt = panel_button(self.InnerRightFrm0, 'Load', self.load,
                                  self.load_data, 1)
    self.proimg = icon('proc.png')
    self.proc_butt = panel_button(self.InnerRightFrm0, 'Process!',
                                  self.proimg, self.process_command, 2,
                                  padding=(5, 5))
    self.save_butt = panel_button(self.InnerRightFrm0, 'Save', self.save,
                                  self.save_data, 3)

    # make inner frame that will contain view types
    self.InnerRightFrm1 = groove_frame(self.RightFrm)
    put_resizable(self.InnerRightFrm1, 1, 0, 2, 1, 'nwe')
    # make view widgets
    self.vlab = ttk.Label(self.InnerRightFrm1, font=bold,
                          text='View mode')
    self.vlab.grid(row=0)
    self.view_opts = tk.IntVar()
    self.view1 = icon('view1.png')
    self.view1Radio = view_radio(self.view1, 1)
    self.view1Radio.invoke()  # make active by default
    self.view2 = icon('view2.png')
    self.view2Radio = view_radio(self.view2, 2)
    self.view3 = icon('view3.png')
    self.view3Radio = view_radio(self.view3, 3)
    # make show POS-tags button
    self.show_tags = tk.IntVar()
    self.tags_butt = ttk.Checkbutton(self.InnerRightFrm1, text='POS-tags',
                                     padding=(0, 5), onvalue=1,
                                     offvalue=0, variable=self.show_tags)
    self.tags_butt.grid(row=4)

    # make inner frame that will contain stats buttons
    self.InnerRightFrm2 = groove_frame(self.RightFrm)
    put_resizable(self.InnerRightFrm2, 2, 0, 2, 1, 'we')
    # add text statistics label
    self.slab = ttk.Label(self.InnerRightFrm2, font=bold,
                          text='Statistics')
    self.slab.grid(row=0)
    # make "Stats" buttons
    self.simg1 = icon('stats.png')
    self.stats_butt1 = panel_button(self.InnerRightFrm2, 'Numbers',
                                    self.simg1, self.show_nums_win, 2)
    self.simg2 = icon('stats2.png')
    self.stats_butt2 = panel_button(self.InnerRightFrm2, 'Graphs',
                                    self.simg2, self.mk_graphs_win, 3)
    self.simg3 = icon('stats3.png')
    self.stats_butt3 = panel_button(self.InnerRightFrm2, 'Search stats',
                                    self.simg3,
                                    self.show_search_stats_win, 4)

    # make inner frame that will contain file information
    self.InnerRightFrm3 = groove_frame(self.RightFrm)
    put_resizable(self.InnerRightFrm3, 3, 0, 2, 1, 'ew')
    # make file info labels
    self.stats = ttk.Label(self.InnerRightFrm3, text='Data source',
                           font=bold)
    self.stats.grid(row=0, column=0)
    self.stats0 = ttk.Label(self.InnerRightFrm3, text='Name: not loaded')
    self.stats0.grid(row=1, column=0, sticky='w')
    self.stats1 = ttk.Label(self.InnerRightFrm3, text='Size: not loaded')
    self.stats1.grid(row=2, column=0, sticky='w')
def lock_ui(self, lock):
    """
    Enable or disable all clickable widgets of the main window, so that
    they cannot be used while background operations are running.

    Args:
        *lock* (bool) -- disable the widgets if True, enable them otherwise

    """
    new_state = 'disabled' if lock else 'normal'
    # attribute names of the affected widgets, in the order in which they
    # get reconfigured
    widget_names = (
        'MenuButton0', 'MenuButton1', 'MenuButton2', 'MenuButton3',
        'view1Radio', 'view2Radio', 'view3Radio',
        'tags_butt', 'search_butt',
        'load_butt', 'save_butt', 'proc_butt',
        'stats_butt1', 'stats_butt2', 'stats_butt3',
    )
    for widget_name in widget_names:
        getattr(self, widget_name).config(state=new_state)
def lock_toplevel(self, toplevel_win_widget, lock):
    """
    Disable or re-enable a Toplevel window widget, in order to prevent a
    user from closing the window.

    Args:
        | *toplevel_win_widget* (ttk.Button) -- Toplevel Button widget
        | *lock* (bool) -- disable the widget if True, enable it otherwise

    """
    toplevel_win_widget.config(state='disabled' if lock else 'normal')
def clean_up(self):
    """
    Remove all plot files in '_graphs' dir upon initialization.

    The '_graphs' directory inside RESDIR is deleted together with its
    contents (if it exists) and then created again empty. Warnings are
    written to stderr.

    Raises:
        SystemExit -- with exit status 1, if the directory cannot be
        created

    """
    graphs_dir = os.path.join(RESDIR, '_graphs')
    # Only attempt the removal when there is something to remove; on the
    # very first run the directory does not exist yet and that is not
    # worth a warning.
    if os.path.exists(graphs_dir):
        try:
            shutil.rmtree(graphs_dir)
        except (OSError, IOError):
            sys.stderr.write("WARNING: Cannot remove '_graphs' directory!\n")
    # If the removal above failed and the directory is still there, mkdir
    # fails as well and the program exits, exactly as it did before.
    try:
        os.mkdir(graphs_dir)
    except (OSError, IOError):
        sys.stderr.write("WARNING: Cannot create '_graphs' directory!\n")
        sys.exit(1)
def get_opts(self):
    """
    Read the values currently selected in the UI option widgets.

    Returns:
        (tuple) -- value of ``self.show_tags`` followed by the value of
        ``self.view_opts``

    """
    tags_value = self.show_tags.get()
    view_value = self.view_opts.get()
    return tags_value, view_value
def img_path(self, icon_name):
    """
    Build the path of an icon file located in 'data/icons' under RESDIR.

    Args:
        *icon_name* (str) -- icon file name

    Returns:
        (str) -- RESDIR joined with 'data', 'icons' and the icon name

    """
    path_parts = (RESDIR, 'data', 'icons', icon_name)
    return os.path.join(*path_parts)
def set_stats_ready(self, state):
    """
    Setter for the self.stats_ready flag.

    Args:
        *state* (bool) -- True, if text statistics was calculated

    """
    self.stats_ready = state
def set_graphs_ready(self, state):
    """
    Setter for the self.graphs_ready flag.

    Args:
        *state* (bool) -- True, if graphs were plotted

    """
    self.graphs_ready = state
def set_search_stats_ready(self, state):
    """
    Setter for the self.sstats_ready flag.

    Args:
        *state* (bool) -- new value of the flag; presumably True once the
        search statistics are available (verify against the callers)

    """
    self.sstats_ready = state
def set_file_loaded(self, state):
    """
    Setter for the self.is_file_loaded flag.

    Args:
        *state* (bool) -- True, if file was loaded

    """
    self.is_file_loaded = state
def set_processed(self, state):
    """
    Setter for the self.processed flag.

    Args:
        *state* (bool) -- True, if 'Processed!' was clicked

    """
    self.processed = state
def main():
    """
    Create the Tk root window, configure it, build the NNSearch UI inside
    it and run the UI main loop.
    """
    main_window = tk.Tk()
    main_window.title('nn-search2')
    # use a custom window icon from the resources dir
    set_win_icon(main_window,
                 os.path.join(RESDIR, 'data', 'icons', 'nn-search.ico'))
    # main_window.geometry("1000x630")  # gui size at startup
    # give the single grid cell of the root window all the weight and
    # allow resizing in both directions
    main_window.rowconfigure(0, weight=1)
    main_window.columnconfigure(0, weight=1)
    main_window.resizable(True, True)
    main_window.update()
    # a ttk theme ('clam', 'alt', 'classic', 'default') could be set here:
    # ttk_theme = ttk.Style()
    # ttk_theme.theme_use('clam')
    app = NNSearch(main_window)
    app.mainloop()


if __name__ == '__main__':
    main()