Baby Language Lab Scripts
A collection of data processing tools.
 All Classes Namespaces Files Functions Variables Pages
freq_window.py
Go to the documentation of this file.
1 from gi.repository import Gtk as gtk
2 from gi.repository import Gdk as gdk
3 from gi.repository import GObject as gobject
4 import subprocess
5 import re
6 import sre_constants
7 import logging
8 import traceback
9 
10 from utils.ui_utils import UIUtils
11 from utils.progress_dialog import ProgressDialog
12 from parsers.trs_parser import TRSParser
13 from data_structs.seg_filters import WHQFilter
14 from data_structs.output import Output
15 from data_structs.output_calcs import CountOutputCalc
16 from parsers.freq_exporter import FreqExporter
17 from parsers.filter_manager import FilterManager
18 from db.bll_database import DBConstants
19 
20 class FreqWindow():
def __init__(self, filename, progress_dialog):
    """Parse a TRS file and build the 'WH-Frequency Counter' window.

    filename -- path handed to TRSParser.
    progress_dialog -- dialog whose set_fraction / next_phase methods are
        passed to the parser as progress callbacks; ensure_finish() is
        called on it once the treeview has been built.
    """
    self.window = gtk.Window(gtk.WindowType.TOPLEVEL)
    self.window.set_title('WH-Frequency Counter')
    self.window.set_border_width(10)
    self.window.set_default_size(730, 400)

    self.logger = logging.getLogger(__name__)

    self.trs_parser = TRSParser(filename)
    segments = self.trs_parser.parse(progress_update_fcn=progress_dialog.set_fraction,
                                     progress_next_phase_fcn=progress_dialog.next_phase,
                                     validate=False,
                                     seg_filters=[])

    #this object caches original segs and helps with lookup by segment number
    self.filter_manager = FilterManager(segments)
    calc = CountOutputCalc('', CountOutputCalc.COUNT_TYPES.PER_SEG, 1)
    #this object filters and allows us to retrieve the filtered segs
    self.output = Output('', '', [WHQFilter()], calc, False)
    #explicit loop: map() is lazy on Python 3 and would never call add_item()
    for seg in segments:
        self.output.add_item(seg)

    #NOTE(review): _build_treeview() reads self.count_cols, but no assignment to it is
    #visible in this listing - confirm it is initialised before this call.
    treeview = self._build_treeview()
    #ensure progress dialog self-destructs even if no utterances are found (in that case the above call never invokes progress_dialog.set_fraction)
    progress_dialog.ensure_finish()

    scrolled_win = gtk.ScrolledWindow()
    #gi exposes the policy constants on the PolicyType enum (there is no gtk.POLICY_AUTOMATIC)
    scrolled_win.set_policy(gtk.PolicyType.AUTOMATIC, gtk.PolicyType.AUTOMATIC)
    scrolled_win.add(treeview)

    export_button = UIUtils.create_button('Export Results', UIUtils.BUTTON_ICONS.EXPORT)
    export_button.connect('clicked', lambda widget: self._export_results(treeview))

    close_button = UIUtils.create_button('Close', UIUtils.BUTTON_ICONS.CLOSE)
    close_button.connect('clicked', lambda w: self.window.destroy())

    add_button = UIUtils.create_button('Add Count Column', UIUtils.BUTTON_ICONS.ADD)
    add_button.connect('clicked', lambda w: self._add_count_col(treeview))

    self.remove_button = UIUtils.create_button('Remove Count Column', UIUtils.BUTTON_ICONS.REMOVE)
    self.remove_button.connect('clicked', lambda w: self._remove_count_col(treeview))

    options_frame = gtk.Frame(label='Options')
    options_vbox = gtk.VBox()
    self.linked_checkbox = gtk.CheckButton('Group Linked Segments')
    self.linked_checkbox.connect('toggled', self._toggle_seg_grouping, treeview)
    options_vbox.pack_start(self.linked_checkbox, False, False, 0)

    self.context_checkbox = gtk.CheckButton('Show Context')
    self.context_checkbox.connect('toggled', self._toggle_show_context, treeview)
    options_vbox.pack_start(self.context_checkbox, False, False, 0)

    options_frame.add(options_vbox)

    self.statusbar = gtk.Statusbar()
    self.statusbar.set_has_resize_grip(False)
    #the WHQ total starts out as the number of rows in the model
    self.num_whq = treeview.get_model().iter_n_children(None)
    self._update_statusbar()

    vbox = gtk.VBox()

    bbox = gtk.HButtonBox()
    bbox.pack_start(export_button, True, False, 0)
    bbox.pack_start(add_button, True, False, 0)
    bbox.pack_start(self.remove_button, True, False, 0)
    bbox.pack_start(close_button, True, False, 0)

    vbox.pack_start(scrolled_win, True, True, 0)
    vbox.pack_start(self.statusbar, False, False, 0)
    vbox.pack_end(bbox, False, False, 0)
    vbox.pack_end(options_frame, False, False, 0)
    self.window.add(vbox)

    self.window.show_all()
91 
93  return map( lambda word: [word.capitalize(), word, 0], 'who what why when where how'.split() )
94 
95  def _toggle_show_context(self, checkbox, treeview):
96  tree_model = self._build_list_store(link_segs=self.linked_checkbox.get_active(), prev_store=treeview.get_model(), show_context=self.context_checkbox.get_active())
97  treeview.set_model(tree_model)
98 
99  def _toggle_seg_grouping(self, checkbox, treeview):
100  tree_model = self._build_list_store(link_segs=self.linked_checkbox.get_active(), prev_store=None, show_context=self.context_checkbox.get_active())
101  treeview.set_model(tree_model)
102  self.num_whq = treeview.get_model().iter_n_children(None)
103  self._update_statusbar()
104 
106  self.remove_button.set_sensitive(len(self.count_cols) > 0)
107 
def _remove_count_col(self, treeview):
    """Ask which user-defined count column to drop, then remove it.

    Shows a modal dialog with a combo box listing the names in
    self.count_cols. On OK the chosen entry is removed from
    self.count_cols, the list store is rebuilt, the matching treeview
    column is removed and the statusbar is refreshed. Any other response
    (or an empty selection) leaves everything untouched.
    """
    dialog = gtk.Dialog(title='Remove Count Column',
                        buttons=(gtk.STOCK_CANCEL, gtk.ResponseType.CANCEL, gtk.STOCK_OK, gtk.ResponseType.OK))
    dialog.set_default_response(gtk.ResponseType.OK)

    vbox = dialog.get_content_area()

    list_store = gtk.ListStore(gobject.TYPE_STRING)
    for count_col in self.count_cols:
        list_store.append([count_col[0]])

    combo = gtk.ComboBox(model=list_store)
    renderer = gtk.CellRendererText()
    #CellLayout.pack_start() takes (cell, expand) - not the Box-style (child, expand, fill, padding)
    combo.pack_start(renderer, True)
    combo.add_attribute(renderer, 'text', 0)
    combo.set_active(0)

    hbox = gtk.HBox()
    hbox.pack_start(gtk.Label('Select Column:'), True, True, 0)
    hbox.pack_start(combo, True, True, 0)

    vbox.pack_start(hbox, True, True, 0)
    vbox.show_all()

    response = dialog.run()
    col_index = combo.get_active()
    #always tear the dialog down, whichever way it was dismissed
    dialog.destroy()

    if response != gtk.ResponseType.OK or col_index < 0:
        return

    self.count_cols = self.count_cols[:col_index] + self.count_cols[col_index + 1:]

    progress_dialog = ProgressDialog('Removing Column...', ['Rebuilding UI...'])
    progress_dialog.show()
    try:
        tree_model = self._build_list_store(link_segs=self.linked_checkbox.get_active(), prev_store=treeview.get_model(), show_context=self.context_checkbox.get_active())

        #the first 6 treeview columns are the fixed ones; count columns follow
        old_col = treeview.get_column(6 + col_index)
        treeview.remove_column(old_col)

        #update the 'text' property of the cell renderers in all columns after the removed column - otherwise cell values get mixed up
        for i in range(6 + col_index, tree_model.get_n_columns()):
            col = treeview.get_column(i)
            #GTK 3 replaced TreeViewColumn.get_cell_renderers() with CellLayout.get_cells()
            renderer = col.get_cells()[0]
            col.set_attributes(renderer, text=i)

        treeview.set_model(tree_model)

        self._update_statusbar()
    finally:
        #make sure the progress dialog goes away even if the rebuild fails
        progress_dialog.ensure_finish()
168 
def _add_count_col(self, treeview):
    """Prompt for a column name and search regex, then add a count column.

    The dialog is re-shown until the user cancels/closes it or enters a
    regex that compiles. On success a [name, regex, total] entry is
    appended to self.count_cols, the list store is rebuilt, a matching
    treeview column is appended and the statusbar is refreshed. Unexpected
    errors are reported in a message dialog and logged.
    """
    dialog = gtk.Dialog(title='Add Count Column',
                        buttons=(gtk.STOCK_CANCEL, gtk.ResponseType.CANCEL, gtk.STOCK_OK, gtk.ResponseType.OK))
    dialog.set_default_response(gtk.ResponseType.OK)

    vbox = dialog.get_content_area()

    #Grid.attach() takes (child, left, top, width, height); the old Table-style
    #padding argument is expressed as grid spacing instead
    grid = gtk.Grid()
    grid.set_row_spacing(3)
    grid.set_column_spacing(3)

    name_label = gtk.Label('Column Name:')
    grid.attach(name_label, 0, 0, 1, 1)

    name_entry = gtk.Entry()
    grid.attach(name_entry, 1, 0, 1, 1)

    regex_label = gtk.Label('Search term:')
    grid.attach(regex_label, 0, 1, 1, 1)

    regex_entry = gtk.Entry()
    grid.attach(regex_entry, 1, 1, 1, 1)

    vbox.pack_start(grid, True, True, 0)
    vbox.show_all()

    done = False
    while not done:
        response = dialog.run()
        if response != gtk.ResponseType.OK:
            #cancel, or the dialog window was closed - either way we're finished
            dialog.destroy()
            done = True
            continue

        name = name_entry.get_text()
        regex = regex_entry.get_text()

        try:
            re.compile(regex)
        except re.error:
            #leave the dialog alive so the user can correct the expression
            error_dialog = gtk.MessageDialog(buttons=(gtk.ButtonType.OK), message_format='The regular expression that has been entered is invalid.')
            error_dialog.run()
            error_dialog.destroy()
            continue

        dialog.destroy()
        done = True

        #bound up front so the error handler can test it safely
        progress_dialog = None
        try:
            self.count_cols.append([name, regex, 0]) #name, regex, total

            progress_dialog = ProgressDialog('Adding New Column...', ['Counting occurrances...'])
            progress_dialog.show()
            list_store = self._build_list_store(link_segs=self.linked_checkbox.get_active(), prev_store=treeview.get_model(), show_context=self.context_checkbox.get_active())
            progress_dialog.ensure_finish()

            treeview.set_model(list_store)
            #the new column is always the last one in the rebuilt model
            col = gtk.TreeViewColumn(name, gtk.CellRendererText(), text=list_store.get_n_columns() - 1)
            treeview.append_column(col)
            self._update_statusbar()

        except Exception as error:
            error_dialog = gtk.MessageDialog(buttons=(gtk.ButtonType.OK), message_format='The application has encountered an internal error. Please contact your local programmer to assign blame.')
            error_dialog.run()
            error_dialog.destroy()

            if progress_dialog:
                progress_dialog.destroy()

            self.logger.error('Exception in add_column():\n %s\nStacktrace: %s' % (error, traceback.format_exc()))
242 
243  def _update_statusbar(self):
244  context_id = self.statusbar.get_context_id('num_whq')
245  self.statusbar.pop(context_id)
246  totals = 'Totals: WHQ Count: %d' % (self.num_whq)
247 
248  for col in self.count_cols:
249  totals += ', %s: %d' % (col[0], col[2])
250 
251  self.statusbar.push(context_id, totals)
252 
253  def _get_link_chain(self, cur_seg):
254  cur = cur_seg
255  chain = []
256  while cur != None:
257  chain.insert(cur, 0)
258  cur = cur.prev
259 
260  cur = cur_seg.next
261  while cur != None:
262  chain.append(cur)
263 
264  return chain
265 
266  def _build_list_store_row(self, utter_id, start_time, end_time, trans_phrase, speaker_str, target_str, whq_count):
267  start_time = ('%0.2f' % (start_time)) if start_time != None else ''
268  end_time = ('%0.2f' % (end_time)) if end_time != None else ''
269 
270  return [
271  utter_id,
272  '%s - %s' % (start_time, end_time),
273  trans_phrase,
274  speaker_str,
275  target_str,
276  whq_count,
277  ]
278 
279  def _find_utter_index(self, utter):
280  utter_index = -1
281  i = 0
282  while i < len(utter.seg.utters) and utter_index < 0:
283  if utter.seg.utters[i] == utter:
284  utter_index = i
285  i += 1
286 
287  return utter_index
288 
289  def _append_context(self, bwd_start_utter, fwd_start_utter, cur_phrase):
290  #backward
291  bwd_phrase = self._get_adjacent_phrase(bwd_start_utter, -1)
292  fwd_phrase = self._get_adjacent_phrase(fwd_start_utter, 1)
293 
294  return '(%s)\n%s\n(%s)' % (bwd_phrase, cur_phrase, fwd_phrase)
295 
296  def _get_adjacent_phrase(self, start_utter, incr):
297  utter_index = self._find_utter_index(start_utter) + incr
298  seg_index = start_utter.seg.num
299  phrase = None
300 
301  i_in_bounds = None
302  init_j = None
303  if incr < 0:
304  i_in_bounds = lambda i: i >= 0
305  init_j = lambda i, seg: utter_index if i == seg_index else len(seg.utters) - 1
306  j_in_bounds = lambda j, seg: j >= 0
307  else:
308  i_in_bounds = lambda i: i < len(self.filter_manager.get_segs())
309  init_j = lambda i, seg: utter_index if i == seg_index else 0
310  j_in_bounds = lambda j, seg: j < len(seg.utters)
311 
312  i = seg_index
313  while i_in_bounds(i) and not phrase:
314  seg = self.filter_manager.get_seg_by_num(i)
315  j = init_j(i, seg)
316  while j_in_bounds(j, seg) and not phrase:
317  phrase = seg.utters[j].trans_phrase
318  j += incr
319  i += incr
320 
321  return phrase or '-'
322 
def _build_list_store(self, link_segs=False, prev_store=None, show_context=False):
    """Build a fresh gtk.ListStore from the filtered segments.

    link_segs -- when True, emit one row per chain returned by
        FilterManager.get_chains() (phrases joined with '\\n->', speakers
        and targets comma-joined); otherwise one row per utterance.
    prev_store -- optional earlier store; when given (and non-empty), the
        WHQ count of each row is carried over from the same row number in
        it, otherwise every row starts with a WHQ count of 1.
    show_context -- when True, the displayed phrase is wrapped with the
        adjacent phrases via _append_context().

    Each row holds the six fixed cells followed by one regex match count
    per entry in self.count_cols. As a side effect the running total
    (item 2) of every self.count_cols entry is recomputed.
    """
    #for now, we always grab segs and convert to chains later if needed
    segments = self.output.get_filtered_items()
    list_store = gtk.ListStore(gobject.TYPE_INT, #utterance id
                               gobject.TYPE_STRING, #time
                               gobject.TYPE_STRING, #phrase
                               gobject.TYPE_STRING, #speakers
                               gobject.TYPE_STRING, #target listeners
                               gobject.TYPE_INT, #whq count
                               *([gobject.TYPE_INT] * len(self.count_cols)) #user-defined 'count columns'
                               )

    #reset the totals up front (if _build_list_store() was called in the past they may be > 0);
    #doing it here also clears them when there are no rows at all
    for count_col in self.count_cols:
        count_col[2] = 0

    def count_matches(text):
        #one match count per count column; also accumulates the column totals
        counts = []
        for count_col in self.count_cols:
            count = len(re.findall(count_col[1], text))
            count_col[2] += count
            counts.append(count)
        return counts

    row_num = 0
    if link_segs:
        for head in FilterManager.get_chains(segments):
            trans_phrase = head.trans_phrase
            speaker_str = DBConstants.SPEAKER_CODES.get_option(head.speaker.get_codeinfo().get_code()).desc if head.speaker else '(Unknown)'
            target_str = DBConstants.TRANS_CODES[1].get_option(head.trans_codes[1]).desc if head.trans_codes else '(Unknown)'

            cur = head.next
            while cur:
                trans_phrase += '\n->%s' % (cur.trans_phrase)
                if cur.speaker:
                    speaker_str += ', %s' % (DBConstants.SPEAKER_CODES.get_option(cur.speaker.get_codeinfo().get_code()).desc)
                if cur.trans_codes:
                    target_str += ', %s' % (DBConstants.TRANS_CODES[1].get_option(cur.trans_codes[1]).desc)
                cur = cur.next

            tail = FilterManager.get_endpoint(FilterManager.ENDPOINT_TYPES.TAIL, head)

            #count on the chain's own lower-cased text, captured before any context is added,
            #so counts match the ungrouped mode and don't change when context is shown
            search_text = trans_phrase.lower()

            if show_context:
                trans_phrase = self._append_context(head, tail, trans_phrase)

            whq_count = prev_store[row_num][5] if prev_store else 1
            row = self._build_list_store_row(head.id, head.start, tail.end, trans_phrase, speaker_str, target_str, whq_count)
            row.extend(count_matches(search_text))

            list_store.append(row)
            row_num += 1

    else:
        for seg in segments:
            for utter in seg.utters:
                trans_phrase = utter.trans_phrase
                if show_context:
                    trans_phrase = self._append_context(utter, utter, trans_phrase)

                whq_count = prev_store[row_num][5] if prev_store else 1
                #NOTE(review): this branch reads speaker.speaker_codeinfo while the linked branch
                #calls speaker.get_codeinfo() - confirm both accessors exist
                speaker_str = DBConstants.SPEAKER_CODES.get_option(utter.speaker.speaker_codeinfo.get_code()).desc if utter.speaker else '(Unknown)'
                target_str = DBConstants.TRANS_CODES[1].get_option(utter.trans_codes[1]).desc if utter.trans_codes else '(Unknown)'
                row = self._build_list_store_row(utter.id, utter.start, utter.end, trans_phrase, speaker_str, target_str, whq_count)
                row.extend(count_matches(utter.trans_phrase.lower()))

                list_store.append(row)
                row_num += 1

    return list_store
396 
def _build_treeview(self):
    """Create the treeview and its columns over a freshly built list store.

    Columns, in order: a hidden ID column, four text columns (Time, Phrase,
    Speakers, Target Listeners), an editable spin column for the WHQ count
    wired to _update_row(), and one text column per self.count_cols entry.
    The treeview's key-press-event is connected to _keypress_callback().
    """
    treeview = gtk.TreeView(self._build_list_store())

    def add_column(title, renderer, model_index, hidden=False):
        #every column is resizable; only the id column is hidden
        column = gtk.TreeViewColumn(title, renderer, text=model_index)
        if hidden:
            column.set_visible(False)
        column.set_resizable(True)
        treeview.append_column(column)

    #create hidden id column
    add_column('ID', gtk.CellRendererText(), 0, hidden=True)

    col_names = ['Time', 'Phrase', 'Speakers', 'Target Listeners']
    for model_index, title in enumerate(col_names, 1):
        add_column(title, gtk.CellRendererText(), model_index)

    whq_index = len(col_names) + 1
    spin_renderer = gtk.CellRendererSpin()
    adj = gtk.Adjustment(value=1, lower=0, upper=100, page_incr=5, step_incr=1, page_size=0)
    spin_renderer.set_property('adjustment', adj)
    spin_renderer.set_property('editable', True)
    spin_renderer.connect('edited', self._update_row, treeview)
    add_column('WHQ Count', spin_renderer, whq_index)

    #user-defined count columns come right after the WHQ count column
    for offset, count_col in enumerate(self.count_cols):
        add_column(count_col[0], gtk.CellRendererText(), whq_index + 1 + offset)

    treeview.connect('key-press-event', self._keypress_callback, treeview)

    return treeview
430 
def _keypress_callback(self, widget, event, treeview):
    """key-press-event handler: on Tab, move the cursor to the next row.

    When a row is selected and it is not the last one, the cursor is put
    on the following row (treeview column 3) with editing started. Other
    keys are ignored. Always returns None.
    """
    #keyval_name() can return None for a keyval it doesn't know
    key_name = gdk.keyval_name(event.keyval) or ''
    if key_name.lower() != 'tab':
        return

    (model, paths) = treeview.get_selection().get_selected_rows()
    if not paths:
        return

    next_row = paths[0][0] + 1
    if next_row < model.iter_n_children(None):
        #column and start_editing are passed positionally: PyGObject's set_cursor()
        #does not accept the PyGTK-era 'focus_column' keyword
        #NOTE(review): column 3 is a plain text column, while the only editable
        #renderer is attached to the WHQ Count column - confirm the intended target
        treeview.set_cursor(next_row, treeview.get_column(3), True)
437 
438  def _update_row(self, widget, path, value, treeview):
439  #we must retrieve the model each time this method is called (rather than just passing in a reference to it), since the model is re-defined ever time a count column is added or removed
440  model = treeview.get_model()
441  old_val = int(model[path][5])
442  new_val = int(value)
443  self.num_whq += (new_val - old_val)
444 
445  model[path][5] = new_val
446  self._update_statusbar()
447 
def _export_results(self, treeview):
    """Ask for a destination file and export the treeview's rows to it.

    A save dialog (with an extra 'Open Immediately' checkbox) is shown.
    On OK every row of the treeview's model is written through a
    FreqExporter, followed by the totals. The file is then either opened
    with the configured spreadsheet program or a confirmation dialog is
    shown, depending on the checkbox.
    """
    dialog = gtk.FileChooserDialog(title='Save',
                                   action=gtk.FileChooserAction.SAVE,
                                   buttons=(gtk.STOCK_CANCEL, gtk.ResponseType.CANCEL, gtk.STOCK_SAVE, gtk.ResponseType.OK))
    dialog.set_default_response(gtk.ResponseType.OK)
    for file_filter in (UIUtils.CSV_FILE_FILTER, UIUtils.ALL_FILE_FILTER):
        dialog.add_filter(file_filter)

    #splice in the 'open immediately checkbox'
    open_now_checkbox = gtk.CheckButton('Open Immediately')
    open_now_checkbox.set_active(True)
    align = gtk.Alignment(xalign=1.0, yalign=1.0)
    align.add(open_now_checkbox)
    dialog.get_content_area().pack_end(align, False, False, 0)
    open_now_checkbox.show()
    align.show()

    response = dialog.run()
    if response == gtk.ResponseType.CANCEL:
        dialog.destroy()
        return
    if response != gtk.ResponseType.OK:
        return

    filename = dialog.get_filename()
    open_now = open_now_checkbox.get_active()
    dialog.destroy()

    #split the [name, regex, total] entries into three parallel sequences
    if self.count_cols:
        headers, _regexes, totals = zip(*self.count_cols)
    else:
        headers, _regexes, totals = [[]] * 3

    exporter = FreqExporter(filename, self.trs_parser.filename)
    exporter.write_header_row(headers)

    list_store = treeview.get_model()
    row_it = list_store.get_iter_first()
    while row_it:
        time_str = list_store.get_value(row_it, 1)
        #we must remove newline chars, otherwise Excel thinks it's the end of a row (even when it's quoted...)
        phrase = list_store.get_value(row_it, 2).replace('\n', ' ').replace('\r', '')
        speakers_str = list_store.get_value(row_it, 3) or '(Unknown)'
        targets_str = list_store.get_value(row_it, 4) or '(Unknown)'
        num_utters = int(list_store.get_value(row_it, 5))
        #everything from column 6 onward is a user-defined count column
        row_counts = [int(list_store.get_value(row_it, col_num))
                      for col_num in range(6, list_store.get_n_columns())]

        exporter.write_count_row(time_str, phrase, speakers_str, targets_str, num_utters, row_counts)

        row_it = list_store.iter_next(row_it)

    exporter.finish(self.num_whq, totals)

    if not open_now:
        result_dialog = gtk.MessageDialog(buttons=gtk.ButtonType.OK, message_format='Results exported successfully.')
        result_dialog.run()
        result_dialog.destroy()
    else:
        subprocess.Popen(['%s' % DBConstants.SETTINGS.SPREADSHEET_PATH, filename])