Baby Language Lab Scripts
A collection of data processing tools.
 All Classes Namespaces Files Functions Variables Pages
verification_window.py
Go to the documentation of this file.
1 from gi.repository import Gtk as gtk
2 from gi.repository import GObject as gobject
3 import logging
4 
5 from parsers.trs_parser import TRSParser
6 from parsers.wav_parser import WavParser
7 from utils.ui_utils import UIUtils
8 from utils.progress_dialog import ProgressDialog
9 from utils.enum import Enum
10 from utils.backend_utils import BackendUtils
11 from parsers.errors import ParserWarning, ParserError
12 
# Severity levels attached to every row of the tree model. The values are
# stored in an integer model column and compared with < and >, so the
# listing order (NONE, then WARNING, then ERROR) is the severity order.
ERROR_STATES = Enum([
    'NONE',
    'WARNING',
    'ERROR',
])
def __init__(self, filename, progress_dialog):
    """Parse a transcript file and display its verification window.

    Args:
        filename: Passed straight to TRSParser.
        progress_dialog: Object exposing set_fraction() and next_phase();
            both are used as progress callbacks while the file is parsed
            and while the tree view is populated.
    """
    self.logger = logging.getLogger(__name__)

    # Top-level window.
    win = gtk.Window(gtk.WindowType.TOPLEVEL)
    self.window = win
    win.set_title('Transcription Verifier')
    win.connect('destroy', lambda _widget: self.window.destroy())
    win.set_border_width(10)
    win.set_default_size(580, 500)

    # Parse the file, keeping transcriber codes the parser considers bad.
    self.trs_parser = TRSParser(filename)
    self.trs_parser.parse(
        progress_update_fcn=progress_dialog.set_fraction,
        progress_next_phase_fcn=progress_dialog.next_phase,
        remove_bad_trans_codes=False,
    )
    # The wav parser is created lazily, the first time a segment is played.
    self.wav_parser = None

    progress_dialog.next_phase()
    # Must be set before the tree is built: the row filter reads it.
    self.filter_errors = True
    self.toolbar = self.build_toolbar()
    self.treeview = self.build_treeview(progress_dialog.set_fraction)
    self.treeview.expand_all()

    # Tree view inside a scroll area, below the toolbar.
    scroller = gtk.ScrolledWindow()
    scroller.set_policy(gtk.PolicyType.AUTOMATIC, gtk.PolicyType.AUTOMATIC)
    scroller.add(self.treeview)

    layout = gtk.VBox(False, 2)
    layout.pack_start(self.toolbar, False, False, 0)
    layout.pack_start(scroller, True, True, 0)
    win.add(layout)

    win.show_all()
48 
def build_toolbar(self):
    """Create the horizontal toolbar with all of the window's actions.

    Returns:
        The populated gtk.Toolbar.
    """
    def decorate(item, icon_id, label, signal, handler):
        # Give a tool item its icon and label, then hook up its handler.
        icon = gtk.Image()
        icon.set_from_file(UIUtils.get_icon_path(icon_id))
        item.set_label(label)
        item.set_icon_widget(icon)
        item.connect(signal, handler)
        return item

    icons = UIUtils.BUTTON_ICONS

    bar = gtk.Toolbar()
    bar.set_orientation(gtk.Orientation.HORIZONTAL)

    # Activate the toggle *before* its handler is connected so that the
    # initial state change does not invoke the handler.
    errors_toggle = gtk.ToggleToolButton()
    errors_toggle.set_active(True)

    items = (
        decorate(errors_toggle, icons.FLAG, 'Show Errors Only', 'toggled',
                 lambda w: self.toggle_filter_errors(w.get_active())),
        decorate(gtk.ToolButton(), icons.EXPAND, 'Expand All', 'clicked',
                 lambda w: self.treeview.expand_all()),
        decorate(gtk.ToolButton(), icons.COLLAPSE, 'Collapse All', 'clicked',
                 lambda w: self.treeview.collapse_all()),
        gtk.SeparatorToolItem(),
        decorate(gtk.ToolButton(), icons.PLAY, 'Play Seg', 'clicked',
                 lambda w: self.play_selected_seg()),
        decorate(gtk.ToolButton(), icons.REFRESH, 'Rescan File', 'clicked',
                 lambda w: self._rescan_file()),
        gtk.SeparatorToolItem(),
        decorate(gtk.ToolButton(), icons.CLOSE, 'Close', 'clicked',
                 lambda w: self.window.destroy()),
        decorate(gtk.ToolButton(), icons.EXIT, 'Exit', 'clicked',
                 lambda w: gtk.main_quit()),
    )
    for item in items:
        bar.insert(item, -1)

    return bar
114 
def _rescan_file(self):
    """Re-parse the transcript file and rebuild the tree model from it.

    The window is made insensitive while the rescan runs. It is always
    made sensitive again afterwards, even when parsing or model building
    raises, so a failed rescan cannot leave the window permanently
    disabled. Any exception still propagates to the caller.
    """
    self.window.set_sensitive(False)
    try:
        progress_dialog = ProgressDialog(
            'Processing File...',
            ['Parsing trs file...', 'Validating data...', 'Building UI...'],
        )
        progress_dialog.show()

        # This causes the parser to invalidate all cache, re-open and
        # re-parse the file.
        self.trs_parser.re_parse(
            progress_update_fcn=progress_dialog.set_fraction,
            progress_next_phase_fcn=progress_dialog.next_phase,
        )

        # Build a new treeview model based on the new data.
        progress_dialog.next_phase()
        filter_model = self._build_tree_store(progress_dialog.set_fraction)
        self.treeview.set_model(filter_model)

        # Presumably the most common cause for rescanning is to check if
        # errors have been fixed. If the error filter is on, automatically
        # expand all rows to show any remaining errors.
        if self.filter_errors:
            self.treeview.expand_all()
    finally:
        self.window.set_sensitive(True)
136 
def _build_tree_store(self, progress_update_fcn):
    """Build the filtered tree model shown by the tree view.

    The model has three levels: segments, their utterances, and the
    errors/warnings recorded for each utterance. Every row holds
    (segment/utterance id, description, error state); error rows use -1
    as their id. A row's error state is raised to the most severe state
    found among the error rows beneath it.

    Args:
        progress_update_fcn: Called after each utterance with the
            fraction of utterances processed so far (a float).

    Returns:
        A filter model wrapping the tree store, with self.filter
        installed as its visibility function.
    """
    # segment/utter id, description, error_state (0 = none, 1 = warning, 2 = error)
    tree_store = gtk.TreeStore(gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT)

    cur_utter = 0
    for seg in self.trs_parser.parse():
        if seg.speakers:
            seg_speakers = ' + '.join(
                speaker.speaker_codeinfo.get_code() for speaker in seg.speakers
            )
        else:
            seg_speakers = ' - '

        seg_iter = tree_store.append(None, [
            seg.num,
            '%s [%s - %s]' % (
                seg_speakers,
                BackendUtils.get_time_str(seg.start),
                BackendUtils.get_time_str(seg.end),
            ),
            VerificationWindow.ERROR_STATES.NONE,
        ])

        for utter in seg.utters:
            # '?' marks an utterance whose speaker exists but has no
            # speaker_codeinfo (its code was not found in the lookup
            # table); that case falls through the branches below.
            speaker_cd = '?'
            if utter.speaker:
                if utter.speaker.speaker_codeinfo:
                    speaker_cd = utter.speaker.speaker_codeinfo.get_code()
            else:
                speaker_cd = ' - '

            desc_str = '%s [%s - %s]' % (
                speaker_cd,
                BackendUtils.get_time_str(utter.start),
                BackendUtils.get_time_str(utter.end),
            )
            if utter.lena_notes:
                desc_str += ' %s' % (utter.lena_notes)
            if utter.trans_phrase:
                desc_str += ' %s' % (utter.trans_phrase)
            if utter.lena_codes:
                desc_str += ' |%s|' % ('|'.join(utter.lena_codes))
            if utter.trans_codes:
                # Open the code list here when no LENA codes did so above.
                if not utter.lena_codes:
                    desc_str += ' |'
                desc_str += '%s|' % ('|'.join(utter.trans_codes))

            utter_iter = tree_store.append(seg_iter, [
                utter.id,
                desc_str,
                VerificationWindow.ERROR_STATES.NONE,
            ])

            cur_utter += 1
            progress_update_fcn(float(cur_utter) / float(self.trs_parser.total_utters))

            # Note: these may be errors or warnings.
            for error in self.trs_parser.error_collector.get_errors_by_utter(utter):
                error_type = VerificationWindow.ERROR_STATES.ERROR
                if isinstance(error, ParserWarning):
                    error_type = VerificationWindow.ERROR_STATES.WARNING

                tree_store.append(utter_iter, [
                    -1,
                    '%s' % (error.msg),
                    error_type,
                ])

                # Propagate the severity up through the utterance and
                # segment rows, never lowering a state already recorded.
                parent_it = utter_iter
                while parent_it:
                    if tree_store.get_value(parent_it, 2) < error_type:
                        tree_store.set_value(parent_it, 2, error_type)
                    parent_it = tree_store.iter_parent(parent_it)

    filter_model = tree_store.filter_new()
    filter_model.set_visible_func(self.filter)

    return filter_model
210 
def build_treeview(self, progress_update_fcn):
    """Create the tree view and the model behind it.

    Only the description column is visible; the id and error-state
    columns are added but hidden.

    Args:
        progress_update_fcn: Forwarded to _build_tree_store().

    Returns:
        The configured gtk.TreeView.
    """
    view = gtk.TreeView(self._build_tree_store(progress_update_fcn))

    id_column = gtk.TreeViewColumn('ID', gtk.CellRendererText(), text=0)
    id_column.set_visible(False)

    # The description text is coloured per row by cell_render_fcn.
    desc_renderer = gtk.CellRendererText()
    desc_column = gtk.TreeViewColumn('Description', desc_renderer, text=1)
    desc_column.set_cell_data_func(desc_renderer, self.cell_render_fcn)

    state_column = gtk.TreeViewColumn('Error State', gtk.CellRendererText(), text=2)
    state_column.set_visible(False)

    for column in (id_column, desc_column, state_column):
        view.append_column(column)

    return view
229 
def cell_render_fcn(self, col, cell_renderer, model, it, user_data=None):
    """Colour a description cell according to its row's error state.

    Warning rows are drawn in orange, error rows in red and all other
    rows in black.

    Args:
        col: The tree view column being rendered (unused).
        cell_renderer: Renderer whose 'foreground' property is set.
        model: Model holding the row; column 2 is the error state.
        it: Iterator pointing at the row.
        user_data: Unused.
    """
    row_state = model.get_value(it, 2)
    if row_state == VerificationWindow.ERROR_STATES.WARNING:
        colour = 'orange'
    elif row_state == VerificationWindow.ERROR_STATES.ERROR:
        colour = 'red'
    else:
        colour = 'black'

    cell_renderer.set_property('foreground', colour)
240 
241  #returns true if row pointed to by 'it' should be visible
def filter(self, model, it, user_data):
    """Decide whether the row pointed to by 'it' is shown.

    With the error filter off every row is visible. With it on, only
    rows whose error state (model column 2) is above NONE are visible.

    Args:
        model: Model holding the row.
        it: Iterator pointing at the row.
        user_data: Unused.

    Returns:
        True if the row should be visible.
    """
    if not self.filter_errors:
        return True

    return model.get_value(it, 2) > VerificationWindow.ERROR_STATES.NONE
248 
def toggle_filter_errors(self, filter_errors):
    """Switch the errors-only filter on or off and refresh the view.

    The requested state is taken from the argument rather than by
    flipping the stored flag, so the flag cannot drift out of step with
    the toggle button that reports its own active state here.

    Args:
        filter_errors: New filter state; truthy means only rows carrying
            a warning or error are shown.
    """
    self.filter_errors = bool(filter_errors)
    self.treeview.get_model().refilter()
252 
def play_selected_seg(self):
    """Play the audio for the segment owning the selected row.

    If an utterance or error row is selected, its top-level segment row
    is used instead. The first time audio is requested the user is asked
    to choose a WAV file; the resulting parser is kept for later calls.
    UIUtils.show_no_sel_dialog() is called when no row is selected, and
    also when no WAV parser is available after the file prompt.
    """
    model, row = self.treeview.get_selection().get_selected()
    if not row:
        UIUtils.show_no_sel_dialog()
        return

    # Climb to the top-level ancestor, which is the segment row.
    ancestor = model.iter_parent(row)
    while ancestor:
        row = ancestor
        ancestor = model.iter_parent(row)

    seg_num = model.get_value(row, 0)
    seg = self.trs_parser.parse()[seg_num]

    if not self.wav_parser:
        chooser = gtk.FileChooserDialog(
            title='Select WAV File',
            action=gtk.FileChooserAction.OPEN,
            buttons=(gtk.STOCK_CANCEL, gtk.ResponseType.CANCEL,
                     gtk.STOCK_OPEN, gtk.ResponseType.OK),
        )
        chooser.set_default_response(gtk.ResponseType.OK)

        for filter_name, pattern in (('wav Files', '*.wav'), ('All Files', '*')):
            file_filter = gtk.FileFilter()
            file_filter.set_name(filter_name)
            file_filter.add_pattern(pattern)
            chooser.add_filter(file_filter)

        if chooser.run() == gtk.ResponseType.OK:
            self.wav_parser = WavParser(chooser.get_filename())

        chooser.destroy()

    if not self.wav_parser:
        # Still no parser: the file chooser was dismissed without OK.
        UIUtils.show_no_sel_dialog()
        return

    self.wav_parser.play_seg(seg)
291