Baby Language Lab Scripts
A collection of data processing tools.
 All Classes Namespaces Files Functions Variables Pages
open_pair_window.py
Go to the documentation of this file.
1 import os
2 import difflib
3 
4 from gi.repository import Gtk as gtk
5 from gi.repository import WebKit as webkit
6 from parsers.trs_parser import TRSParser
7 from utils.ui_utils import UIUtils
8 from utils.progress_dialog import ProgressDialog
9 from utils.backend_utils import BackendUtils
10 from ui.verifier_app.diff_win import DiffWin
11 
def __init__(self):
    """Build and show the 'Transcription Verifier' file-pair selection window.

    The window stacks one framed form per transcript (a transcriber name
    entry, a path entry and a Browse button) above a Cancel / Ok button row.
    Ok passes the four entry values to _check_input(); Cancel destroys the
    window.
    """
    def build_file_frame(number, first_row):
        # Create the framed form for transcript `number`, laying its two
        # grid rows out at first_row and first_row + 1. Returns the frame
        # plus the name and path entries so the caller can read them later.
        grid = gtk.Grid()
        frame = gtk.Frame(label='File %d' % number)

        name_label = gtk.Label('Transcriber Name:')
        name_entry = gtk.Entry()
        name_entry.set_width_chars(20)
        path_label = gtk.Label('Path:')
        path_entry = gtk.Entry()
        path_entry.set_width_chars(50)

        browse_title = 'Select File %d' % number
        browse_button = gtk.Button('Browse')
        browse_button.connect(
            'clicked',
            lambda w: UIUtils.browse_file(browse_title, path_entry, [UIUtils.TRS_FILE_FILTER])
        )

        second_row = first_row + 1
        grid.attach(name_label, 0, first_row, 1, 1)
        grid.attach(name_entry, 1, first_row, 1, 1)
        grid.attach(path_label, 0, second_row, 1, 1)
        grid.attach(path_entry, 1, second_row, 1, 1)
        grid.attach(browse_button, 2, second_row, 1, 1)

        frame.add(grid)
        return frame, name_entry, path_entry

    self.window = gtk.Window(gtk.WindowType.TOPLEVEL)
    self.window.set_title('Transcription Verifier')
    self.window.connect('destroy', lambda w: self.window.destroy())
    self.window.set_default_size(270, 210)
    self.window.set_resizable(True)

    vbox = gtk.VBox()

    file1_frame, file1_name_entry, file1_entry = build_file_frame(1, 0)
    vbox.pack_start(file1_frame, True, True, 0)

    # The second form keeps its original grid rows (2 and 3).
    file2_frame, file2_name_entry, file2_entry = build_file_frame(2, 2)
    vbox.pack_start(file2_frame, True, True, 0)

    # Put the cursor in the first field the user is expected to fill in.
    file1_name_entry.grab_focus()

    def on_ok_clicked(widget):
        # Read the entries at click time, not at construction time.
        self._check_input(
            file1_entry.get_text(),
            file2_entry.get_text(),
            file1_name_entry.get_text(),
            file2_name_entry.get_text()
        )

    button_box = gtk.HButtonBox()
    button_box.set_layout(gtk.ButtonBoxStyle.EDGE)

    cancel_button = gtk.Button(stock=gtk.STOCK_CANCEL, label='Cancel')
    cancel_button.connect('clicked', lambda w: self.window.destroy())
    button_box.add(cancel_button)

    ok_button = gtk.Button(stock=gtk.STOCK_OK, label='Ok')
    ok_button.connect('clicked', on_ok_clicked)
    button_box.add(ok_button)

    vbox.pack_start(button_box, True, True, 0)

    self.window.add(vbox)
    self.window.show_all()
86 
87  def _check_input(self, file1_path, file2_path, file1_name, file2_name):
88  if file1_path and file2_path:
89  bad_paths = []
90  for path in [file1_path, file2_path]:
91  if not os.path.exists(path):
92  bad_paths.append(path)
93 
94  if bad_paths:
95  message = 'The following files could not be located.\n'
96  for path in bad_paths:
97  message += '\n- %s' % (path)
98  message += '\n\nPlease double-check the paths and try again.'
99  UIUtils.show_message_dialog(message)
100 
101  else:
102  self._compare(file1_path, file2_path, file1_name, file2_name)
103 
104  else:
105  UIUtils.show_message_dialog('Please select two files.')
106 
def _compare(self, file1_path, file2_path, file1_name, file2_name):
    """Parse both transcripts, diff their utterance descriptions and show the result.

    Disables this window and shows a ProgressDialog with one phase per file
    parsed, followed by a comparison phase and an output phase. Each file is
    parsed with TRSParser (validation and bad-code removal switched off),
    the parsed segments are turned into description lines by
    _build_desc_strs(), and an HTML side-by-side diff of those lines is
    opened in a DiffWin. On success this window is destroyed.

    If any step raises, the window is made sensitive again so the user can
    correct the input and retry, the progress dialog is asked to finish,
    and the exception is re-raised unchanged.

    file1_name / file2_name are used as the column headings of the diff.
    """
    self.window.set_sensitive(False)
    paths = [file1_path, file2_path]
    phase_names = ['Parsing trs file %d...' % (i + 1) for i in range(len(paths))]
    phase_names += ['Comparing files...', 'Generating output...']
    dialog = ProgressDialog('Processing Files...', phase_names)
    dialog.show()

    try:
        segs = []
        for path in paths:
            file_segs = TRSParser(path).parse(
                progress_update_fcn=dialog.set_fraction,
                validate=False,
                remove_bad_trans_codes=False
            )
            segs.append(file_segs)
            dialog.next_phase()

        desc_strs = self._build_desc_strs(segs, dialog)
        dialog.next_phase()

        # context=True with numlines=0 shows only the differing lines.
        html = difflib.HtmlDiff().make_file(
            *desc_strs,
            fromdesc=file1_name,
            todesc=file2_name,
            context=True,
            numlines=0
        )

        # prevent font selection from killing webkit on Windows systems
        html = html.replace('font-family:Courier;', '')
        DiffWin(html)
    except Exception:
        # Without this the selection window would stay greyed out forever
        # after a parse/diff failure.
        self.window.set_sensitive(True)
        raise
    finally:
        # Runs on both paths; on success this keeps the original ordering
        # (DiffWin, then ensure_finish, then destroy).
        # NOTE(review): assumes ensure_finish() is safe to call before all
        # phases have completed - confirm against ProgressDialog.
        dialog.ensure_finish()

    self.window.destroy()
136 
137  def _build_desc_strs(self, segs, dialog):
138  descs = []
139 
140  for i in range(len(segs)):
141  file_descs = []
142  for seg in segs[i]:
143  for utter in seg.utters:
144  file_descs.append(self._build_utter_desc(utter))
145 
146  dialog.set_fraction(float(i) / float(len(segs)))
147  descs.append(file_descs)
148 
149  return descs
150 
def _build_utter_desc(self, utter):
    """Render one utterance as a single newline-terminated description line.

    Layout: "<speaker> [<start> - <end>]" followed, when present, by the
    LENA notes, the transcribed phrase, and a pipe-delimited code group
    ("|code|code|") holding the LENA codes and then the transcriber codes.

    The speaker field is '?' when the utterance has no speaker, ' - ' when
    the speaker has no code info, and the speaker's code otherwise. Times
    are formatted with BackendUtils.get_time_str().
    """
    speaker = utter.speaker
    if not speaker:
        speaker_cd = '?'
    elif speaker.speaker_codeinfo:
        speaker_cd = speaker.speaker_codeinfo.get_code()
    else:
        speaker_cd = ' - '

    start_str = BackendUtils.get_time_str(utter.start)
    end_str = BackendUtils.get_time_str(utter.end)
    parts = ['%s [%s - %s]' % (speaker_cd, start_str, end_str)]

    if utter.lena_notes:
        parts.append(' %s' % (utter.lena_notes))
    if utter.trans_phrase:
        parts.append(' %s' % (utter.trans_phrase))

    if utter.lena_codes:
        parts.append(' |%s|' % ('|'.join(utter.lena_codes)))
    if utter.trans_codes:
        # The transcriber codes continue the LENA code group if there is
        # one; otherwise they have to open the group themselves.
        if not utter.lena_codes:
            parts.append(' |')
        parts.append('%s|' % ('|'.join(utter.trans_codes)))

    parts.append('\n')
    return ''.join(parts)