# Constructor fragment. Only the signature and two assignments are visible in
# this excerpt (the embedded numbers jump 36 -> 38 -> 45), so anything else the
# constructor sets up is not described here.
36 def __init__(self, trs_parser, seg, remove_bad_trans_codes):
# Logger named "<module name>.<class name>" for this instance's class.
38 self.
logger = logging.getLogger(__name__ +
'.' + self.__class__.__name__)
# The three named states of the machine.
# NOTE(review): Enum is called with a list of names here, which is not how the
# stdlib enum.Enum is used -- presumably a project helper; confirm.
45 self.
States =
Enum([
'INITIAL',
'INITIAL_SYNC_TAG',
'WHO_TAG'])
# State-transition fragment: branches on self.cur_state and on next_obj.tag
# ('Sync' or 'Who'). The enclosing def and many intermediate lines are missing
# from this excerpt, so only the visible statements are annotated.
54 if next_obj.tag ==
'Sync':
# The Sync element's 'time' attribute, parsed as a float, is the start time.
62 start_time = float(next_obj.attrib[
'time'])
# Stamps start_time onto every utterance via setattr, then appends them.
# NOTE(review): map() is used purely for its side effect. That only runs
# eagerly on Python 2; on Python 3 map is lazy and nothing would be set --
# confirm the target interpreter.
63 map(
lambda u: setattr(u,
'start', start_time), utters)
64 self.utter_list.extend(utters)
# Switch to the INITIAL_SYNC_TAG state.
67 self.
cur_state = self.States.INITIAL_SYNC_TAG
# --- branch taken when cur_state is INITIAL_SYNC_TAG ---
71 elif self.
cur_state == self.States.INITIAL_SYNC_TAG:
73 if next_obj.tag ==
'Sync':
# This Sync's 'time' attribute is read as an end time.
75 end_time = float(next_obj.attrib[
'time'])
# Loops while i is a valid index and utter_list[i] has no end yet; the loop
# body (and how i is initialised/updated) is not visible here.
# NOTE(review): `== None` -- `is None` is the idiomatic test (PEP 8).
78 while i >= 0
and self.
utter_list[i].end ==
None:
# start_time is read from next_obj's 'time' attribute, as end_time was above,
# and stamped onto the new utterances (same map() caveat as noted earlier).
84 start_time = float(next_obj.attrib[
'time'])
85 map(
lambda u: setattr(u,
'start', start_time), new_utters)
86 self.utter_list.extend(new_utters)
# 'Who' tag: the most recent utterance is popped, the parser's total_utters
# counter is decremented by one, and the new utterances reuse the popped
# utterance's start before being appended.
91 elif next_obj.tag ==
'Who':
94 old_utter = self.utter_list.pop()
95 self.trs_parser.total_utters -= 1
99 map(
lambda u: setattr(u,
'start', old_utter.start), new_utters)
100 self.utter_list.extend(new_utters)
# --- branch taken when cur_state is WHO_TAG ---
107 elif self.
cur_state == self.States.WHO_TAG:
108 if next_obj.tag ==
'Sync':
110 sync_time = float(next_obj.attrib[
'time'])
# Same open-ended-utterance loop condition as in the INITIAL_SYNC_TAG branch;
# the body is not visible here either.
112 while i >= 0
and self.
utter_list[i].end ==
None:
# New utterances start at sync_time (same map() caveat as noted earlier).
118 map(
lambda u: setattr(u,
'start', sync_time), new_utters)
119 self.utter_list.extend(new_utters)
# A Sync seen in WHO_TAG moves the machine to INITIAL_SYNC_TAG.
122 self.
cur_state = self.States.INITIAL_SYNC_TAG
# Another 'Who' while in WHO_TAG: new utterances take the start of the last
# utterance currently in utter_list.
124 elif next_obj.tag ==
'Who':
129 map(
lambda u: setattr(u,
'start', self.
utter_list[-1].start), new_utters)
130 self.utter_list.extend(new_utters)
# End-of-input fragment: reads the closing time from final_obj's 'endTime'
# attribute as a float. The enclosing def is not visible in this excerpt.
139 final_end_time = float(final_obj.attrib[
'endTime'])
# Only when the machine stopped in INITIAL_SYNC_TAG or WHO_TAG does it loop
# over utterances whose end is still None (loop body not visible).
# NOTE(review): presumably those open utterances receive final_end_time --
# confirm against the missing lines.
143 if self.
cur_state == self.States.INITIAL_SYNC_TAG
or self.
cur_state == self.States.WHO_TAG:
# NOTE(review): `== None` -- `is None` is the idiomatic test (PEP 8).
146 while i >= 0
and self.
utter_list[i].end ==
None:
# Logger named "<module name>.<class name>" for this instance's class.
# NOTE(review): embedded line 184 is far from the earlier logger assignment, so
# this presumably belongs to a different class's constructor -- the def line is
# not visible in this excerpt.
184 self.
logger = logging.getLogger(__name__ +
'.' + self.__class__.__name__)
# Dump fragment: builds one text line per utterance in seg.utters and appends
# it to `contents`. The enclosing def, the initialisation of codes_str/contents
# and the tail of the format tuple are not visible in this excerpt.
192 for utter
in seg.utters:
# LENA codes are rendered pipe-delimited, e.g. "|a|b|", and appended.
195 codes_str +=
'|%s|' % (
'|'.join(utter.lena_codes))
# NOTE(review): this branch assigns with `=` where the line above used `+=`,
# so transcriber codes replace whatever codes_str held -- confirm intended.
196 if utter.trans_codes:
197 codes_str =
'|%s|' % (
'|'.join(utter.trans_codes))
# One tab-indented line per utterance: speaker code (or '-' when there is no
# speaker), start and end formatted by BackendUtils.get_time_str, then the LENA
# notes and the transcription phrase, each defaulting to '' and followed by a
# space. The sixth format argument is not visible here.
199 contents +=
'\t%s [%s - %s] %s%s%s\n' % (utter.speaker.speaker_codeinfo.code
if utter.speaker
else '-',
200 BackendUtils.get_time_str(utter.start),
201 BackendUtils.get_time_str(utter.end),
202 (utter.lena_notes
or '') +
' ',
203 (utter.trans_phrase
or '') +
' ',
# Link-resolution fragment: pairs utterances carrying an 'I<n>' code with a
# later utterance carrying the matching 'C<n>' code, using
# self.link_dict['prev_links'] and self.link_dict['cur_links'] as pending
# tables. The enclosing def and several branch headers are missing from this
# excerpt, so only the visible statements are annotated.
215 if next_obj.trans_phrase:
# True when no linkable segment is being tracked yet, or next_obj belongs to a
# segment with a different num.
# NOTE(review): `== None` -- `is None` is the idiomatic test (PEP 8).
218 if self.
cur_linkable_seg ==
None or self.cur_linkable_seg.num != next_obj.seg.num:
# Every entry still in prev_links is reported as an I code with no matching C
# code. A falsy link number (such as 0) is printed as an empty string.
221 for link_num
in self.
link_dict[
'prev_links']:
222 err_msg =
'Encountered I%s with no C%s in next linkable segment.\n' % tuple([str(link_num
or '')] * 2)
223 err_msg +=
'Expected a \'C\' in either the current segment, or one of the these (following) segments:\n'
226 self.trs_parser.error_collector.add(
ParserError(err_msg, self.
link_dict[
'prev_links'][link_num]))
# Defaults used when the code slot is not inspected below.
238 continued_match =
None
239 continuation_match =
None
241 continuation_num =
''
# trans_codes[2] is searched only when the utterance has exactly as many
# transcriber codes as DBConstants.TRANS_CODES.
# NOTE(review): the patterns are plain (non-raw) literals containing \d; newer
# Python 3 releases warn about that escape -- consider raw strings.
244 if len(next_obj.trans_codes) == len(DBConstants.TRANS_CODES):
245 continued_match = re.search(
'(?:I(\d+)?)', next_obj.trans_codes[2])
246 continuation_match = re.search(
'(?:C(\d+)?)', next_obj.trans_codes[2])
# hasattr(..., 'group') is False for None, so this is "did the search match".
249 continued = hasattr(continued_match,
'group')
# The captured digits, or 0 when the code carried no number.
252 continued_num = continued_match.groups()[0]
or 0
254 continuation = hasattr(continuation_match,
'group')
257 continuation_num = continuation_match.groups()[0]
or 0
# A C code is first matched against pending I codes in prev_links, then in
# cur_links; on a hit the entry is removed and the two objects are linked to
# each other through .next / .prev.
264 if continuation_num
in self.
link_dict[
'prev_links']:
266 prev_obj = self.
link_dict[
'prev_links'].pop(continuation_num)
269 prev_obj.next = next_obj
270 next_obj.prev = prev_obj
273 elif continuation_num
in self.
link_dict[
'cur_links']:
275 prev_obj = self.
link_dict[
'cur_links'].pop(continuation_num)
278 prev_obj.next = next_obj
279 next_obj.prev = prev_obj
# Error built for a C code with no pending I code (the branch header that
# guards this is not visible in this excerpt).
283 err_msg =
'Encountered C%s code with no I%s code in previous linkable segment.\n' % tuple([str(continuation_num
or '')] * 2)
285 err_msg +=
'Expected an \'I\' either previously in the current segment, or in one of these (previous) segments:\n'
288 self.trs_parser.error_collector.add(
ParserError(err_msg, next_obj))
# An I number that is already pending in cur_links is reported as ambiguous.
294 if continued_num
in self.
link_dict[
'cur_links']:
295 self.trs_parser.error_collector.add(
ParserError(
'Ambiguous I%s in segment.' % (str(continued_num
or '')), next_obj))
# Record next_obj as the pending I entry for continued_num.
298 self.
link_dict[
'cur_links'][continued_num] = next_obj
# Leftover-link reporting fragment: every I code still pending in cur_links and
# in prev_links is added to the parser's error collector as a ParserError
# attached to the stored object. The enclosing def is not visible in this
# excerpt. A falsy link number (such as 0) is printed as an empty string.
306 for link_num
in self.
link_dict[
'cur_links']:
307 self.trs_parser.error_collector.add(
ParserError(
'Encountered I%s with no C%s in next segment.' % tuple([str(link_num
or '')] * 2), self.
link_dict[
'cur_links'][link_num]))
# Same report for entries left in prev_links, with a different message.
310 for link_num
in self.
link_dict[
'prev_links']:
312 self.trs_parser.error_collector.add(
ParserError(
'Encountered I%s with no following C%s code.' % tuple([str(link_num
or '')] * 2), self.
link_dict[
'prev_links'][link_num]))