8 from collections
import OrderedDict
60 class CountOutputCalc(OutputCalc):
69 def __init__(self, search_term, count_type, max_count=-1):
70 self.
logger = logging.getLogger(__name__)
98 while seg.utters
and i < len(seg.utters):
100 if seg.utters[i].trans_phrase:
101 count = len(re.findall(self.
search_term, seg.utters[i].trans_phrase))
116 if head.trans_phrase:
121 full_phrase += cur.trans_phrase
126 count = len(re.findall(self.
search_term, full_phrase))
144 csv_writer.writerow([
'Start Time',
'End Time',
'Phrase',
'Count'])
149 for utter
in utter_list:
152 phrase, tail = FilterManager.get_chain_phrase(utter)
153 start = BackendUtils.get_time_str(utter.start)
154 end = BackendUtils.get_time_str(tail.end)
159 csv_writer.writerow([start,
161 phrase.replace(
'\n',
'').replace(
'\r',
''),
167 start = BackendUtils.get_time_str(utter.start)
168 end = BackendUtils.get_time_str(utter.end)
173 csv_writer.writerow([
181 csv_writer.writerow([
''])
182 csv_writer.writerow([
'Total:',
'',
'', total])
191 counts = self.chain_dict.values()
if chained
else self.utter_dict.values()
193 total = reduce(
lambda accum, x: accum + x, counts, 0)
194 avg = float(total) / float(len(counts))
196 csv_writer.writerow([
'Avg:',
'%0.3f' % (avg)])
198 csv_writer.writerow([
'No matches found in TRS file.'])
206 counts = self.chain_dict.values()
if chained
else self.utter_dict.values()
207 total = reduce(
lambda accum, x: accum + x, counts, 0)
209 csv_writer.writerow([
'Sum:', total])
213 combo_option = DBConstants.COMBOS[DBConstants.COMBO_GROUPS.COUNT_OUTPUT_CALC_TYPES][self.
count_type]
214 csv_writer.writerow([
'Count:', combo_option.disp_desc])
215 csv_writer.writerow([
'Search Term:', self.
search_term])
234 self.
logger = logging.getLogger(__name__)
260 while seg.utters
and i < len(seg.utters):
262 if seg.utters[i].trans_phrase:
263 count = len(re.findall(self.
search_term, seg.utters[i].trans_phrase))
266 if seg.utters[i].end !=
None and seg.utters[i].start !=
None:
267 time = seg.utters[i].end - seg.utters[i].start
274 self.
utter_dict[seg.utters[i]] = (count, time)
279 if head.trans_phrase:
287 count += len(re.findall(self.
search_term, cur.trans_phrase))
292 if tail.end !=
None and head.start !=
None:
293 time = tail.end - head.start
308 csv_writer.writerow([
'Start Time',
'End Time',
'Phrase',
'Occurances',
'Time Elapsed(sec)',
'Rate (occurrances/sec)'])
312 for utter
in utter_list:
322 phrase, tail = FilterManager.get_chain_phrase(utter)
326 rate = float(count) / float(time)
331 phrase = utter.trans_phrase
333 rate = float(count) / float(time)
335 csv_writer.writerow([BackendUtils.get_time_str(start),
336 BackendUtils.get_time_str(end),
337 phrase.replace(
'\n',
'').replace(
'\r',
''),
347 pairs = self.chain_dict.values()
if chained
else self.utter_dict.values()
351 for cur_pair
in pairs:
352 count, time = cur_pair
356 avg = float(total_count) / total_time
358 csv_writer.writerow([
'Avg:', avg])
362 combo_option = DBConstants.COMBOS[DBConstants.COMBO_GROUPS.RATE_OUTPUT_CALC_TYPES][self.
rate_type]
363 csv_writer.writerow([
'Rate:', combo_option.disp_desc])
364 csv_writer.writerow([
'Search Term:', self.
search_term])
377 self.
logger = logging.getLogger(__name__)
410 keys = self.chains_dict.keys()
413 while not found_key
and i < len(keys):
414 if ( (start >= keys[i][0]
and start <= keys[i][1])
or
415 (end <= keys[i][1]
and end >= keys[i][0]) ):
429 while not found
and cur:
431 found = re.search(self.
search_term, cur.trans_phrase) !=
None
440 for utter
in seg.utters:
442 if utter.end !=
None and utter.start !=
None and utter.trans_phrase:
444 if not (utter.start, utter.end)
in self.
utters_dict:
448 if utter.trans_phrase
and re.search(self.
search_term, utter.trans_phrase):
455 if head.trans_phrase:
456 tail = FilterManager.get_endpoint(FilterManager.ENDPOINT_TYPES.TAIL, head)
458 if tail.end !=
None and head.start !=
None:
467 self.chains_dict.pop(key_tuple)
473 new_key_tuple = (min(key_tuple[0], head.start), max(key_tuple[1], tail.end))
477 new_key_tuple = (head.start, tail.end)
485 csv_writer.writerow([
'Time Period'])
489 csv_writer.writerow([
'Search Term:', self.
search_term])
490 csv_writer.writerow([
'Time Containg Matches:', BackendUtils.get_time_str(total_time)])
498 BREAKDOWN_CRITERIA =
None
505 self.
logger = logging.getLogger(
'stats_app')
530 self.seg_list.append(seg)
534 self.chain_list.append(head)
542 BreakdownOutputCalc.BREAKDOWN_CRITERIA.SPEAKER_TYPE: 0,
543 BreakdownOutputCalc.BREAKDOWN_CRITERIA.TARGET_LISTENER: 1,
544 BreakdownOutputCalc.BREAKDOWN_CRITERIA.COMPLETENESS: 2,
545 BreakdownOutputCalc.BREAKDOWN_CRITERIA.UTTERANCE_TYPE: 3,
556 if len(utter.trans_codes) > trans_code_index:
557 code = utter.trans_codes[trans_code_index]
569 row_combo = DBConstants.COMBOS[DBConstants.COMBO_GROUPS.BREAKDOWN_OUTPUT_CALC_CRITERIA][self.
row_criteria]
570 col_combo = DBConstants.COMBOS[DBConstants.COMBO_GROUPS.BREAKDOWN_OUTPUT_CALC_CRITERIA][self.
col_criteria]
572 csv_writer.writerow([
'Row Criteria:', row_combo.disp_desc])
573 csv_writer.writerow([
'Column Criteria:', col_combo.disp_desc])
577 row_code = DBConstants.TRANS_CODES[row_code_index]
578 row_code_strs = row_code.get_all_options_codes()
582 col_code = DBConstants.TRANS_CODES[col_code_index]
583 col_code_strs = col_code.get_all_options_codes()
585 csv_writer.writerow([
''] + col_code_strs)
588 count_hash = OrderedDict()
594 for row
in row_code_strs:
595 count_hash[row] = OrderedDict()
596 for col
in col_code_strs:
597 count_hash[row][col] = 0
600 for datum
in data_list:
602 if datum.trans_phrase:
607 if row_code !=
None and col_code !=
None:
608 for row_char
in row_code:
609 for col_char
in col_code:
611 count_hash[row_char][col_char] += 1
613 except KeyError
as err:
614 self.logger.info(
'Output Calc encountered unrecognized key: %s' % (err))
615 self.logger.info(
'row_code: %s, col_code: %s' % (row_code, col_code))
616 self.logger.info(
'Utterance: %s' % (datum))
619 print 'Output Calc encountered unrecognized key: %s' % (err)
620 print 'row_code: %s, col_code: %s' % (row_code, col_code)
624 while datum.utters
and utter_index < len(datum.utters):
625 if datum.utters[utter_index].trans_phrase:
628 if row_code !=
None and col_code !=
None:
629 for row_char
in row_code:
630 for col_char
in col_code:
632 count_hash[row_char][col_char] += 1
634 except KeyError
as err:
635 self.logger.info(
'Output Calc encountered unrecognized key: %s' % (err))
636 self.logger.info(
'row_code: %s, col_code: %s' % (row_code, col_code))
637 self.logger.info(
'Utterance: %s' % (datum))
640 print 'Output Calc encountered unrecognized key: %s' % (err)
641 print 'row_code: %s, col_code: %s' % (row_code, col_code)
646 for row_key
in count_hash:
647 csv_writer.writerow( [row_key] + map(
lambda col_key: count_hash[row_key][col_key], count_hash[row_key]) )
659 self.
logger = logging.getLogger(__name__)
685 while seg.utters
and i < len(seg.utters):
686 if seg.utters[i].trans_phrase
and re.search(self.
search_term, seg.utters[i].trans_phrase):
687 self.utter_list.append(seg.utters[i])
695 while not found
and cur:
697 found = re.search(self.
search_term, cur.trans_phrase)
701 self.chain_list.append(head)
709 ListOutputCalc.LIST_CATS.SPEAKER_TYPE: 0,
710 ListOutputCalc.LIST_CATS.TARGET_LISTENER: 1,
711 ListOutputCalc.LIST_CATS.COMPLETENESS: 2,
712 ListOutputCalc.LIST_CATS.UTTERANCE_TYPE: 3,
717 combo = DBConstants.COMBOS[DBConstants.COMBO_GROUPS.LIST_OUTPUT_CALC_CATS][self.
cat]
718 csv_writer.writerow([
'List:', combo.disp_desc])
719 csv_writer.writerow([
'Search Term:', self.
search_term])
723 group_code = DBConstants.TRANS_CODES[trans_code_index]
724 group_code_strs = group_code.get_all_options_codes()
729 group_dict = OrderedDict()
733 for utter
in data_list:
738 if len(cur.trans_codes) > trans_code_index:
739 code_str = cur.trans_codes[trans_code_index]
741 if not code_str
in group_dict:
742 group_dict[code_str] = OrderedDict()
744 if not utter
in group_dict[code_str]:
745 group_dict[code_str][utter] =
True
750 if len(utter.trans_codes) > trans_code_index:
751 code_str = utter.trans_codes[trans_code_index]
753 if not code_str
in group_dict:
754 group_dict[code_str] = OrderedDict()
756 if not utter
in group_dict[code_str]:
757 group_dict[code_str][utter] =
True
760 for code
in group_dict:
761 csv_writer.writerow([
''])
762 csv_writer.writerow([
'',
'Code:', code])
763 csv_writer.writerow([
'',
'Start Time',
'End Time',
'LENA Speakers',
'Phrase',
'Transcriber Codes'])
764 for utter
in group_dict[code]:
766 trans_codes, tail = FilterManager.get_chain_trans_codes(utter)
767 trans_codes = trans_codes.replace(
'\n',
'').replace(
'\r',
'')
768 speakers, tail = FilterManager.get_chain_lena_speakers(utter)
769 speakers = speakers.replace(
'\n',
'').replace(
'\r',
'')
770 phrase, tail = FilterManager.get_chain_phrase(utter)
771 phrase = phrase.replace(
'\n',
'').replace(
'\r',
'')
772 start_str = BackendUtils.get_time_str(utter.start)
773 end_str = BackendUtils.get_time_str(tail.end)
774 csv_writer.writerow([
'', start_str, end_str, speakers, phrase, trans_codes])
777 csv_writer.writerow([
'',
778 BackendUtils.get_time_str(utter.start),
779 BackendUtils.get_time_str(utter.end),
780 utter.speaker.speaker_codeinfo.code
if utter.speaker
and utter.speaker.speaker_codeinfo
else '?',
782 '|%s|' % (
'|'.join(utter.trans_codes))
if utter.trans_codes
else 'None',
# Bind the combo-option group for each output-calc class onto that class as an
# attribute (COUNT_TYPES, RATE_TYPES, BREAKDOWN_CRITERIA, LIST_CATS). Each value
# is looked up in DBConstants.COMBO_OPTIONS using the matching
# DBConstants.COMBO_GROUPS key.
# NOTE(review): these appear to be module-level statements that run after the
# class bodies; a `BREAKDOWN_CRITERIA = None` placeholder is visible earlier in
# the file and is presumably the one overwritten here - confirm against the
# full file.
789 CountOutputCalc.COUNT_TYPES = DBConstants.COMBO_OPTIONS[DBConstants.COMBO_GROUPS.COUNT_OUTPUT_CALC_TYPES]
790 RateOutputCalc.RATE_TYPES = DBConstants.COMBO_OPTIONS[DBConstants.COMBO_GROUPS.RATE_OUTPUT_CALC_TYPES]
791 BreakdownOutputCalc.BREAKDOWN_CRITERIA = DBConstants.COMBO_OPTIONS[DBConstants.COMBO_GROUPS.BREAKDOWN_OUTPUT_CALC_CRITERIA]
792 ListOutputCalc.LIST_CATS = DBConstants.COMBO_OPTIONS[DBConstants.COMBO_GROUPS.LIST_OUTPUT_CALC_CATS]