Baby Language Lab Scripts
A collection of data processing tools.
reliability2_exporter.py
## @package parsers.reliability2_exporter

import csv

from parsers.reliability2_parser import Reliability2Parser
from utils.backend_utils import BackendUtils

## This class writes details about a check2 object (a unit of data from the Reliability2 App) to a CSV file.
class Reliability2Exporter(object):
    ## Constructor
    # @param self
    # @param write_filename (string) path to the CSV file we will write the data to
    # @param check2 (Check2) a Check2 object containing the data to write - see data_structs.Check2
    def __init__(self, write_filename, check2):
        self.out_file = open(write_filename, 'wb')
        self.in_file = open(check2.csv_filename, 'rb')

        self.check2 = check2

    ## This method extracts data from the Check2 object and writes it to the CSV file in a nicely formatted manner.
    # @param self
    # @param include_trans (boolean) If True, the method will append an extra CSV column containing the actual
    # transcription text that was entered by the user for each clip.
    # @param progress_update_fcn (function=None) function accepting a value in [0, 1] to display as a progress bar - see utils.ProgressDialog. This value indicates the level of completeness <em>of the current phase</em>.
    # @param progress_next_fcn (function=None) moves the progress bar to the next phase, which causes new text to be displayed in the bar - see utils.ProgressDialog
    def export(self, include_trans, progress_update_fcn=None, progress_next_fcn=None):
        reader = csv.DictReader(self.in_file)
        extra_headers = ['Child Voc', 'Word Count']
        if include_trans:
            extra_headers.append('Transcription')
        out_headers = reader.fieldnames + extra_headers
        writer = csv.DictWriter(self.out_file, out_headers)

        writer.writeheader()

        # The composite key (child_code, timestamp) uniquely identifies a row (assuming a child can't be in two
        # places at the same time :). We build a lookup table keyed on this combination of values.

        # Match the rows: we generate a dict from self.check2.test2s and go through the input file one row
        # at a time, storing matches in the out_rows array below. We must store to this array in the order
        # the tests were run, not the order they appear in the input file.
        test2_dict = {}
        for i in range(len(self.check2.test2s)):
            test2 = self.check2.test2s[i]
            key = test2.child_code + test2.spreadsheet_timestamp
            test2_dict[key] = (test2, i)
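
        # Illustration (assumption, not stated in this file): for a lookup below to succeed,
        # test2.spreadsheet_timestamp must already be in the same 'day month year elapsed_sec' form that is
        # rebuilt from each input row. For example, a hypothetical child code 'C001' with timestamp
        # '05 03 2014 1200' would produce the key 'C00105 03 2014 1200'.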

        out_rows = [None] * len(self.check2.test2s)
        all_rows = list(reader)
        match_count = 0

        i = 0
        while i < len(all_rows) and match_count < len(self.check2.test2s):
            row = all_rows[i]
            year = row['year']
            month = BackendUtils.pad_num_str(row['month'])
            day = BackendUtils.pad_num_str(row['day'])
            elapsed_sec = row['Elapsed_Time']
            key = Reliability2Parser.get_child_code(row) + '%s %s %s %s' % (day, month, year, elapsed_sec)  # row['clock_time_tzadj']
            if key in test2_dict:
                row[extra_headers[0]] = test2_dict[key][0].child_vocs
                row[extra_headers[1]] = BackendUtils.get_word_count(test2_dict[key][0].transcription)
                if include_trans:
                    row[extra_headers[2]] = test2_dict[key][0].transcription
                match_count += 1

                out_rows[test2_dict[key][1]] = row

            if progress_update_fcn:
                progress_update_fcn(float(i + 1) / float(len(all_rows)))

            i += 1

        if progress_next_fcn:
            progress_next_fcn()
        for i in range(len(out_rows)):
            row = out_rows[i]
            if row is None:
                raise Exception('Unable to match Test2 object with input spreadsheet row. Has the spreadsheet changed?')
            else:
                writer.writerow(row)

            if progress_update_fcn:
                progress_update_fcn(float(i + 1) / float(len(out_rows)))

    ## Closes this exporter. This just closes all of the open files that it is using.
    # Calling this method is necessary to ensure that all of the data written to the CSV file is actually flushed to disk.
    # @param self
    def close(self):
        self.out_file.close()
        self.in_file.close()
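
Below is a minimal usage sketch of Reliability2Exporter, separate from the file above. How a Check2 object is built is not shown in this file, so get_check2() and the output path are hypothetical placeholders; the print_progress callback just illustrates the [0, 1] contract described for progress_update_fcn (see utils.ProgressDialog for the real progress UI).

# Minimal usage sketch; get_check2() and the output filename below are hypothetical.
from parsers.reliability2_exporter import Reliability2Exporter

## Simple stand-in for a progress callback: receives a fraction in [0, 1] for the current phase.
def print_progress(fraction):
    print 'progress: %d%%' % int(fraction * 100)

check2 = get_check2()  # hypothetical helper - however your pipeline builds a Check2 object

exporter = Reliability2Exporter('reliability2_summary.csv', check2)
try:
    # include_trans=True adds the 'Transcription' column to the output CSV.
    exporter.export(True, progress_update_fcn=print_progress)
finally:
    # close() must be called so the written data is flushed to disk.
    exporter.close()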