# Baby Language Lab Scripts
# A collection of data processing tools.
#
# naptime.py
1 import glob
2 import logging
3 import os
4 import csv
5 import datetime
6 import traceback
7 from db.bll_database import DBConstants
8 from db.bll_database import _get_constants
9 from utils.ui_utils import UIUtils
10 
class Naptime:
    """Locates naptime segments in LENA '*complete.csv' exports, records the
    resulting naptime zones in the database, and filters naptime segments
    out of CSV files using those recorded zones.
    """

    # Duration column titles we recognize, in order of preference.
    DUR_COL_TITLES = ('Segment_Duration', 'Audio_Duration', 'Block_Duration')

    @staticmethod
    def select_files(db):
        """Return the list of filenames recorded in the naptime_files table.

        db: database object providing select()
        """
        rows = db.select(
            'naptime_files',
            'id filename'.split()
        )

        #each row is (id, filename) - we only need the filename
        return [cur_row[1] for cur_row in rows]

    @staticmethod
    def _update_settings(db, filenames, path):
        """Record the processed naptime files, the last-used folder, and an
        updated timestamp in the DB, then rebuild DBConstants.

        db: database object
        filenames: list of naptime CSV paths that were successfully processed
        path: the folder that was scanned (backslashes are normalized to '/')
        """
        path = path.replace('\\', '/')

        #update naptime filenames in naptime_files table
        db.delete('naptime_files')
        db.insert(
            'naptime_files',
            ['filename'],
            map(lambda name: [name], filenames)
        )

        #update timestamp and path in naptime table
        db.update_timestamp_col(
            'settings',
            'val',
            where_cond='code_name=?',
            params=['LAST_NAPTIME_UPDATE']
        )

        db.update(
            'settings',
            ['val'],
            where_cond='code_name=?',
            params=[path, 'LAST_NAPTIME_FOLDER']
        )

        #Refresh DBConstants (re-query the DB and rebuild the DBConstants.SETTINGS Enums).
        #This is to force an update of the LAST_NAPTIME_FOLDER and LAST_NAPTIME_UPDATE
        #properties of DBConstants.SETTINGS, which have just changed.
        #This is ugly, but it works...
        _get_constants() #this is defined in bll_database.py, and imported at the top of this file

    @staticmethod
    def get_naptime_files(path):
        """Recursively collect all files under path whose names end with
        'complete.csv' (case-insensitive).

        path: directory to scan
        Returns a list of '/'-joined file paths.
        """
        items = os.listdir(path)
        files = []
        for cur_item in items:
            cur_item = '%s/%s' % (path, cur_item)
            if os.path.isdir(cur_item):
                files.extend(Naptime.get_naptime_files(cur_item))
            elif cur_item.lower().endswith('complete.csv'):
                files.append(cur_item)

        return files

    @staticmethod
    def _get_child_cd(first_row):
        """Build the child code ('<ID>_<yyyymmdd>') from the first data row
        of a naptime CSV file.

        first_row: dict with a 'Clock_Time_TZAdj' value formatted as
        'month/day/year ...' and a 'File_Name' value formatted as
        '<child_id>_...'.
        """
        month, day, year = first_row['Clock_Time_TZAdj'].split(' ')[0].split('/')
        if len(year) == 2: #expand 2-digit years - recordings are assumed post-2000
            year = '20%s' % (year)
        child_id = first_row['File_Name'].split('_')[0].upper()

        return '%s_%d%02d%02d' % (child_id, int(year), int(month), int(day))

    @staticmethod
    def _find_dur_col(first_row):
        """Return the title of the first recognized duration column present
        in first_row, or raise if none of the known titles are present.
        (The original while-loop indexed past the end of the title list
        before bounds-checking - fixed here.)
        """
        for title in Naptime.DUR_COL_TITLES:
            if title in first_row:
                return title

        raise Exception('Raising a fuss because there\'s no Segment_Duration or Audio_Duration column in this file.')

    @staticmethod
    def _insert_naptime_zone(db, child_cd, nap_start, nap_end):
        """Insert a single naptime zone into the naptime table.
        Start/end are offsets in seconds, rounded to 2 decimal places."""
        db.insert(
            'naptime',
            'child_cd start end'.split(),
            ((child_cd,
              round(nap_start, 2),
              round(nap_end, 2),
              ),),
        )

    @staticmethod
    def _process_naptime_file(db, filename):
        """Scan a single naptime CSV file and insert one naptime-table row
        for each contiguous run of 'naptime' segments.
        Raises on any parse problem - the caller records the failure.
        """
        logger = logging.getLogger()
        nap_file = open(filename, 'rb')
        try:
            rows = list(csv.DictReader(nap_file))

            child_cd = Naptime._get_child_cd(rows[0])
            dur_col_title = Naptime._find_dur_col(rows[0])
            logger.info('Using dur_col_title="%s"' % (dur_col_title))

            if 'Naptime' not in rows[0]:
                #the KeyError below will abort this file - log the columns we did find
                logger.error('Can\'t find "Naptime" column! Columns: %s' % (list(rows[0].keys())))

            #naptime col is blank if seg is not naptime
            last_is_naptime = rows[0]['Naptime'].strip().lower() == 'naptime'
            #elapsed time will restart periodically, so we need to keep a time accumulator var
            accum_time = float(rows[0]['Elapsed_Time']) + float(rows[0][dur_col_title])
            accum_time = round(accum_time, 2)
            nap_start = accum_time if last_is_naptime else None
            nap_end = None

            for j in range(1, len(rows)):
                cur_is_naptime = rows[j]['Naptime'].strip().lower() == 'naptime'
                cur_dur = round(float(rows[j][dur_col_title]), 2)

                if cur_is_naptime and not last_is_naptime:
                    #a naptime zone starts at this segment
                    nap_start = accum_time

                elif not cur_is_naptime and last_is_naptime:
                    #the naptime zone ended at the previous segment
                    nap_end = accum_time
                    Naptime._insert_naptime_zone(db, child_cd, nap_start, nap_end)

                #handle a naptime zone that runs to the end of the file
                if cur_is_naptime and j == len(rows) - 1:
                    nap_end = accum_time + cur_dur
                    Naptime._insert_naptime_zone(db, child_cd, nap_start, nap_end)

                accum_time += cur_dur
                last_is_naptime = cur_is_naptime
        finally:
            nap_file.close() #close the handle even when a parse error is raised

    @staticmethod
    def update_naptime_data(db, path, prog_diag=None):
        """Rebuild the naptime table from all naptime CSV files found
        (recursively) under path.

        db: database object
        path: folder to scan for '*complete.csv' files
        prog_diag: optional progress dialog exposing set_fraction()
        Returns the list of filenames that could not be processed.
        """
        logger = logging.getLogger()

        naptime_filenames = Naptime.get_naptime_files(path)
        processed_filenames = []
        error_filenames = []

        #remove all previous naptime zone data
        db.delete('naptime')

        for i, cur_filename in enumerate(naptime_filenames):
            try:
                Naptime._process_naptime_file(db, cur_filename)
                processed_filenames.append(cur_filename)

                if prog_diag:
                    prog_diag.set_fraction(float(i + 1) / float(len(naptime_filenames)))

            except Exception as e:
                logger.error('Error processing %s: %s\nStack trace: %s'
                             % (cur_filename, e, traceback.format_exc()))
                error_filenames.append(cur_filename)

        Naptime._update_settings(db, processed_filenames, path)

        return error_filenames

    @staticmethod
    def filter_file(db, input_path, output_path):
        """Copy the CSV at input_path to output_path, omitting every segment
        row that overlaps a recorded naptime zone by 50% or more of the
        segment's duration.

        db: database object (used to look up naptime zones)
        input_path: source CSV filename
        output_path: destination CSV filename (overwritten)
        """
        file_in = open(input_path, 'rb')
        file_out = open(output_path, 'wb')
        try:
            src_reader = csv.DictReader(file_in)
            dest_writer = csv.DictWriter(file_out, src_reader.fieldnames)
            dest_writer.writeheader()

            rows = list(src_reader)
            accum_start = round(float(rows[0]['Elapsed_Time']), 2)

            child_cd = Naptime._get_child_cd(rows[0])
            dur_col = Naptime._find_dur_col(rows[0])

            for cur_row in rows:
                seg_dur = round(float(cur_row[dur_col]), 2)
                accum_end = accum_start + seg_dur

                #check if this seg intersects with any naptime zones
                intersect_rows = db.select(
                    'naptime',
                    'start end'.split(),
                    where_cond='child_cd = ? and ' +
                    '((end > ? and end <= ?) or ' + #db end pt intersects range, or db start pt and db end pt both intersect range
                    '(start < ? and start >= ?) or ' + #db start pt intersects range, or db start pt and db end pt both intersect range
                    '(start < ? and end > ?))', #some db middle pts intersects range
                    params=(child_cd,
                            accum_start, accum_end,
                            accum_end, accum_start,
                            accum_start, accum_end,
                            ),
                    order_by='id ASC'
                )

                #determine the magnitude of the overlap (in seconds)
                largest_mag = -1
                #note: results are ordered by id (order of insertion) ascending, so the
                #looping here will ensure that in the case of two naptimes that equally
                #overlap the segment, the last one (i.e. the one from the naptime file,
                #if present) will be used.
                for db_row in intersect_rows:
                    db_start, db_end = db_row
                    #db end pt intersects range, or db start pt and db end pt both intersect range
                    if db_end > accum_start and db_end <= accum_end:
                        mag = db_end - max(accum_start, db_start)

                    #db start pt intersects range, or db start pt and db end pt both intersect range
                    elif db_start < accum_end and db_start >= accum_start:
                        mag = min(accum_end, db_end) - db_start

                    #some db middle pts intersect range (db zone completely covers this segment)
                    else:
                        mag = accum_end - accum_start

                    #>= so that on a tie the later (higher id) naptime zone wins
                    if mag >= largest_mag:
                        largest_mag = mag

                #We exclude if >= 50% of the seg overlaps with a naptime zone.
                #largest_mag == -1 (no overlap at all) is always < seg_dur / 2.0,
                #so non-overlapping segments are always written out.
                if round(largest_mag, 2) < round(seg_dur / 2.0, 2):
                    dest_writer.writerow(cur_row)

                accum_start += seg_dur
        finally:
            #close both handles even if a lookup/parse error occurs mid-file
            file_in.close()
            file_out.close()