Baby Language Lab Scripts
A collection of data processing tools.
 All Classes Namespaces Files Functions Variables Pages
seg_filters.py
Go to the documentation of this file.
1 ## @package data_structs.seg_filters
2 
3 from data_structs.base_objects import DBObject
4 from parsers.filter_manager import FilteredSeg, FilterManager
5 from db.bll_database import DBConstants
6 from utils.backend_utils import BackendUtils
7 
8 import re
9 
10 ## This class (actually, its subclasses) handles all of the segment/chain filtering operations in the BLL Apps.
11 # Currently, this class exists as an interface (if Python had those). Subclasses should override the methods indicated.
12 # Filters can be saved to the DB.
13 # The filtering methods make use of a special data structure called a FilteredSeg (see bll_app.parsers.filter_manager.py). A FilteredSeg wraps a Segment object and provides access to all its attributes except one - the 'utters' attribute.
14 # (The 'utters' attribute is a list of all the utterances that the Segment contains). The FilteredSeg maintains its own separate utters list, and redirects accesses to that list, rather than the original Segment object's list. This means that
15 # one can create a FilteredSeg object from a Segment object, then modify the FilteredSeg object's utters <em>list</em> without having any effect on the original Segment's utters list (note that if you modify the actual utterances themselves - as opposed to
16 # just the list - this will affect the Utterance objects in the original Segment's utters list). This is important because it allows this class to return FilteredSeg objects that represent filtered versions of Segments, without messing with the original utterance list, which may
17 # be needed elsewhere.
19  ## Constructor
20  # @param self
21  # @param db_id (int=None) if this SegFilter is in the DB, this is a primary key id - else it is None. This is set by db_insert() and db_select().
22  # @negate (boolean=False) when set to True, this Filter's effects are inverted (it includes segments it normally excludes, and excludes those it normally includes). When False, the Filter works normally.
def __init__(self, db_id=None, negate=False):
    """Store the DB primary key (None when not yet persisted) and the negation flag."""
    self.db_id, self.negate = db_id, negate
26 
27  ## See superclass description.
def db_insert(self, db):
    """Insert this filter into the 'seg_filters' table and remember the new row id.

    The stored row is the concrete class name plus str() of get_db_args();
    the first id returned by db.insert() is assigned to self.db_id.
    """
    columns = 'class_name args'.split()
    row = [self.__class__.__name__, str(self.get_db_args())]
    new_ids = db.insert('seg_filters', columns, [row])
    self.db_id = new_ids[0]
34 
35  ## See superclass description.
def db_delete(self, db):
    """Delete the 'seg_filters' row whose id equals self.db_id."""
    where_clause = 'id=?'
    params = [self.db_id]
    db.delete('seg_filters', where_clause, params)
41 
42  ## Constructs a list of SegFilter objects from data retrieved from the DB.
43  # @param filter_rows (list) this should be a list of the form [ [id, class_name, args], ... ].
44  # @returns (list) list of SegFilter objects (of the appropriate subclass type).
@staticmethod
def _reconstruct_from_db(filter_rows):
    """Rebuild filter objects from rows of the form [id, class_name, args].

    class_name is resolved against this module's namespace and args (the
    str() of a constructor argument list, as written by db_insert()) is
    parsed with ast.literal_eval(). Neither field is passed to eval(), so
    a tampered DB row cannot execute arbitrary code.

    @param filter_rows (list) rows of the form [ [id, class_name, args], ... ]
    @returns (list) one filter object per row, each with db_id set to the row id
    @raises NameError if class_name is not defined in this module
    @raises ValueError/SyntaxError if args is not a plain Python literal
    """
    import ast  # local import: keeps this block self-contained

    module_names = globals()
    rebuilt = []
    for row in filter_rows:
        class_name = row[1]
        if class_name not in module_names:
            # NameError is what the previous eval()-based lookup raised
            raise NameError('unknown filter class name: %r' % (class_name,))

        ctor_args = ast.literal_eval(row[2])
        filter_obj = module_names[class_name](*ctor_args)
        filter_obj.db_id = row[0]
        rebuilt.append(filter_obj)

    return rebuilt
56 
57  ## See superclass description.
@staticmethod
def db_select(db, ids=[]):
    """Select rows from 'seg_filters' and rebuild the matching filter objects.

    @param db a database handle providing select()
    @param ids (list=[]) ids handed to DBObject._build_where_cond_from_ids() to build the where clause. The default list is never mutated here.
    @returns (list) filter objects rebuilt by _reconstruct_from_db()
    """
    # Pass the columns as a list, as db_select_by_ref() and db_insert() do;
    # the original passed the raw string 'id class_name args' here.
    columns = 'id class_name args'.split()
    rows = db.select(
        'seg_filters',
        columns,
        DBObject._build_where_cond_from_ids(ids),
    )

    return SegFilter._reconstruct_from_db(rows)
66 
67  ## Selects SegFilter objects from the DB via a relationship table.
68  # @param db (BLLDatabase) a database handle object
69  # @param rel_table (string) name of the relationship table
70  # @param filter_id_col (string) name of the column in rel_table that contains filter ids (a foreign key column corresponding to the seg_filters table primary key column).
71  # @param rel_id_col (string) name of the column in rel_table that contains the alternative (non-filter) ids you wish to look up
72  # @param rel_val (int) value of rel_id_col to search for in rel_table. Corresponding values from filter_id_col will be recorded. A select will be done on seg_filters using these ids. Finally, SegFilter objects corresponding to the selected rows will be created and returned.
73  # @returns (list) list of SegFilter objects (or appropriate subclasses)
@staticmethod
def db_select_by_ref(db, rel_table, filter_id_col, rel_id_col, rel_val):
    """Select filters joined through a relationship table and rebuild them.

    Joins rel_table (aliased 'rel') to seg_filters (aliased 'sf') on
    rel.<filter_id_col> = sf.id, keeps rows where <rel_id_col> equals
    rel_val, and returns the objects built by _reconstruct_from_db().
    """
    join_clause = '%s rel join seg_filters sf on rel.%s=sf.id' % (rel_table, filter_id_col)
    columns = 'sf.id sf.class_name sf.args'.split()
    where_clause = '%s=?' % (rel_id_col)

    matched_rows = db.select(join_clause, columns, where_clause, [rel_val])
    return SegFilter._reconstruct_from_db(matched_rows)
83 
84  ## Handles the filtering of unlinked segments. Subclasses should not override this method - they should override the private method _filter_seg(), which this method calls.
85  # @param seg (Segment/FIlteredSeg) the segment-like object to filter
86  # @param filter_utters (boolean=False) If True, a FilteredSeg is returned, containing only those Utterances that pass the filter. If False, if even one Utterance fails the filter, None will be returned (otherwise a FilteredSeg containing the same Utterances as the parameter seg will be returned).
87  # @returns a FilteredSeg object whose utterances have been filtered according to the filter_utters and negate parameter settings, or None in the case noted in the filter_utters description.
def filter_seg(self, seg, filter_utters=False):
    """Public entry point: delegate to _filter_seg() using this filter's own negate flag."""
    invert = self.negate
    return self._filter_seg(seg, invert, filter_utters)
90 
91  ## Handles the filtering of linked segments. Subclasses should not override this method - they should override the private method _filter_linked_utter(), which this method calls.
92  # Note: This method does not have a 'filter_utters' param (like filter_seg) because each chain is essentially treated like 'a single long utterance'. Therefore, if one utterance in the chain fails the filter, the whole chain fails.
93  # @param self
94  # @param head (Utterance) start of the chain
95  # @returns (Utterance) if the chain passed the filter, returns the start of the chain. Else returns None.
def filter_linked_utter(self, head):
    """Public entry point: delegate to _filter_linked_utter() using this filter's own negate flag."""
    invert = self.negate
    return self._filter_linked_utter(head, invert)
98 
99  ## See superclass description. Subclasses should not override this method - they should override the private method _get_db_args(), which this method calls.
def get_db_args(self):
    """Public entry point: delegate to _get_db_args() using this filter's own negate flag."""
    invert = self.negate
    return self._get_db_args(invert)
102 
103  ## See superclass description. Subclasses should not override this method - they should override the private method _get_filter_type_str(), which this method calls.
105  return self._get_filter_type_str(self.negate)
106 
107  ## See superclass description. Subclasses should not override this method - they should override the private method _get_filter_desc_str(), which this method calls.
109  return self._get_filter_desc_str(self.negate)
110 
111  ## See filter_seg(). Must be overridden by subclasses.
112  # @param seg (Segment/FilteredSeg) the segment-like object to filter
113  # @param negate (boolean=False) if True, the filter should invert its functionality (include what it normally excludes, and exclude what it normally includes). If False, the filter should act normally.
114  # @param filter_utters (boolean=False) see filter_seg() description of this parameter.
115  # @returns a FilteredSeg object whose utterances have been filtered according to the filter_utters and negate parameter settings, or None in the case noted in the filter_utters description.
def _filter_seg(self, seg, negate, filter_utters=False):
    """Placeholder hook for subclasses; this base version does nothing and returns None."""
    return None
118 
119  ## See filter_linked_utter(). Must be overridden by subclasses.
120  # @param head (Utterance) the start node of the chain to be filtered
121  # @param negate (boolean=False) if True, the filter should invert its functionality (include what it normally excludes, and exclude what it normally includes). If False, the filter should act normally.
122  # @returns (Utterance) if the chain passed the filter, returns the start of the chain. Else returns None.
def _filter_linked_utter(self, head, negate):
    """Placeholder hook for subclasses; this base version does nothing and returns None."""
    return None
125 
126  ## See get_db_args(). Subclass should override.
127  # @param self
128  # @param negate (boolean) True indicates the filter should invert its functionality. False indicates that it should act normally.
def _get_db_args(self, negate):
    """Return the constructor argument list to persist; for the base class just [negate]."""
    ctor_args = [negate]
    return ctor_args
131 
132  ## See get_filter_type_str(). Subclass should override.
133  # @param self
134  # @param negate (boolean) True indicates the filter should invert its functionality. False indicates that it should act normally.
def _get_filter_type_str(self, negate):
    """Placeholder hook for subclasses; this base version returns None."""
    return None
137 
138  ## See get_filter_desc_str(). Subclass should override.
139  # @param self
140  # @param negate (boolean) True indicates the filter should invert its functionality. False indicates that it should act normally.
def _get_filter_desc_str(self, negate):
    """Placeholder hook for subclasses; this base version returns None."""
    return None
143 
144 
145  ## Convenience method to wrap Segments in FilteredSeg objects and set the filtered utters list. Subclasses need not override this.
146  # If 'seg' is a Segment, creates a FilteredSeg (with an Utterance list of 'utters') from the 'seg' parameter. If 'seg' is a FilteredSeg, sets the utterance list to 'utters'.
147  # @param self
148  # @param seg (Segment/FilteredSeg) the object you wish to wrap/modify
149  # @param utters (list) list of Utterance objects to assign to the resulting FilteredSeg (this is generally a filtered list)
def _to_filtered_seg(self, seg, utters):
    """Return a FilteredSeg carrying 'utters'.

    An existing FilteredSeg is updated in place and returned; anything else
    is wrapped in a new FilteredSeg(seg, utters).
    """
    if not isinstance(seg, FilteredSeg):
        return FilteredSeg(seg, utters)

    seg.utters = utters
    return seg
158 
159 ## This Filter allows utterances to pass through if they (contain at least one 'wh' word (who, what, when, where, why, how) AND are marked as a question in transcriber code 3).
161  ## Constructor
162  # @self
163  # @negate (boolean=False) If True, the filter will invert its behaviour. If False, the filter acts normally.
164  # @param db_id (int=None) this is the primary key id from the database table seg_filters. A value of None indicates that this filter is not yet in the DB.
def __init__(self, negate=False, db_id=None):
    """Initialise via SegFilter.__init__ (note the swapped argument order there)."""
    SegFilter.__init__(self, db_id=db_id, negate=negate)
167 
168  ## See superclass description.
def _filter_seg(self, seg, negate, filter_utters=False):
    """Filter a segment for transcribed "WH" questions.

    An utterance qualifies when it has at least four transcriber codes, the
    fourth contains 'Q', and its phrase matches the wh-word pattern.

    filter_utters=True: returns a FilteredSeg of the utterances whose result
    (inverted when negate is set) passes. Utterances with fewer than four
    codes are left out either way, as before.
    filter_utters=False: the segment passes when any utterance qualifies
    (inverted when negate is set); returns a FilteredSeg with all utterances,
    or None. The original ignored negate in this mode, unlike
    _filter_linked_utter() and the other filters.
    """
    def wh_question(utter):
        # None means "cannot be judged" (too few transcriber codes)
        codes = utter.trans_codes
        if not codes or len(codes) < 4:
            return None
        phrase = (utter.trans_phrase or '').lower()  # tolerate a missing phrase
        return bool('Q' in codes[3] and re.search(r'\bwh|\bhow\b', phrase))

    invert = bool(negate)

    if filter_utters:
        kept = []
        for utter in seg.utters:
            verdict = wh_question(utter)
            if verdict is not None and verdict != invert:
                kept.append(utter)
        return self._to_filtered_seg(seg, kept)

    found = any(wh_question(utter) for utter in seg.utters)
    if found != invert:
        return self._to_filtered_seg(seg, seg.utters)

    return None
199 
200  ## See superclass description.
def _filter_linked_utter(self, head, negate):
    """Filter a chain of linked utterances for transcribed "WH" questions.

    Walks the chain via .next until a node qualifies: at least four
    transcriber codes, the fourth containing 'Q', and a phrase matching the
    wh-word pattern. Nodes with fewer than four codes are skipped rather
    than raising IndexError, matching the length check in _filter_seg().

    @returns head if the (possibly negated) test passes, else None
    """
    found = False
    node = head

    while node and not found:
        codes = node.trans_codes
        if codes and len(codes) >= 4:
            phrase = (node.trans_phrase or '').lower()  # tolerate a missing phrase
            found = bool('Q' in codes[3] and re.search(r'\bwh|\bhow\b', phrase))
        node = node.next

    return head if found != bool(negate) else None
220 
221  ## See superclass description.
def _get_db_args(self, negate):
    """Return the constructor argument list to persist: [negate]."""
    ctor_args = [negate]
    return ctor_args
224 
225  ## See superclass description.
def _get_filter_type_str(self, negate):
    """Return the display name of this filter type (the same for either negate value)."""
    label = '"WH" Question'
    return label
228 
229  ## See superclass description.
def _get_filter_desc_str(self, negate):
    """Return the description of this filter ('Is a ...' / 'Is not a ...').

    This is the hook the sibling filters override. The original defined only
    get_filter_desc_str(self, negate), so the hook was never overridden and
    the text rendered as 'Is nota "WH" question'.
    """
    return 'Is %sa "WH" question' % ('not ' if negate else '')

def get_filter_desc_str(self, negate=None):
    """Return the description for this filter's own negate setting.

    'negate' is accepted only so existing callers that pass it keep working;
    as before, it is ignored in favour of self.negate.
    """
    return self._get_filter_desc_str(self.negate)
232 
233 ## This filter strains out utterances that start or end after a specific time.
235  #time cutoff is in seconds
236  #filter_type is
237  ## Constructor
238  # @param self
239  # @param filter_type (int) an element from DBConstants.COMBO_OPTIONS[DBConstants.COMBO_GROUPS.SEG_FILTER_TIME], indicating the behaviour of this filter - eg. whether it should cut of segments that start before, start after, end before, or end after 'time cutoff'.
240  # @param time_cutoff (float) the point time, specified in seconds, for which this filter should apply the behaviour specified by 'filter_type'.
241  # @param inclusive (boolean=True) if True, the filter will allow utterances whose start/end time (depending upon the behaviour specified by 'filter_type') is equal to 'time_cutoff' to pass through the filter. If False, it will strain these out.
242  # @negate (boolean=False) If True, the filter will invert its behaviour. If False, the filter acts normally.
243  # @param db_id (int=None) this is the primary key id from the database table seg_filters. A value of None indicates that this filter is not yet in the DB.
def __init__(self, filter_type, time_cutoff, inclusive=True, negate=False, db_id=None):
    """Initialise the base filter, then record the time-filter settings."""
    SegFilter.__init__(self, db_id=db_id, negate=negate)

    self.filter_type, self.time_cutoff, self.inclusive = filter_type, time_cutoff, inclusive
250 
251  ## See superclass description.
def _filter_seg(self, seg, negate, filter_utters=False):
    """Filter a segment (or its utterances) by start/end time against self.time_cutoff.

    filter_utters=True: each utterance is tested on its own and a FilteredSeg
    of those that pass (inverted when negate is set) is returned.
    filter_utters=False: the segment's own start/end is tested; returns a
    FilteredSeg with all utterances, or None.
    """
    opts = DBConstants.COMBO_OPTIONS[DBConstants.COMBO_GROUPS.SEG_FILTER_TIME]

    def at_or_before(value):
        if self.inclusive:
            return value <= self.time_cutoff
        return value < self.time_cutoff

    def at_or_after(value):
        if self.inclusive:
            return value >= self.time_cutoff
        return value > self.time_cutoff

    # filter_type option -> (attribute to read, comparison to apply)
    rules = {
        opts.START_TIME_BEFORE: ('start', at_or_before),
        opts.START_TIME_AFTER: ('start', at_or_after),
        opts.END_TIME_BEFORE: ('end', at_or_before),
        opts.END_TIME_AFTER: ('end', at_or_after),
    }

    def passes(data_obj):
        # looked up lazily so an empty utterance list never touches the table
        attr_name, compare = rules[self.filter_type]
        return compare(getattr(data_obj, attr_name))

    invert = bool(negate)

    if filter_utters:
        kept = [utter for utter in seg.utters if bool(passes(utter)) != invert]
        return super(TimeSegFilter, self)._to_filtered_seg(seg, kept)

    if bool(passes(seg)) != invert:
        return super(TimeSegFilter, self)._to_filtered_seg(seg, seg.utters)

    return None
285 
286  ## See superclass description.
def _filter_linked_utter(self, head, negate):
    """Filter a chain by time: start tests read head.start, end tests read the tail's end.

    The tail is obtained from FilterManager.get_endpoint(). Returns head when
    the (possibly negated) comparison against self.time_cutoff passes, else None.
    """
    opts = DBConstants.COMBO_OPTIONS[DBConstants.COMBO_GROUPS.SEG_FILTER_TIME]
    tail = FilterManager.get_endpoint(FilterManager.ENDPOINT_TYPES.TAIL, head)

    def at_or_before(value):
        if self.inclusive:
            return value <= self.time_cutoff
        return value < self.time_cutoff

    def at_or_after(value):
        if self.inclusive:
            return value >= self.time_cutoff
        return value > self.time_cutoff

    # filter_type option -> (node to inspect, attribute to read, comparison)
    rules = {
        opts.START_TIME_BEFORE: (head, 'start', at_or_before),
        opts.START_TIME_AFTER: (head, 'start', at_or_after),
        opts.END_TIME_BEFORE: (tail, 'end', at_or_before),
        opts.END_TIME_AFTER: (tail, 'end', at_or_after),
    }
    node, attr_name, compare = rules[self.filter_type]
    passed = compare(getattr(node, attr_name))

    if bool(passed) != bool(negate):
        return head
    return None
307 
308  ## See superclass description.
def _get_db_args(self, negate):
    """Return the constructor arguments to persist, in constructor order."""
    ctor_args = [self.filter_type, self.time_cutoff, self.inclusive]
    ctor_args.append(negate)
    return ctor_args
315 
316  ## See superclass description.
def _get_filter_type_str(self, negate):
    """Return the display name of this filter type (the same for either negate value)."""
    label = 'Time'
    return label
319 
320  ## See superclass description.
def _get_filter_desc_str(self, negate):
    """Build a sentence such as 'Starts before <time>' or 'Does not end after <time>'.

    The 'Does not ' prefix follows self.negate; the verb's trailing 's'
    follows the negate argument. The result is passed through
    str.capitalize(), which also lower-cases everything after the first
    character.
    """
    opts = DBConstants.COMBO_OPTIONS[DBConstants.COMBO_GROUPS.SEG_FILTER_TIME]

    prefix = 'Does not ' if self.negate else ''
    plural = '' if negate else 's'

    phrases = {
        opts.START_TIME_BEFORE: 'start%s before' % (plural),
        opts.START_TIME_AFTER: 'start%s after' % (plural),
        opts.END_TIME_BEFORE: 'end%s before' % (plural),
        opts.END_TIME_AFTER: 'end%s after' % (plural),
    }
    behaviour = phrases[self.filter_type]
    time_str = BackendUtils.get_time_str(self.time_cutoff)

    sentence = prefix + behaviour + ' ' + time_str
    return sentence.capitalize()
339 
340 ## This type of filter searches for Utterances of types OLN, OLF, and/or transcribed with '<>', filtering out all others.
342  ## Constructor
343  # @param self
344  # @negate (boolean=False) If True, the filter will invert its behaviour. If False, the filter acts normally.
345  # @param db_id (int=None) this is the primary key id from the database table seg_filters. A value of None indicates that this filter is not yet in the DB.
def __init__(self, negate=False, db_id=None):
    """Initialise via SegFilter.__init__ (note the swapped argument order there)."""
    SegFilter.__init__(self, db_id=db_id, negate=negate)
348 
349  ## See superclass description.
def _filter_seg(self, seg, negate, filter_utters=False):
    """Filter a segment for overlapping vocalisations.

    An utterance counts as overlapping when its speaker has the
    DBConstants.SPEAKER_PROPS.OVERLAPPING property, or when its phrase (with
    '&lt;'/'&gt;' decoded) matches the angle-bracket pattern.

    filter_utters=True: returns a FilteredSeg of the utterances that pass
    (inverted when negate is set).
    filter_utters=False: the segment passes when any utterance is
    overlapping (inverted when negate is set); returns a FilteredSeg with
    all utterances, or None.

    Fixes over the original: the whole-segment branch used the undefined
    names 'utter' and 'head', and the per-utterance branch could reuse a
    stale (or unbound) result for utterances without a phrase.
    """
    def is_overlapping(utter):
        speaker = utter.speaker
        if speaker and speaker.has_property(DBConstants.SPEAKER_PROPS.OVERLAPPING):
            return True

        phrase = utter.trans_phrase
        if not phrase:
            return False

        # undo the xml encoding of the angle brackets before matching
        decoded = phrase.replace('&lt;', '<').replace('&gt;', '>')
        return re.search(r'(?:^\s*<.*>.*$)|(?:.*<\s*>\s*)', decoded) is not None

    invert = bool(negate)

    if filter_utters:
        kept = [utter for utter in seg.utters if is_overlapping(utter) != invert]
        return self._to_filtered_seg(seg, kept)

    found = any(is_overlapping(utter) for utter in seg.utters)
    if found != invert:
        return self._to_filtered_seg(seg, seg.utters)

    return None
379 
380  ## See superclass description.
def _filter_linked_utter(self, head, negate):
    """Filter a chain of linked utterances for overlapping vocalisations.

    Walks the chain via .next until a node is overlapping: its speaker has
    the DBConstants.SPEAKER_PROPS.OVERLAPPING property, or its phrase (with
    '&lt;'/'&gt;' decoded) matches the angle-bracket pattern. The phrase is
    decoded per node, so a node without a phrase can no longer reuse the
    previous node's text or hit an unbound local.

    @returns head if the (possibly negated) test passes, else None
    """
    found = False
    node = head

    while node and not found:
        speaker = node.speaker
        if speaker and speaker.has_property(DBConstants.SPEAKER_PROPS.OVERLAPPING):
            found = True
        elif node.trans_phrase:
            # undo the xml encoding of the angle brackets before matching
            decoded = node.trans_phrase.replace('&lt;', '<').replace('&gt;', '>')
            found = re.search(r'(?:^\s*<.*>.*$)|(?:.*<\s*>\s*)', decoded) is not None
        node = node.next

    return head if found != bool(negate) else None
397 
398  ## See superclass description.
def _get_db_args(self, negate):
    """Return the constructor argument list to persist: [negate]."""
    ctor_args = [negate]
    return ctor_args
401 
402  ## See superclass description.
def _get_filter_type_str(self, negate):
    """Return the display name of this filter type (the same for either negate value)."""
    label = 'Overlapping Vocals'
    return label
405 
406  ## See superclass description.
def _get_filter_desc_str(self, negate):
    """Return the description of this filter, chosen by the negate argument."""
    if negate:
        return 'Not marked by LENA as overlapping and not transcribed as overlapping'
    return 'Marked by LENA and/or transcribed as overlapping'
409 
410 ## This type of Filter can be used to include only segments/chains with particular LENA-defined speaker codes (eg. FAN, MAN, CHN).
412  ## Constructor
413  # @param self
414  # @param code_str_list (list) list of strings, each matching a particular LENA speaker code (eg. 'MAN', 'FAN', etc). The filter will OR together multiple codes.
415  # @negate (boolean=False) If True, the filter will invert its behaviour. If False, the filter acts normally.
416  # @param db_id (int=None) this is the primary key id from the database table seg_filters. A value of None indicates that this filter is not yet in the DB.
def __init__(self, code_str_list, negate=False, db_id=None):
    """Initialise the base filter, then record the list of speaker codes to match."""
    SegFilter.__init__(self, db_id=db_id, negate=negate)
    self.code_str_list = code_str_list
421 
422  ## See superclass description.
423  # An OR operation is performed using all codes in code_str_list. Eg. if code_str_list == ['FAN', 'MAN'], then only utterances with speaker codes FAN or MAN will pass through the filter.
def _filter_seg(self, seg, negate, filter_utters=False):
    """Filter a segment by speaker code (OR over self.code_str_list).

    filter_utters=True: returns a FilteredSeg of the utterances whose
    speaker code is in the list (or is not, when negate is set). Utterances
    without a speaker never match.
    filter_utters=False: the segment passes when any entry of seg.speakers
    has a listed code (inverted when negate is set); returns a FilteredSeg
    with all utterances, or None.

    Fix over the original: the inner code index was never reset, so only the
    first speaker of the segment was ever compared against the codes.
    """
    wanted_codes = self.code_str_list

    def speaker_matches(speaker):
        return bool(speaker) and speaker.speaker_codeinfo.get_code() in wanted_codes

    invert = bool(negate)

    if filter_utters:
        kept = [utter for utter in seg.utters
                if speaker_matches(utter.speaker) != invert]
        return self._to_filtered_seg(seg, kept)

    found = any(speaker_matches(speaker) for speaker in seg.speakers)
    if found != invert:
        return self._to_filtered_seg(seg, seg.utters)

    return None
463 
464  ## See superclass description.
465  # Note the OR behaviour if there are multiple codes in self.code_str_list
def _filter_linked_utter(self, head, negate):
    """Filter a chain by speaker code (OR over self.code_str_list).

    Walks the chain via .next until a node's speaker has one of the listed
    codes; nodes without a speaker never match. Returns head when the
    (possibly negated) result passes, else None.
    """
    matched = False
    node = head

    while node and not matched:
        speaker = node.speaker
        matched = bool(speaker) and any(
            speaker.speaker_codeinfo.get_code() == code
            for code in self.code_str_list
        )
        node = node.next

    keep_chain = (not matched) if negate else matched
    return head if keep_chain else None
487 
488  ## See superclass description.
def _get_db_args(self, negate):
    """Return the constructor arguments to persist: [code_str_list, negate]."""
    ctor_args = [self.code_str_list]
    ctor_args.append(negate)
    return ctor_args
491 
492  ## See superclass description.
def _get_filter_type_str(self, negate):
    """Return the display name of this filter type (the same for either negate value)."""
    label = 'Speaker'
    return label
495 
496  ## See superclass description.
def _get_filter_desc_str(self, negate):
    """Describe the filter, e.g. 'Speaker code is not one of (FAN or MAN)'.

    The 'not ' wording follows self.negate. A single code is appended as-is;
    several are joined with ' or ' inside 'one of (...)'.
    """
    codes = self.code_str_list
    negation = 'not ' if self.negate else ''
    prefix = 'Speaker code is %s' % (negation)

    if len(codes) > 1:
        target = 'one of (%s)' % (' or '.join(codes))
    else:
        target = codes[0]

    return prefix + target
505 
506 ## This is an 'abstract' base class for all types of filters that include/exclude based on a particular transcriber code.
508  ## Constructor
509  # @param self
510  # @param trans_code_index (int) the index (zero-based) of the transcriber code this filter will operate upon.
511  # @param type_str (string) a string to use for the output of _get_filter_type_str()
512  # @param desc_noun (string) used in get_filter_desc_str() to describe the object that this filter is filtering.
513  # @param code_str_list (list) list of codes (that are valid for the transcriber code specified in trans_code_index) to allow through the filter (OR logic is used for multiple codes).
514  # @negate (boolean=False) If True, the filter will invert its behaviour. If False, the filter acts normally.
515  # @param db_id (int=None) this is the primary key id from the database table seg_filters. A value of None indicates that this filter is not yet in the DB.
def __init__(self, trans_code_index, type_str, desc_noun, code_str_list, negate=False, db_id=None):
    """Initialise the base filter, then record which transcriber code to inspect and how to describe it."""
    base = super(TransCodeSegFilter, self)
    base.__init__(db_id, negate)

    self.trans_code_index, self.type_str = trans_code_index, type_str
    self.desc_noun, self.code_str_list = desc_noun, code_str_list
523 
524  ## See superclass description.
525  # Note: A logical OR operation is performed using all codes in code_str_list. That is, utterances pass if they match at least one of the codes.
def _filter_seg(self, seg, negate, filter_utters=False):
    """Filter a segment by one transcriber code (OR over self.code_str_list).

    An utterance matches when it has a transcriber code at
    self.trans_code_index and that code contains any of the listed strings.

    filter_utters=True: returns a FilteredSeg of the utterances that match
    (or do not, when negate is set).
    filter_utters=False: the segment passes when any utterance matches
    (inverted when negate is set); returns a FilteredSeg with all
    utterances, or None.

    Fixes over the original: the negated whole-segment branch referenced the
    undefined name 'not_found', and that branch indexed trans_codes without
    the bounds check used in the per-utterance branch.
    """
    code_index = self.trans_code_index
    wanted_codes = self.code_str_list

    def has_code(utter):
        codes = utter.trans_codes
        if not codes or len(codes) <= code_index:
            return False
        return any(wanted in codes[code_index] for wanted in wanted_codes)

    invert = bool(negate)

    if filter_utters:
        kept = [utter for utter in seg.utters if has_code(utter) != invert]
        return self._to_filtered_seg(seg, kept)

    found = any(has_code(utter) for utter in seg.utters)
    if found != invert:
        return self._to_filtered_seg(seg, seg.utters)

    return None
567 
568  ## See superclass description.
569  # Note: A logical OR operation is performed using all codes in code_str_list. That is, utterances pass if they match at least one of the codes.
def _filter_linked_utter(self, head, negate):
    """Filter a chain by one transcriber code (OR over self.code_str_list).

    Walks the chain via .next until a node has a transcriber code at
    self.trans_code_index containing any of the listed strings. Nodes with
    no codes, or too few, are skipped; the original called len() on
    trans_codes before checking it for None.

    @returns head if the (possibly negated) test passes, else None
    """
    code_index = self.trans_code_index
    found = False
    node = head

    while node and not found:
        codes = node.trans_codes
        if codes and code_index < len(codes):
            found = any(wanted in codes[code_index] for wanted in self.code_str_list)
        node = node.next

    return head if found != bool(negate) else None
591 
592  ## See superclass description.
def _get_db_args(self, negate):
    """Return the constructor arguments to persist, in constructor order."""
    ctor_args = [self.trans_code_index, self.type_str, self.desc_noun, self.code_str_list]
    ctor_args.append(negate)
    return ctor_args
595 
596  ## See superclass description.
def _get_filter_type_str(self, negate):
    """Return the display name given to the constructor as type_str."""
    label = self.type_str
    return label
599 
600  ## See superclass description.
def _get_filter_desc_str(self, negate):
    """Describe the filter as '<desc_noun> is [not ]<code description(s)>'.

    Each code's text comes from
    DBConstants.TRANS_CODES[index].get_option(code).get_code_desc(). The
    'not ' wording follows self.negate; several codes are joined with ' or '
    inside 'one of (...)'.
    """
    def describe(code):
        return DBConstants.TRANS_CODES[self.trans_code_index].get_option(code).get_code_desc()

    negation = 'not ' if self.negate else ''
    desc = '%s is %s' % (self.desc_noun, negation)

    if len(self.code_str_list) > 1:
        code_descs = [describe(cd) for cd in self.code_str_list]
        return desc + 'one of (%s)' % (' or '.join(code_descs))

    return desc + describe(self.code_str_list[0])
609 
610 ## This class filters by transcriber code 1 (speaker type). Utterances with a particular speaker type will pass through the filter.
612  ## See superclass description.
def __init__(self, code_str_list, negate=False, db_id=None):
    """Configure the parent filter for transcriber code index 0, labelled 'Speaker Type'."""
    parent = super(SpeakerTypeSegFilter, self)
    parent.__init__(0, 'Speaker Type', 'Speaker Type', code_str_list, negate, db_id)
615 
616  ## See superclass description.
def _get_db_args(self, negate):
    """Return only this subclass's constructor arguments: [code_str_list, negate]."""
    ctor_args = [self.code_str_list]
    ctor_args.append(negate)
    return ctor_args
619 
620 ## This class filters by transcriber code 2 (target listener). Utterances with a particular target listener will pass through the filter.
622  ## See superclass description.
def __init__(self, code_str_list, negate=False, db_id=None):
    """Configure the parent filter for transcriber code index 1, labelled 'Target Listener'."""
    parent = super(TargetListenerSegFilter, self)
    parent.__init__(1, 'Target Listener', 'Target Listener', code_str_list, negate, db_id)
625 
626  ## See superclass description.
def _get_db_args(self, negate):
    """Return only this subclass's constructor arguments: [code_str_list, negate]."""
    ctor_args = [self.code_str_list]
    ctor_args.append(negate)
    return ctor_args
629 
630 ## This class filters by transcriber code 3 (grammaticality/completeness). Utterances with a particular grammaticality/completeness will pass through the filter.
632  ## See superclass description.
def __init__(self, code_str_list, negate=False, db_id=None):
    """Configure the parent filter for transcriber code index 2 (type label kept exactly as spelled)."""
    parent = super(GrammaticalitySegFilter, self)
    parent.__init__(2, 'Gramaticality', 'Utterance', code_str_list, negate, db_id)
635 
636  ## See superclass description.
def _get_db_args(self, negate):
    """Return only this subclass's constructor arguments: [code_str_list, negate]."""
    ctor_args = [self.code_str_list]
    ctor_args.append(negate)
    return ctor_args
639 
640 ## This class filters by transcriber code 4 (utterance type). Utterances with a particular utterance type will pass through the filter.
642  ## See superclass description.
def __init__(self, code_str_list, negate=False, db_id=None):
    """Configure the parent filter for transcriber code index 3, labelled 'Utterance Type'."""
    parent = super(UtteranceTypeSegFilter, self)
    parent.__init__(3, 'Utterance Type', 'Utterance', code_str_list, negate, db_id)
645 
646  ## See superclass description.
def _get_db_args(self, negate):
    """Return only this subclass's constructor arguments: [code_str_list, negate]."""
    ctor_args = [self.code_str_list]
    ctor_args.append(negate)
    return ctor_args
649