Baby Language Lab Scripts
A collection of data processing tools.
 All Classes Namespaces Files Functions Variables Pages
segment.py
Go to the documentation of this file.
1 ## @package data_structs.segment
2 
3 from data_structs.base_objects import DBObject
4 from db.bll_database import DBConstants
5 from data_structs.speaker import Speaker
6 
7 ##This class holds a chunk of information from the TRS file.
8 # This corresponds to the information between a <Turn></Turn> pair of tags in a TRS file. Each <sync> or <who> group within the turn tags is an Utterance
9 # (caveat: if one <sync> or <who> tag has multiple lines, each line is a separate Utterance).
10 # Each Segment has a list of Utterances it contains.
11 # Utterances correspond to "bullet points" in transcriber.
13  ## Constructor
14  # @param self
15  # @param num (int) a unique number that can be used to identify this segment (assigned by the TRS Parser)
16  # @param start (float) start time (as recorded in the <Turn> tag)
17  # @param end (float) end time (as recorded in the <Turn> tag)
18  # @param speakers (list=[]) list of Speaker objects.
19  # @param utters (list=[]) list of Utterance objects that this Segment contains
20  # @param db_id (int=None) database table primary key (segments table). Is None if this Segment is not in the DB.
21  def __init__(self, num, start, end, speakers=[], utters=[], db_id=None, user_adj_start=None, user_adj_end=None):
22  super(DBObject, self).__init__()
23 
24  self.num = num
25  self.speakers = speakers
26  self.utters = utters
27  self.start = start
28  self.end = end
29  self.db_id = db_id
30  self.user_adj_start = user_adj_start
31  self.user_adj_end = user_adj_end
32 
33  ## Iterates through this segment's list of speakers until a specified condition is reached.
34  # @param self
35  # @param cond_fcn (function) this function should accept a Speaker object, and return True (a value that Python considers True) when the iteration should stop. Otherwise it should return False (or None, 0, etc.)
36  # @returns (anything) returns whatever the cond_fcn returns, provided it evaluates to something Python considers True
37  def _iterate_speakers_until(self, cond_fcn):
38  result = False
39 
40  while not result and i < len(self.speakers):
41  result = cond_fcn(self.speakers[i])
42  i += 1
43 
44  return result
45 
46  ## Checks if this segment contains silence (i.e. if it contains the 'SIL' LENA speaker code)
47  # @param self
48  # @returns (boolean) True if one or more speakers is silence, False otherwise
49  def has_silence(self):
50  return self._iterate_speakers_until(
51  lambda speaker: speaker.is_type(DBConstants.SPEAKER_TYPES.SILENCE))
52 
53  ## Checks if this segment contains a speaker that is considered 'far/distant' (eg. FAF, MAF)
54  # @param self
55  # @returns (boolean) True if one or more speakers is considered 'far', False otherwise.
56  def has_distant(self):
57  return self._iterate_speakers_until(
58  lambda speaker: speaker.is_distance(DBConstants.SPEAKER_DISTANCES.FAR))
59 
60  ## Checks if this segment contains a speaker that is considered 'overlapping speech' (eg. OLN, OLF)
61  # @param self
62  # @returns (boolean) True if one or more speakers is considered to be overlapping speech.
64  return self._iterate_speakers_until(
65  speaker.has_property(DBConstants.SPEAKER_PROPS.OVERLAPPING))
66 
67  ## Checks if this segment contains a speaker that is considered to be media. (i.e. TVF, TVN)
68  # @param self
69  # @returns (boolean) True if one or more speakers are media, False otherwise
70  def has_media(self):
71  return self._iterate_speakers_until(
72  lambda speaker: speaker.has_property(DBConstants.SPEAKER_PROPS.MEDIA))
73 
74  ## Checks if this segment contains a speaker that represents nonverbal noise (i.e. NON, NOF)
75  # @param self
76  # @returns (boolean) True if one or more speakers are nonverbal noise, False otherwise
78  return self._iterate_speakers_until(
79  lambda speaker: speaker.has_property(DBConstants.SPEAKER_PROPS.NON_VERBAL_NOISE))
80 
81  ## See superclass description.
82  def db_insert(self, db):
83  super(Segment, self).db_insert(db)
84 
85  last_ids = db.insert('segments',
86  'start_time end_time user_adj_start user_adj_end'.split(),
87  [[self.start, self.end, self.user_adj_start, self.user_adj_end]],
88  )
89 
90  self.db_id = last_ids[0]
91 
92  #insert into this relationship table to record the mapping of segments to speaker codes
93  #note: this will potentially insert multiple rows in a single call
94  db.insert('segs_to_speaker_codes',
95  'seg_id speaker_code_id'.split(),
96  map(lambda person: [self.db_id, person.speaker_codeinfo.db_id], self.speakers),
97  )
98 
99  ## See superclass description.
100  def db_delete(self, db):
101  super(Segment, self).db_insert(db)
102  if self.db_id != None:
103  #note: db foreign key cascade property will cause this to automatically drop corresponding segs_to_speaker_codes
104  db.delete('segments',
105  'id=?',
106  [self.db_id],
107  )
108 
109  ## See superclass description.
110  # Note: this method does not populate the 'num', or 'utters' attributes of segments. In addition, the segment's Speakers do not have their speaker_id attribute populated. The segments DB table does not bother to store this info, as currently, Utterance objects are only ever used after parsing a TRS file. This is probably not a very good reason - but if utterances or speaker objects ever need to be stored in the future, it's not difficult to add extra tables and link them to segments via foreign keys.
111  @staticmethod
112  def db_select(db, ids=[]):
113  DBObject.db_select(db, ids)
114 
115  rows = db.select('segments',
116  'id start_time end_time user_adj_start user_adj_end'.split(),
117  DBObject._build_where_cond_from_ids(ids),
118  )
119 
120  seg_list = []
121  for cur_row in rows:
122  speaker_rows = db.select('segs_to_speaker_codes rel join speaker_codes sc on rel.speaker_code_id=sc.id',
123  ['sc.code'],
124  'rel.seg_id=?',
125  [cur_row[0]],
126  )
127 
128  #create the segment's speaker objects
129  speaker_list = []
130  for cur_speaker in speaker_rows:
131  codeinfo = DBConstants.SPEAKER_CODES.get_option(cur_speaker[0])
132  speaker = Speaker(None, codeinfo)
133  speaker_list.append(speaker)
134 
135  seg = Segment(None, float(cur_row[1]), float(cur_row[2]), speaker_list, [], cur_row[0], cur_row[3], cur_row[4])
136  seg_list.append(seg)
137 
138  return seg_list