Baby Language Lab Scripts
A collection of data processing tools.
 All Classes Namespaces Files Functions Variables Pages
output.py
Go to the documentation of this file.
1 ## @package data_structs.output
2 
3 from data_structs.base_objects import DBObject
4 from data_structs.output_calcs import *
5 from data_structs.seg_filters import SegFilter
6 
## An Output represents a particular statistic that the user is trying to calculate from a TRS file. It is used in the Statistics Application.
# Each Output corresponds to a single Configuration object. Configurations can have multiple Outputs.
# The exported spreadsheet contains a single section for each Output in the selected Configuration.
# Calling code should instantiate this object, then use its add_item() method to pass in the segments/chains that it wants included in this output's calculations.
12  ## Constructor
13  # @param self
14  # @param name (string) the user-specified name for this output
15  # @param desc (string) the user-specified description for this output
16  # @param filters (list) list of SegFilter objects to apply to data that is inserted into this Output
17  # @param output_calc (OutputCalc) an object used to perform any arithmetic/processing needed to generate this output's section in the spreadsheet file
18  # @param chained (boolean) True if we're using linked segments, False if we're using unlinked segments
19  # @param db_id (int=None) database primary key value for this object - code should not set, it's set via db_insert() and db_select(). A value of None indicates that this Output is not in the database.
20  def __init__(self, name, desc, filters, output_calc, chained, db_id=None):
21  self.name = name
22  self.desc = desc
23  self.filters = filters or []
24  self.output_calc = output_calc
25  self.chained = chained
26  self.db_id = db_id
27 
28  #these are used to hold all of the data that's inserted into this output
29  self.segs = []
30  self.chains = []
31 
32  ## Clears any cached utterances in preparation for a new run.
33  # @param self
34  def reset(self):
35  self.segs = []
36  self.chains = []
37  #the calc object needs to be notified so it can restart it's processing
38  self.output_calc.reset()
39 
40  ## Accepts a segment/chain, filters it, and (if it passes through the filters) factors it into this output's calculations.
41  # @param self
42  # @param item (Segment / Utterance) the data object to add to this output. If chained is set to True, this should be an Utterance object (potentially linked to other Utterance objects via its 'next'/'prev' pointers). If chained is False, this should be a Segment object.
    # @param filter_utters (boolean=False) If True, and chained == False, then any utterances that don't pass the filter will be stripped out of the segments passed in (but the segment itself will still be included if it has other utterances). If False, and chained == False, then if the segment that's passed in has any utterance that fails to pass the filter, the whole segment will be excluded. If chained == True, the setting of this parameter has no effect (in all cases, if one node in the chain fails to pass the filter, the whole chain is excluded - conceptually, chains are treated as a single, long Utterance).
44  def add_item(self, item, filter_utters=False):
45  if self.chained:
46  self._add_chain(item)
47 
48  else:
49  self._add_seg(item, filter_utters)
50 
51 
52  ## Adds an unlinked segment to this Output, if it passes through the filters.
53  # @param self
54  # @param seg (Segment) the Segment object to add
55  # @param filter_utters (boolean=False) see description for add_item()
56  def _add_seg(self, seg, filter_utters=False):
57  #run the segment through the filters - the tasks associated with filter_utters are deferred to the seg_filter objects
58  i = 0
59  filtered_seg = seg
60  while filtered_seg and i < len(self.filters):
61  filtered_seg = self.filters[i].filter_seg(filtered_seg, filter_utters)
62  i += 1
63 
64  #if it made it through the filters, append the seg to this object's internal list of unlinked segments, and factor it into the output calculations
65  if filtered_seg:
66  self.segs.append(filtered_seg)
67  self.output_calc.add_seg(filtered_seg)
68 
69  ## Adds chain (an Utterance) to this Output, if it passes through the filters.
70  # @param self
71  # @param head (Utterance) the Utterance (head of the chain) to add
72  def _add_chain(self, head):
73  #run the chain through the filters
74  i = 0
75  filtered_head = head
76  while filtered_head and i < len(self.filters):
77  filtered_head = self.filters[i].filter_linked_utter(filtered_head)
78  i += 1
79 
80  #it if made it through the filters, append the chain to this object's internal list of chains, and factor it into the output calculations
81  if filtered_head:
82  self.chains.append(filtered_head)
83  self.output_calc.add_chain(filtered_head)
84 
85  ## Grabs a list of all items that have been added to this Output that have passed through the filters.
86  # @param self
87  # @returns (list) list of Utterances, if chained. Otherwise list of Segments.
88  def get_filtered_items(self):
89  return (self.chains if self.chained else self.segs)
90 
91  ## See superclass description
92  def db_insert(self, db):
93  super(Output, self).db_insert(db)
94 
95  #insert this object into the outputs DB table, retreiving the PK id value.
96  last_ids = db.insert('outputs',
97  'name desc calc_class_name calc_args chained'.split(),
98  [[self.name,
99  self.desc,
100  self.output_calc.__class__.__name__,
101  str(self.output_calc.get_db_args()),
102  self.chained,
103  ]])
104  self.db_id = last_ids[0]
105 
106  #insert each of this Output's filters into the DB, if it's not already present in the DB
107  for cur_filter in self.filters:
108  if cur_filter.db_id == None:
109  cur_filter.db_insert(db)
110 
111  #always insert a row into the relation table that maps outputs to their corresponding filters
112  db.insert('outputs_to_seg_filters',
113  'output_id seg_filter_id'.split(),
114  [[self.db_id, cur_filter.db_id]])
115 
    ## See superclass description.
117  def db_delete(self, db):
118  super(Output, self).db_delete(db)
119 
120  db.delete('outputs',
121  'id=?',
122  [self.db_id])
123 
124  #Note: Foreign keys will cause the above statement to also delete from:
125  #output_configs_to_outputs,
126  #outputs_to_seg_filters
127 
128  #we still need to delete from seg_filters
129  for cur_filter in self.filters:
130  if cur_filter.db_id != None:
131  db.delete('seg_filters',
132  'id=?',
133  [cur_filter.db_id])
134 
135  self.db_id = None
136 
137  ## Writes a description of this output, and the calculated information from this output, to a CSV file.
138  # @param self
    # @param csv_writer (CSVWriter) a Python csv library writer object, configured to write to the appropriate csv file.
140  def write_csv_rows(self, csv_writer):
141  #write out a description of this output
142  csv_writer.writerow(['------------------'])
143  csv_writer.writerow(['Name:', self.name])
144  csv_writer.writerow(['Description:', self.desc])
145  csv_writer.writerow(['Link Segments:', str(bool(self.chained))])
146  csv_writer.writerow(['Filters:'])
147 
148  #write out any filters
149  if self.filters:
150  for cur_filter in self.filters:
151  csv_writer.writerow(['', cur_filter.get_filter_type_str(), cur_filter.get_filter_desc_str()])
152  else:
153  csv_writer.writerow(['', 'None'])
154 
155  #write the calculated information
156  csv_writer.writerow([''])
157  self.output_calc.write_csv_rows(self.chained, csv_writer)
158  csv_writer.writerow(['------------------'])
159 
160 
161  ## See superclass description.
162  @staticmethod
163  def db_select(db, ids=[]):
164  DBObject.db_select(db, ids)
165 
166  #select the output data from the outputs table
167  rows = db.select('outputs',
168  'id name desc calc_class_name calc_args chained'.split(),
169  DBObject._build_where_cond_from_ids(ids),
170  )
171 
172 
173  #create an Output object for each row
174  output_list = []
175  for cur_row in rows:
176  #instantiate the OutputCalc object
177  calc_class = eval(cur_row[3])
178  calc_args = eval(cur_row[4])
179  output_calc = calc_class(*calc_args)
180 
181  #instantiate any filter objects associated with this outputs
182  filters = SegFilter.db_select_by_ref(
183  db,
184  'outputs_to_seg_filters',
185  'seg_filter_id',
186  'output_id',
187  cur_row[0],
188  )
189 
190  #create the Output object using the info retreived above
191  output = Output(cur_row[1],
192  cur_row[2],
193  filters,
194  output_calc,
195  cur_row[5],
196  cur_row[0]
197  )
198 
199  output_list.append(output)
200 
201  return output_list
202 
203  ## Selects Outputs via a relationship table (a table linking output ids to some other type of id).
204  # @param db (BLLDatabase) a database handle object to use for the select operation
205  # @param ref_table (string) the name of the relationship table to use
    # @param id_col (string) name of the column (in ref_table) containing the output ids
207  # @param ref_col (string) name of the column (in ref table) containing the value you want to look up outputs by
208  # @param ref_val (int) value to search for in ref_col. For each matching row, the id_col value is obtained (this is the output id), and used to do a lookup in the outputs table.
209  # @returns (list) list of Output objects, or empty list if no matches were found for ref_val in ref_col of ref_table
210  @staticmethod
211  def db_select_by_ref(db, ref_table, id_col, ref_col, ref_val):
212  outputs = []
213 
214  #perfrom the select on ref_table
215  rows = db.select(ref_table, [id_col], '%s=?' % (ref_col), [ref_val])
216  #retreive the output ids from the result set
217  ids = map(lambda cur_row: cur_row[0], rows)
218 
219  #select outputs if we obtained some ids from ref_table
220  if ids:
221  outputs = Output.db_select(db, ids)
222 
223  return outputs