Baby Language Lab Scripts
A collection of data processing tools.
 All Classes Namespaces Files Functions Variables Pages
database.py
Go to the documentation of this file.
1 ## @package db.database
2 
3 import sqlite3
4 import logging
5 import traceback
6 import sys
7 import csv
8 
9 ## This class provides a basic API layer on top of an SQLite database.
10 # Ideally, all SQL should be restricted to this file. Calling code can form queries using the select() method. You can also update(), insert(), and delete().
11 class Database(object):
12  ## Constructor
13  # @param self
14  # @param db_filename (string) absolute path to the sqlite3 database file (if this does not exist, a new database file will be created)
15  def __init__(self, db_filename):
16  #self.logger = logging.getLogger(__name__)
17 
18  #create/open the database
19  self.conn = sqlite3.connect(db_filename)
20 
21  #grab a cursor object that can be used to execute statements
22  self.cursor = self.conn.cursor()
23 
24  #turn on foreign keys - they're disabled by default in sqlite
25  self.conn.execute("PRAGMA foreign_keys = ON")
26 
27  ## Closes the database connection, releasing any resources that it was using.
28  # @param self
29  def close(self):
30  self.cursor.close()
31  self.conn.close()
32 
33  ## Executes a verbatim SQL statement.
34  # This is used for the custom processing scripts (custom_scripts/ directory), and shouldn't
35  # really be used by any general apps, since it doesn't do any sanitization.
36  # @param self
37  # @param stmt (string) a complete sql statement to execute.
38  def execute_stmt(self, stmt):
39  try:
40  self.cursor.executescript(stmt)
41  self.conn.commit()
42 
43  except Exception as err:
44  logging.error('Error executing SQL statement "%s":\n%s' % (stmt, err))
45  #print 'Error executing SQL statement "%s":\n%s' % (stmt, err)
46  self.conn.rollback()
47 
48  ## Reads and executes a sql script file.
49  # @param self
50  # @param filename (string) full path to sql file
51  def execute_script(self, filename):
52  try:
53  sql_file = open(filename, 'r')
54  sql = sql_file.read()
55  sql_file.close()
56  self.cursor.executescript(sql)
57  self.conn.commit()
58 
59  #rollback on error
60  except Exception as err:
61  logging.error('Error executing SQL script "%s":\n%s' % (filename, err))
62  self.conn.rollback()
63 
64  ## Inserts one or more rows into a table in the database.
65  # @param self
66  # @param table (string) name of the table to insert into
67  # @param cols (string) column names of this table (can omit autogenerated/defaulted cols)
68  # @param rows (list) a list of lists, (one sublist for each row) containing the data values to insert. These must all be the same length
69  # @returns (list) list of all the primary key values of the inserted rows
70  def insert(self, table, cols, rows):
71  last_ids = []
72  if table and len(rows) > 0:
73  num_cols = len(cols)
74  num_rows = len(rows)
75 
76  sql = 'INSERT INTO %s (%s) VALUES (%s);' % (table, ','.join(cols), '?,' * (num_cols - 1) + '?');
77 
78  try:
79  #SQLite does not support inserting multiple values in a single INSERT statement, so we have to loop...
80  for i in range(num_rows):
81  self.cursor.execute(sql, rows[i])
82  last_ids.append(int(self.cursor.lastrowid))
83 
84  #only commit after all rows have been successfully inserted
85  self.conn.commit()
86 
87  except Exception as error:
88  logging.error('Database error on insert()')
89  logging.error('Statement: %s' % (sql))
90  logging.error("Stack trace: %s" % (traceback.format_exc()))
91  self.conn.rollback()
92 
93  return last_ids
94 
95  ## Selects rows from a table in the database.
96  # @param table (string) name of the table to select from (can be multiple joined tables - eg. 'table1 join table2 on x=y')
97  # @param cols (list) list of column names (string) to select
98  # @param where_cond (string=None) a boolean condition like ('x=y'). Will generate 'WHERE x=y'. Set to None if no where condition should be generated. Note: Can use '?' as a placeholder for raw data values, then pass the raw values in using the 'params' parameter. Eg. passing 'x=?' for where_cond and [2] for params generates 'WHERE x=2'. Advantage here is that the DB ensures the raw values are properly escaped/quoted.
99  # @param params (list=[]) list of values to stick in '?' placeholders passed in via the where_cond parameter (or any other parameter). These values are automatically quoted/escaped by the SQLite API.
100  # @param order_by (string=None) a column name, or multiple comma-separated column names, to order by. Eg. passing in 'time, id' generates 'ORDER BY time, id'. Pass None if no order by clause is needed.
101  # @param dump_sql (boolean=False) Set to True if you wish to print the generated statement (before '?' substitution - unfortunately sqlite api restrictions make it difficult to access the final sustituted verison...) to standard output.
102  # @param group_by (string) This string is inserted into to SQL statement after the where clause. It can be a single column name, or multiple names separated with commas (Eg. 'child_id', or 'child_id, awc').
103  # @returns (list) list of lists, one sub-list per row. Each sublist contains data in the order specified by cols.
104  def select(self, table, cols, where_cond=None, params=[], order_by=None, dump_sql=False, group_by=None):
105  sql = 'SELECT %s FROM %s' % (','.join(cols), table)
106  if where_cond:
107  sql += ' WHERE %s' % (where_cond)
108 
109  if group_by:
110  sql += ' GROUP BY %s' % (group_by)
111 
112  if order_by:
113  sql += ' ORDER BY %s' % (order_by)
114 
115  sql += ';'
116 
117  if dump_sql:
118  print sql
119 
120  try:
121  self.cursor.execute(sql, params)
122 
123  except Exception as error:
124  logging.error('Database error on select()')
125  logging.error('Query: %s' % (sql))
126  logging.error("Stack trace: %s" % (traceback.format_exc()))
127 
128  return self.cursor.fetchall()
129 
130  ## Deletes rows from a table in the database.
131  # @param self
132  # @param table (string) name of the table to delete from
133  # @param where_cond (string=None) a boolean condition like ('x=y'). Will generate 'WHERE x=y'. Set to None if no where condition should be generated (deletes all rows in table). Note: Can use '?' as a placeholder for raw data values, then pass the raw values in using the 'params' parameter. Eg. passing 'x=?' for where_cond and [2] for params generates 'WHERE x=2'. Advantage here is that the DB ensures the raw values are properly escaped/quoted.
134  # @param params (list=[]) list of values to stick in '?' placeholders passed in via the where_cond parameter (or any other parameter). These values are automatically quoted/escaped by the SQLite API.
135  # @returns (int) the number of rows deleted.
136  def delete(self, table, where_cond=None, params=[]):
137  sql = 'DELETE FROM %s' % (table)
138  if where_cond:
139  sql += ' WHERE %s' % (where_cond)
140 
141  sql += ';'
142 
143  rowcount = 0
144  try:
145  self.cursor.execute(sql, params)
146  rowcount = self.cursor.rowcount
147  self.conn.commit()
148 
149  except Exception as error:
150  print 'error on delete!'
151  logging.error('Database error on delete()')
152  logging.error('Statement: %s' % (sql))
153  logging.error("Stack trace: %s" % (traceback.format_exc()))
154  self.conn.rollback()
155 
156  return rowcount
157 
158  ## Updates the value of a timestamp column to the current date/time.
159  # @param self
160  # @param table (string) name of table to update
161  # @param col (string) name of timestamp column
162  # @param where_cond (string=None) a boolean condition like ('x=y'). Will generate 'WHERE x=y'. Set to None if no where condition should be generated. Note: Can use '?' as a placeholder for raw data values, then pass the raw values in using the 'params' parameter. Eg. passing 'x=?' for where_cond and [2] for params generates 'WHERE x=2'. Advantage here is that the DB ensures the raw values are properly escaped/quoted.
163  # @param params (list=[]) list of values to stick in '?' placeholders passed in via the where_cond parameter (or any other parameter). These values are automatically quoted/escaped by the SQLite API.
164  # @returns (int) number of rows updated
165  def update_timestamp_col(self, table, col, where_cond=None, params=[]):
166  sql = 'UPDATE %s SET %s=CURRENT_TIMESTAMP' % (table, col)
167  if where_cond:
168  sql += ' WHERE %s' % (where_cond)
169 
170  sql += ';'
171  rowcount = 0
172 
173  try:
174  self.cursor.execute(sql, params)
175  rowcount = self.cursor.rowcount
176  self.conn.commit()
177 
178  except Exception as error:
179  logging.error('Database error on update_timestamp_col()')
180  logging.error('Statement: %s' % (sql))
181  logging.error("Stack trace: %s" % (traceback.format_exc()))
182  self.conn.rollback()
183 
184  return rowcount
185 
186  ## Updates rows in a table.
187  # @param table (string) name of table to update
188  # @param cols (list) list of names of columns (strings) to update
189  # @param where_cond (string=None) a boolean condition like ('x=y'). Will generate 'WHERE x=y'. Set to None if no where condition should be generated. Note: Can use '?' as a placeholder for raw data values, then pass the raw values in using the 'params' parameter. Eg. passing 'x=?' for where_cond and [2] for params generates 'WHERE x=2'. Advantage here is that the DB ensures the raw values are properly escaped/quoted.
190  # @param params (list=[]) list of values to stick in '?' placeholders generated for each column we are updating. The last param is in this list is used for the where_cond parameter (if any). These values are automatically quoted/escaped by the SQLite API.
191  # @param dump_sql (boolean=False) Set to True if you wish to print the generated statement (before '?' substitution - unfortunately sqlite api restrictions make it difficult to access the final sustituted verison...) to standard output.
192  # @returns (int) number of rows updated
193  def update(self, table, cols, where_cond=None, params=[], dump_sql=False):
194  rowcount = 0
195  sql = 'UPDATE %s SET ' % (table)
196 
197  for i in range(len(cols)):
198  sql += '%s=?' % cols[i]
199  if i < len(cols) - 1:
200  sql += ','
201 
202  if where_cond:
203  sql += ' WHERE %s' % (where_cond)
204 
205  sql += ';'
206 
207  if dump_sql:
208  print sql
209 
210  try:
211  self.cursor.execute(sql, params)
212  rowcount = self.cursor.rowcount
213  self.conn.commit()
214 
215  except Exception as error:
216  logging.error('Database error on update()')
217  logging.error('Statement: %s' % (sql))
218  logging.error("Stack trace: %s" % (traceback.format_exc()))
219  self.conn.rollback()
220 
221  return rowcount
222 
223  ## Writes out all of the data in a table to a csv file.
224  # @param self
225  # @param path (string) The location in which you'd like to save the csv file. (Eg. 'C:/Wayne/data.csv')
226  # @param table (string) Name of the database table to read data from.
227  def write_to_file(self, path, table):
228  file_out = open(path, 'wb')
229  writer = csv.writer(file_out)
230 
231  self.cursor.execute('PRAGMA table_info(%s)' % (table)) #This SQLite pragma selects the column names from the table
232  rows = self.cursor.fetchall()
233 
234  #throw away everything except the column names
235  col_names = map(lambda cur_row: cur_row[1], rows)
236 
237  #write the column names in the csv file
238  writer.writerow(col_names)
239 
240  #write the table rows to the csv file
241  rows = self.select(table, col_names, order_by='id') #select *
242  for cur_row in rows:
243  writer.writerow(cur_row)
244 
245  file_out.close()