termsql/termsql at master · gknisely/termsql · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
#!/usr/bin/env python
#
#The MIT License (MIT)
#
#Copyright (c) 2014 Tobias Glaesser
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.

#NOTICE: please contribute improvements back to the project, it's very much appreciated

#TODO:
#-escape strings before for insertion into database
#-fix --head feature
#-

import fileinput
import subprocess
import sys
import argparse
import tempfile
import os
import sqlite3
import locale

try:
    import sqlparse
except ImportError:
    sqlformat = False
else:
    if sqlparse.__version__ > "0.1.14":
        sqlformat = True
    else:
        sqlformat = False
#sqlformat depends on python sqlparser/sqlformat from https://pypi.python.org/pypi/sqlparse


locale.setlocale(locale.LC_ALL, '')

VERSION = "0.3"
#configureable default values
sqlite_cmd = 'sqlite3'
table_name = 'tbl'
sql_db_file = ''
delimiter = ' '
separator = '' #default to whatever sqlite defaults to
mode = 'list'
columns = ''
key_columns = ''
offset_head = 0
max_rows = 0
queryfile_data = ''
query_off = False
merge = None
calc = ''
filename = '' #by default termsql writes to stdout, but if the user sets a filename we write there instead
show_header = False
drop_table = True #by default any existing table by the same name is dropped

epilog_expanded = False
if os.environ.get('MORE_HELP') != None and os.environ.get('MORE_HELP').lower() == 'true':
    epilog_expanded = True

epilog_more =  '''  -$ termsql -i MyCSVFile.CSV -t fancytable -d ',' -1 -o mysqlite.db
                        creates a new sqlite database from a CSV file. -d ',' tells termsql to use commata as delimiter. -1 tells termsql that the first line of the CSV file is not data and instead the column names and termsql creates database columns with those names accordingly. -t fancytable sets the tablename to fancytable
  -$ sensors | termsql "select COL2 from tbl where COL0='Core'"
                        extract the temperature from all your CPU cores
  -$ export LC_ALL=en_US; top -b | head | termsql -1 -H 6 "select [PID],[USER],[COMMAND],[%CPU] from tbl where [%CPU]>=25"
                        termsql doesn't recognize numbers like "25,3" as numbers, but as strings. export LC_ALL=en_US ensures that top outputs numbers that are easy for termsql/sqlite to digest (ie. "25.3"). -H 6 makes termsql disregard the first 6 lines. We select only the processes with more than 25% cpu usage and output their PID,USER,COMMAND and %CPU.
  -$ export DISPLAY=$(ps aux | termsql "select COL11 from tbl where COL10 like '%Xorg.bin%' limit 1")
                        set DISPLAY environment variable to what display X is running on right now, assuming that the X binary is called "Xorg.bin")
  -$ ls -lha /usr/bin/* | termsql -w -r 8 "select * from tbl order by COL8 desc"
                        -r 8 merges the filenames into the 8th column. Then "order by COL8 desc" sorts them in reverse order. Due to -w the output looks nice on the command line
  -$ termsql -m line --calc 2+2,23423./123
                        To use termsql as simple a calculator with the power of sqlite behind it, use the --calc option and a commata separated list of calculations you want to do. Sometimes you may need to append a dot . to a number to make sqlite aware that it's supposed to do floating point calculation. If you want to use paranthesis you may also need to use  quotes like this --calc "(2+2,23423./123)". The -m line option beautifies the output.
  -$ cat /etc/fstab | termsql "select COUNT(*) from tbl where COL2='ext4'"
                        how many devices do you have in your fstab that are using the ext4 filesystem? COUNT(*) simply outputs the total number of rows that the current select/where statement would select.
  -$ ps aux | termsql -m line -1 "select USER,COUNT(*) from tbl group by USER"
                        counts the total number of processes that each user has running. -1 gets the column names from the first line, therefore we can use USER instead of COL0 in the SQL statement. "group by USER" groups the rows of with identical USER together and for that reason COUNT(*) returns the total number of rows (in this case processes) for each USER. -m line beautifies the output.
  -$ termsql -ei .config/Bitcoin/Bitcoin-Qt.conf -c key,value -d = -p = -x "update tbl set value='true' where key='fMinimizeToTray'"
                        Demonstrates how you can use termsql to edit simple config files with key/value pairs. -i loads the config file as input and -e makes sure this same file is written to instead of stdout. Simply speaking: enabling edit mode. -d = sets the delimiter for splitting the input to = and -p = sets the separator for the output to = again, so that the output format matches the input format. -x appends a ";select * from tbl" to the user defined query, so that we get everything back that we put in. The user query is an SQL update statement which sets the value part of the key value pair to true, where the key is fMinimizeToTray. -c key,value allows us to use these convenient names instead of COL0 (key) and COL1 (value). Note that this approach may not always be perfect, for example in .config/Bitcoin/Bitcoin-Qt.conf there's a section line "[General]" which gets changed to "[General]=" after termsql is done with editing. Which may or may not be a problem depending on the program that uses the config file. It could easily be fixed with a tool like sed though.
  -$ termsql -ei my.config -c key,value -d = -p = -x "delete from tbl where key IN ('entry1','entry2','entry3','entry4')"
                        Analogous to the previous example this one would find and delete the key/value pair entries with the keys entry1, entry2, entry3 and entry4. Using IN in a where clause you can specifiy a list to match against instead of just a single value. Everything else from the previous example also applies in this one.
'''
if not epilog_expanded:
    epilog_more = '''
To get more examples type `man termsql` or set `MORE_HELP=True` and run `termsql -h` again. Thanks for using termsql.'''

#setup command line parser
parser = argparse.ArgumentParser(description='Convert text into SQL table and query it', formatter_class=argparse.RawDescriptionHelpFormatter, epilog='''examples:
  -$ ps aux | termsql "select * from tbl where COL0='root' limit 5"
                        outputs 5 processes that are running as root
  -$ df | termsql -1 'select Filesystem from tbl order by [1K-blocks] desc limit 1'
                        returns the largest device
  -$ cat /proc/cpuinfo | termsql "select COL2 from tbl where COL0='bogomips'"
                        how many bogomips does your system have?
  -$ termsql --infile /etc/group -d ':' "select COL0 from tbl"
                        read from file, use : as delimiter, shows all groups in /etc/group
'''+epilog_more
)
parser.add_argument("query", nargs='*', default='', help="SQL Query string")
parser.add_argument("-1", "--head", help="use first line/head(er) for column names", action="store_true")
parser.add_argument("-0", "--show-header", help="show column names as header in output (not shown by default)", action="store_true")
parser.add_argument("-d", "--delimiter",  nargs='?', help="custom delimiter (default is whitespace)")
parser.add_argument("-p","--separator", nargs='?', help="set sqlite output field separator (default depends on output mode. ie. for list '|' is the default and for csv ',' is the default) ")
parser.add_argument("-l","--line-as-column", nargs='?', help="each line of input is a whole column, next row begins after n lines")
parser.add_argument("-k","--key-columns", nargs='?', help="one or more columns can make up the primary key (i.e -k COL0 or -c one,two,three -k one,two)")
parser.add_argument("-c","--columns", nargs='?', help="set custom column names (ie. -c 'name,street,age')")
parser.add_argument("--calc", nargs='?', help="use sqlite as simple calculator, you can do multiple calculations separated by commata (i.e. \"(5*109.05)/2 , 345+789\") In this mode stdin input is ignored")
parser.add_argument("-w","--whitespace", help="use whitespace as field separator (default is |). equal to mode column",action="store_true")
parser.add_argument("-H","--offset-head", nargs='?', help="ignore first n lines of input",type=int)
parser.add_argument("-T","--offset-tail", nargs='?', help="ignore last n lines of input")
parser.add_argument("-M","--max-rows", nargs='?', help="don't insert more than n rows into database")
parser.add_argument("-C","--dump-create-table", help="dump create table SQL",action="store_true")
parser.add_argument("-D","--dump", help="dump complete SQL to create table and content",action="store_true")
parser.add_argument("-Q","--sql", help="outputs the full query, like it would be fed to SQlite", action="store_true")
parser.add_argument("-m","--mode", nargs='?', help="set sqlite output mode i.e.: csv, column, html, insert, line, list, tabs, tcl (default is list)")
parser.add_argument("-r","--merge", nargs='?', help="merges all columns from column n to the last one into one. This is useful when you have data like filenames with whitespaces in it, to prevent it from getting split by the delimiter. Note that counting starts from 0, therefore n=0 is the first column ...")
parser.add_argument("-x","--select-all", help="add final SELECT * FROM to user defined query",action="store_true")
parser.add_argument("-a","--append", help="don't DROP TABLE instead just append data to existing table. Assumes existing table is compatible.",action="store_true")
parser.add_argument("-i","--infile",  nargs='?', help="use file as input instead of stdin")
parser.add_argument("-o","--outfile", nargs='?', help="location/filename to use for sql database (by default a tempfile is used)")
parser.add_argument("-f","--file", nargs='?', help="write output to file instead of stdout. It's overwriting the whole file, and not just appending, so please be cautious")
parser.add_argument("-e","--edit", help="writes to the input file instead of stdout. For this to work the input file has to be specified with the -i/--infile option. It's overwriting the whole file, and not just appending, so please be cautious", action="store_true")
parser.add_argument("-q","--queryfile", nargs='?', help="load complex query from file, queries on the commandline get executed last")
parser.add_argument("-t","--table", nargs='?', help="tablename (must be a valid sqlite tablename)")
parser.add_argument("-s","--sqlite", nargs='?', help="path to sqlite executable")
parser.add_argument("-v","--version", help="display version information", action="store_true")
args = parser.parse_args()


#process command line input
if args.mode:
    mode = args.mode.lower()
if args.delimiter:
    delimiter = args.delimiter
if args.separator:
    separator = args.separator
if args.whitespace:
    mode = 'column'
if args.calc:
    calc = args.calc
if args.merge:
    merge = int(args.merge)
if args.outfile:
    sql_db_file = args.outfile
if args.file:
    filename = args.file
if args.table:
    table_name = args.table
if args.infile:
    infile = open(args.infile)
    if args.edit: #no need to check for this option unless user specifies infile
        filename = args.infile
elif not args.calc:
    infile = sys.stdin
else:
    infile = None
if args.sqlite:
    sqlite_cmd = args.sqlite
if args.key_columns:
    key_columns = [x.strip() for x in args.key_columns.split(',')]
if args.columns:
    columns = [x.strip() for x in args.columns.split(',')]
if args.offset_head:
    offset_head = args.offset_head
if args.queryfile:
    queryfile_data = open(args.queryfile).read()
if args.show_header:
    show_header = True
if args.append:
    drop_table = False
if args.version:
    print(VERSION)
    exit(0)

#simplify subprocess.call
def shell(str):
    subprocess.call(str, shell=True)

colnames = [] #only needed when using --head optional arg

#get name of column x
def get_col_name(i):
    if args.columns and len(columns) > i:
        return columns[i]
    elif args.head and len(colnames) > i:
        return colnames[i]
    else:
        return "COL"+str(i)

inserts = []

#test if a certain number/value/string is compatible with type float
def isfloat(value):
  try:
    locale.atof(value)
    return True
  except ValueError:
    return False

col_properties = {}
#this function is called for each row that gets inserted into a column,
#and from that the data type and other information about the column is gathered
def update_col_properties(col,data):
    if not col in col_properties:
        col_properties[col] = {}
        col_properties[col]["INTEGER"] = True
        col_properties[col]["REAL"] = True
        col_properties[col]["TEXT"] = True
    if not data.isdigit():
        col_properties[col]["INTEGER"] = False
    if not isfloat(data):
        col_properties[col]["REAL"] = False
    if "MAXLEN" not in col_properties[col] or len(data) > col_properties[col]["MAXLEN"]:
        col_properties[col]["MAXLEN"] = len(data)

#returns the SQL value type of a column
def get_col_type(col):
    if not col in col_properties:
        return "TEXT"
    elif col_properties[col]["INTEGER"]:
        return "INTEGER"
    elif col_properties[col]["REAL"]:
        return "REAL"
    return "TEXT"

insert_count = 0
#build list for insert row and add to global inserts list
def build_insert(row):
    insert = []
    c = 0
    for col in row:
        if merge != None and c > merge:
            insert[-1] = str(insert[-1]) + delimiter + str(col)
            update_col_properties(merge,insert[-1])
        else:
            insert.append(col)
            update_col_properties(c,col)
        c += 1
    inserts.append(insert)

#get column names using row
def get_col_names(row):
    count = 0
    for col in row:
        colnames.append(col)
        count += 1

max_count = 0

def traverse_input():
    global max_count, insert_count, offset_head
    #traverse input
    first_line = True
    row = [] #declared in this scope becaues of --line-as-column
    col_count = 0 #declared in this scope becaues of --line-as-column
    while True:
        line = infile.readline()

        if not line:
            break
        if offset_head > 0:
            offset_head -= 1
            continue

        if not args.line_as_column: #this is the standard input traversal
            if args.max_rows and insert_count >= int(args.max_rows):
                break
            if delimiter != ' ':
                row = line.rstrip('\r\n').split(delimiter)
            else:
                row = line.split()
            if len(row) > max_count:
                max_count = len(row)
            if first_line and args.head:
                get_col_names(row)
            else:
                build_insert(row)
                insert_count += 1
            first_line = False
        else: #this is the special input traversal for --line-as-column mode
            max_count = int(args.line_as_column)

            row.append(line.rstrip('\r\n'))
            col_count += 1
            if col_count == int(args.line_as_column):
                col_count = 0
                build_insert(row)
                insert_count += 1
                row = []
            if args.max_rows and insert_count >= int(args.max_rows):
                break

    if args.line_as_column and len(row) > 0:
            build_insert(row)
            insert_count += 1

    if len(col_properties) < max_count:
        max_count = len(col_properties)


if infile != None:
    traverse_input()
    infile.close() #at this point this file shouldn't be open anymore

temp = []
for ins in inserts:
    ins.extend([None] * (max_count - len(ins)))
    temp.append(ins)
inserts = temp

#create create database string
if max_count > 0:
    create_table_string =  "create table \""+table_name+"\"("
    for x in range(0,max_count):
        if x > 0:
            create_table_string += ", "
        create_table_string += '"'+ get_col_name(x) + '"' + " " + get_col_type(x)
    if len(key_columns) > 0:
        create_table_string += ", PRIMARY KEY ("+','.join(key_columns)+")"
    create_table_string += ");"
else:
    if calc == '':
        exit(0)

if sql_db_file == '':
    f = tempfile.NamedTemporaryFile()
    sql_db_file = f.name

conn = sqlite3.connect(sql_db_file)
conn.text_factory = str
c = conn.cursor()

if drop_table:
    c.execute("drop table if exists \""+table_name+"\";")

    #create database and databasefile
    if max_count > 0:
        c.execute(create_table_string)
        if args.dump_create_table:
            print(create_table_string)
            query_off = True

temp = []
temp.extend('?' * (max_count))
placeholder_str = ",".join(temp)

if args.offset_tail and int(args.offset_tail) > 0: #remove all the entries from the inserts list that can be ignored due to option --offset-tail
    del inserts[-int(args.offset_tail):]

if max_count > 0:
    #execute inserts, fill table with row data
    c.executemany('insert into '+table_name+' values ('+placeholder_str+')', inserts)
    conn.commit()


def to_temp_file(s):
    t = tempfile.NamedTemporaryFile('w+',delete=False)
    t.write(s)
    t.close()
    return t

def remove_temp_file(t):
    os.unlink(t.name)

actual_query = ' '.join(args.query)

#IDEA: expand the user supplied query so that "from table_name" is automagically inserted
#- select * [from tbl] where/order by
# - not easy, as * can be complex, also what comes after [] isn't clear either, can be any number of things?! -- actually that's one of he most realistic ways of solving this
# - SOLVED: using sqlparser/sqlformat from pypi
# - to parse for the stuff that comes after
#- update [table_name] set col0...
# - easy
#- insert [into table_name] values()
# - easy
#- delete [from table_name] where
# - easy
#- alter [table table_name] ..
# - also easy
#but even for the easy ones there are probably corner cases, so this feature might break something down the line ... who knows


actual_query = actual_query.lstrip()
if actual_query != '' and sqlformat:
    #first find out the query type i.e. select
    query_type = actual_query.split(' ')[0]
    if query_type.lower() == "select":
        #use sqlparse/sqlformat
        out = sqlparse.format(actual_query,reindent=True)
        s = ''
        select_complete = False
        from_inserted = False
        from_ok = False
        for line in out.split('\n'):
            if select_complete:
                s += " "+line.lstrip()
            elif line.lower().startswith("select") or line.startswith(' '):
                s += line.lstrip()
            else:
                if line.lower().startswith("from"):
                    #no need to insert from when there's one already
                    s += " "+line+" "
                    from_ok = True
                elif not from_ok and not from_inserted:
                    s += " from "+table_name+" "
                    s += line
                    from_inserted = True
                select_complete = True
        if not select_complete:
            s += " from "+table_name+" "
        actual_query = s

if max_count > 0:
    if actual_query == "" and queryfile_data == '':
        actual_query = "select * from \""+table_name+"\""
    elif args.select_all:
        actual_query = actual_query +  ";select * from \""+table_name+"\""

if calc != '':
   actual_query = actual_query + "; select "+calc

if queryfile_data != '':
   actual_query = queryfile_data + ';' + actual_query

#create tempfile for queries
temp_query_file = to_temp_file(actual_query + ";")

if args.sql:
    print(actual_query +";")
    exit(0)

#create tempfile for sqlite init file
init_file = tempfile.NamedTemporaryFile('w+',delete=False) #temporary file that we give sqlite for initializing settings such as mode

if mode == 'insert':
    init_file.write("\n.mode \"" + mode + "\" "+table_name+"\n")
else:
    init_file.write("\n.mode \"" + mode + "\"\n")
if mode == 'column':
    #init_file.write("\n.header yes\n")
    #in -m column mode we want the column widths to be optimal and so we tell sqlite in the sqlite init file about the right width
    column_width_string = ''
    for col in col_properties:
        column_width_string = column_width_string + ' ' + str(col_properties[col]["MAXLEN"])
    init_file.write("\n.width "+column_width_string+"\n")
if show_header:
    init_file.write(".header on\n")
if separator != '':
    init_file.write(".separator \"" + separator + "\"\n")
if args.dump:
    init_file.write(".dump\n")
    query_off = True
if not query_off:
    init_file.write(".read \"" + temp_query_file.name + "\"\n")

init_file.close()

command_string = sqlite_cmd + ' -init ' + init_file.name + ' '  + sql_db_file

if filename != '':
    command_string = command_string + ' > ' + filename

# echo " " | is a hack to prevent sqlite from getting into interactive model, maybe there is a more pythonic method
shell('echo " " | ' + command_string)

#remove temporary files
remove_temp_file(init_file)
remove_temp_file(temp_query_file)