diff --git a/client/app/assets/images/db-logos/excel.png b/client/app/assets/images/db-logos/excel.png
new file mode 100755
index 0000000000..80715081f6
Binary files /dev/null and b/client/app/assets/images/db-logos/excel.png differ
diff --git a/redash/query_runner/csv.py b/redash/query_runner/csv.py
new file mode 100644
index 0000000000..9f18b39582
--- /dev/null
+++ b/redash/query_runner/csv.py
@@ -0,0 +1,111 @@
+import json
+import logging
+import yaml
+import re
+
+from redash.query_runner import *
+from redash.utils import JSONEncoder
+
+logger = logging.getLogger(__name__)
+
+try:
+    import pandas as pd
+    import numpy as np
+    enabled = True
+except ImportError:
+    enabled = False
+
+
+class CSV(BaseQueryRunner):
+    """Query runner that loads a CSV file with pandas.
+
+    The query text is a YAML mapping: ``url`` names the file to read and
+    every other key is passed to ``pandas.read_csv`` as a keyword argument.
+    """
+
+    @classmethod
+    def type(cls):
+        return "csv"
+
+    @classmethod
+    def enabled(cls):
+        return enabled
+
+    @classmethod
+    def configuration_schema(cls):
+        return {
+            'type': 'object',
+            'properties': {},
+        }
+
+    def __init__(self, configuration):
+        super(CSV, self).__init__(configuration)
+        self.syntax = "csv"
+
+    def test_connection(self):
+        # Nothing to check up front: the file location arrives with each query.
+        pass
+
+    def run_query(self, query, user):
+        """Read the CSV file described by ``query`` and serialize it.
+
+        :param query: YAML mapping with a ``url`` key; ``/* ... */`` comments
+            are stripped before parsing and the remaining keys are forwarded
+            to ``pandas.read_csv``.
+        :param user: unused.
+        :return: ``(json_data, error)``; exactly one of the two is ``None``.
+        """
+        query = re.sub(r"/\*(.|\n)*?\*/", "", query).strip()
+        try:
+            args = yaml.safe_load(query)
+        except yaml.YAMLError as e:
+            return None, "Error parsing query as YAML. {0}".format(str(e))
+        # Reject a malformed query here instead of letting pandas fail later
+        # on an empty path with a misleading message.
+        if not isinstance(args, dict) or 'url' not in args:
+            return None, "Query must be a YAML mapping with a 'url' key."
+        path = args.pop('url')
+
+        # NOTE(review): `path` and the extra keyword arguments come straight
+        # from the query text, and pandas accepts local paths as well as URLs.
+        # Confirm that everyone allowed to run queries may read those files.
+        try:
+            df = pd.read_csv(path, **args)
+
+            data = {'columns': [], 'rows': []}
+            conversions = [
+                {'pandas_type': np.integer, 'redash_type': 'integer'},
+                {'pandas_type': np.inexact, 'redash_type': 'float'},
+                # NaT.strftime raises, so map missing timestamps to None.
+                {'pandas_type': np.datetime64, 'redash_type': 'datetime',
+                 'to_redash': lambda x: None if pd.isnull(x) else x.strftime('%Y-%m-%d %H:%M:%S')},
+                {'pandas_type': np.bool_, 'redash_type': 'boolean'},
+                # np.object_, not the np.object alias that NumPy 1.24 removed.
+                {'pandas_type': np.object_, 'redash_type': 'string'},
+            ]
+            # Columns whose dtype matches no conversion are left out.
+            labels = []
+            for dtype, label in zip(df.dtypes, df.columns):
+                for conversion in conversions:
+                    if issubclass(dtype.type, conversion['pandas_type']):
+                        data['columns'].append({'name': label, 'friendly_name': label, 'type': conversion['redash_type']})
+                        labels.append(label)
+                        func = conversion.get('to_redash')
+                        if func:
+                            df[label] = df[label].apply(func)
+                        break
+            data['rows'] = df[labels].replace({np.nan: None}).to_dict(orient='records')
+
+            json_data = json.dumps(data, cls=JSONEncoder)
+            error = None
+        except KeyboardInterrupt:
+            error = "Query cancelled by user."
+            json_data = None
+        except Exception as e:
+            logger.exception("Error reading %s", path)
+            error = "Error reading {0}. {1}".format(path, str(e))
+            json_data = None
+
+        return json_data, error
+
+register(CSV)
diff --git a/redash/query_runner/excel.py b/redash/query_runner/excel.py
new file mode 100644
index 0000000000..817865ef1d
--- /dev/null
+++ b/redash/query_runner/excel.py
@@ -0,0 +1,112 @@
+import json
+import logging
+import yaml
+import re
+
+from redash.query_runner import *
+from redash.utils import JSONEncoder
+
+logger = logging.getLogger(__name__)
+
+try:
+    import pandas as pd
+    import xlrd
+    import numpy as np
+    enabled = True
+except ImportError:
+    enabled = False
+
+
+class Excel(BaseQueryRunner):
+    """Query runner that loads an Excel workbook with pandas.
+
+    The query text is a YAML mapping: ``url`` names the file to read and
+    every other key is passed to ``pandas.read_excel`` as a keyword argument.
+    """
+
+    @classmethod
+    def type(cls):
+        return "excel"
+
+    @classmethod
+    def enabled(cls):
+        return enabled
+
+    @classmethod
+    def configuration_schema(cls):
+        return {
+            'type': 'object',
+            'properties': {},
+        }
+
+    def __init__(self, configuration):
+        super(Excel, self).__init__(configuration)
+        self.syntax = "excel"
+
+    def test_connection(self):
+        # Nothing to check up front: the file location arrives with each query.
+        pass
+
+    def run_query(self, query, user):
+        """Read the Excel file described by ``query`` and serialize it.
+
+        :param query: YAML mapping with a ``url`` key; ``/* ... */`` comments
+            are stripped before parsing and the remaining keys are forwarded
+            to ``pandas.read_excel``.
+        :param user: unused.
+        :return: ``(json_data, error)``; exactly one of the two is ``None``.
+        """
+        query = re.sub(r"/\*(.|\n)*?\*/", "", query).strip()
+        try:
+            args = yaml.safe_load(query)
+        except yaml.YAMLError as e:
+            return None, "Error parsing query as YAML. {0}".format(str(e))
+        # Reject a malformed query here instead of letting pandas fail later
+        # on an empty path with a misleading message.
+        if not isinstance(args, dict) or 'url' not in args:
+            return None, "Query must be a YAML mapping with a 'url' key."
+        path = args.pop('url')
+
+        # NOTE(review): `path` and the extra keyword arguments come straight
+        # from the query text, and pandas accepts local paths as well as URLs.
+        # Confirm that everyone allowed to run queries may read those files.
+        try:
+            df = pd.read_excel(path, **args)
+
+            data = {'columns': [], 'rows': []}
+            conversions = [
+                {'pandas_type': np.integer, 'redash_type': 'integer'},
+                {'pandas_type': np.inexact, 'redash_type': 'float'},
+                # NaT.strftime raises, so map missing timestamps to None.
+                {'pandas_type': np.datetime64, 'redash_type': 'datetime',
+                 'to_redash': lambda x: None if pd.isnull(x) else x.strftime('%Y-%m-%d %H:%M:%S')},
+                {'pandas_type': np.bool_, 'redash_type': 'boolean'},
+                # np.object_, not the np.object alias that NumPy 1.24 removed.
+                {'pandas_type': np.object_, 'redash_type': 'string'},
+            ]
+            # Columns whose dtype matches no conversion are left out.
+            labels = []
+            for dtype, label in zip(df.dtypes, df.columns):
+                for conversion in conversions:
+                    if issubclass(dtype.type, conversion['pandas_type']):
+                        data['columns'].append({'name': label, 'friendly_name': label, 'type': conversion['redash_type']})
+                        labels.append(label)
+                        func = conversion.get('to_redash')
+                        if func:
+                            df[label] = df[label].apply(func)
+                        break
+            data['rows'] = df[labels].replace({np.nan: None}).to_dict(orient='records')
+
+            json_data = json.dumps(data, cls=JSONEncoder)
+            error = None
+        except KeyboardInterrupt:
+            error = "Query cancelled by user."
+            json_data = None
+        except Exception as e:
+            logger.exception("Error reading %s", path)
+            error = "Error reading {0}. {1}".format(path, str(e))
+            json_data = None
+
+        return json_data, error
+
+register(Excel)
diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py
index 539805d88b..4596e2e33a 100644
--- a/redash/settings/__init__.py
+++ b/redash/settings/__init__.py
@@ -364,6 +364,8 @@ def email_server_is_configured():
     "redash.query_runner.exasol",
     "redash.query_runner.cloudwatch",
     "redash.query_runner.cloudwatch_insights",
+    "redash.query_runner.csv",
+    "redash.query_runner.excel",
 ]
 
 enabled_query_runners = array_from_string(