From be655fbebc8a042b4ec40153e4da5fa70eca09b9 Mon Sep 17 00:00:00 2001 From: Geir Freysson Date: Mon, 27 Feb 2017 12:30:15 +0000 Subject: [PATCH] Fixes issue #719, savReaderWriter including headers in data We check the version of savReaderWriter being used and, if it is newer than 3.4.0, we start reading after line 0 (i.e. the header). --- core/tools/dp/spss/reader.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/tools/dp/spss/reader.py b/core/tools/dp/spss/reader.py index 5f70a4218..c2038a0d0 100644 --- a/core/tools/dp/spss/reader.py +++ b/core/tools/dp/spss/reader.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd import savReaderWriter as sr +from packaging import version from collections import defaultdict from quantipy.core.tools.dp.prep import start_meta, condense_dichotomous_set @@ -41,7 +42,11 @@ def extract_sav_data(sav_file, ioLocale='en_US.UTF-8', ioUtf8=True): """ see parse_sav_file doc """ with sr.SavReader(sav_file, returnHeader=True, ioLocale=ioLocale, ioUtf8=ioUtf8) as reader: header = next(reader) - dataframe = pd.DataFrame.from_records((x for x in reader), coerce_float=False) + # in later versions of savReaderWriter the first line of the dataset includes the headers + if(version.parse(sr.__version__) > version.parse('3.4.0')): + dataframe = pd.DataFrame.from_records((x for x in reader[0:]), coerce_float=False) + else: + dataframe = pd.DataFrame.from_records((x for x in reader), coerce_float=False) dataframe.columns = header for column in header: if isinstance(dataframe[column].dtype, np.object):