diff --git a/Orange/OrangeWidgets/Prototypes/OWCSVFileImport.py b/Orange/OrangeWidgets/Prototypes/OWCSVFileImport.py
index 274f43d09..d0e68d7a9 100644
--- a/Orange/OrangeWidgets/Prototypes/OWCSVFileImport.py
+++ b/Orange/OrangeWidgets/Prototypes/OWCSVFileImport.py
@@ -1,5 +1,6 @@
"""
CSV File import
+icons/FileCSV.png
Import comma separated file
"""
@@ -49,7 +50,7 @@ def reload_icon(self):
class OWCSVFileImport(OWWidget):
- settingsList = ["recent_files", "hints"]
+ settingsList = ["recent_files", "hints","ignore_first_lines"]
DELIMITERS = [("Tab", "\t"),
("Comma", ","),
@@ -75,6 +76,8 @@ def __init__(self, parent=None, signalManager=None,
self.skipinitialspace = True
self.has_header = True
self.has_orange_header = True
+ self.ignore_first_lines = 0 #3
+ self.add_simple_orange_header = False #
# List of recent opened files.
self.recent_files = []
@@ -190,6 +193,12 @@ def __init__(self, parent=None, signalManager=None,
form.addRow(self.skipinitialspace_check)
+ self.spin_sk_ln= OWGUI.spin(box, self, "ignore_first_lines", label="Skip first lines", # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ min=0, max=1000, step=1,
+ callback=self.ignore_first_lines_changed,
+ controlWidth=40,
+ keyboardTracking=False)
+
self.has_header_check = \
QCheckBox(objectName="has_header_check",
checked=self.has_header,
@@ -254,6 +263,16 @@ def quote_changed(self):
self.quote = str(self.quote_edit.text())
self.update_preview()
+ def ignore_first_lines_changed(self):
+ # Passed as the `callback` of the "Skip first lines" spin box.
+ # Re-reads the head of the selected file (after skipping the leading
+ # lines) into self.selected_file_head and calls update_preview().
+ # NOTE(review): presumably OWGUI.spin has already stored the new value
+ # in self.ignore_first_lines before invoking this callback -- confirm.
+ if self.selected_file:
+ with open(self.selected_file, "rU") as f:
+ self.skipinitiallines(f)
+ self.selected_file_head=[]
+ for i, line in zip(range(30), f):  # zip with range(30): keep at most 30 lines
+ self.selected_file_head.append(line)
+ self.update_preview()
+
def missing_changed(self):
self.missing = str(self.missing_edit.text())
self.update_preview()
@@ -270,6 +289,20 @@ def skipinitialspace_changed(self):
self.skipinitialspace = self.skipinitialspace_check.isChecked()
self.update_preview()
+    def skipinitiallines(self, file):
+        """Advance `file` past the first `self.ignore_first_lines` lines.
+
+        `file` is an open file-like object providing `readline()`. It is
+        consumed in place, so on return it is positioned at the first line
+        that should be parsed. Stops early when the file has fewer lines
+        than requested. Returns None.
+        """
+        # Clamp at zero: a negative count never reaches 0 by decrementing
+        # and would silently consume the whole file.
+        to_skip = max(int(self.ignore_first_lines or 0), 0)
+        for _ in range(to_skip):
+            if not file.readline():
+                # EOF reached before all requested lines were skipped.
+                break
+
+    def open_and_skiplines(self, file, mode="rb"):
+        """Return an open file positioned after the skipped leading lines.
+
+        A path string is opened with `mode`; any other object is assumed
+        to already be a readable file-like object and is used as is. The
+        returned object is left open, so the caller has to close it.
+        """
+        handle = open(file, mode) if isinstance(file, basestring) else file
+        self.skipinitiallines(handle)
+        return handle
+
+
def set_selected_file(self, filename):
basedir, name = os.path.split(filename)
index_to_remove = None
@@ -291,7 +324,7 @@ def set_selected_file(self, filename):
hints = self.hints[filename]
else:
try:
- hints = sniff_csv(filename)
+ hints = self.sniff_csv(filename)
except csv.Error, ex:
self.warning(1, str(ex))
hints = dict(DEFAULT_HINTS)
@@ -338,6 +371,7 @@ def set_selected_file(self, filename):
self.selected_file = filename
self.selected_file_head = []
with open(self.selected_file, "rU") as f:
+ self.skipinitiallines(f)
for i, line in zip(range(30), f):
self.selected_file_head.append(line)
@@ -357,7 +391,7 @@ def update_preview(self):
hints["skipinitialspace"] = self.skipinitialspace
hints["DK"] = self.missing or None
try:
- data = Orange.data.io.load_csv(head, delimiter=self.delimiter,
+ data = Orange.data.io.load_csv(head, delimiter=self.delimiter,
quotechar=self.quote,
has_header=self.has_header,
has_types=self.has_orange_header,
@@ -379,7 +413,9 @@ def send_data(self):
self.error(0)
if self.selected_file:
try:
- data = Orange.data.io.load_csv(self.selected_file,
+ with open(self.selected_file, "rb") as f:
+ self.skipinitiallines(f)
+ data = Orange.data.io.load_csv(f,
delimiter=self.delimiter,
quotechar=self.quote,
has_header=self.has_header,
@@ -397,26 +433,28 @@ def send_data(self):
self.send("Data", self.data)
-def sniff_csv(file):
- snifer = csv.Sniffer()
- if isinstance(file, basestring):
- file = open(file, "rU")
-
- sample = file.read(2 ** 20) # max 1MB sample
- dialect = snifer.sniff(sample)
- has_header = snifer.has_header(sample)
-
- return {"delimiter": dialect.delimiter,
- "doublequote": dialect.doublequote,
- "escapechar": dialect.escapechar,
- "quotechar": dialect.quotechar,
- "quoting": dialect.quoting,
- "skipinitialspace": dialect.skipinitialspace,
- "has_header": has_header,
- "has_orange_header": False,
- "skipinitialspace": True,
- "DK": None,
- }
+    def sniff_csv(self, file):
+        """Guess the CSV format hints for `file`.
+
+        `file` is either a path string or an open file-like object. A path
+        is opened here, the first `self.ignore_first_lines` lines are
+        skipped, a sample is read and the file is closed again. A file-like
+        object is sampled from its current position without skipping.
+
+        Returns a dict of hints: the sniffed dialect attributes plus
+        `has_header`, `has_orange_header`, `skipinitialspace` and `DK`.
+        `csv.Error` raised by `csv.Sniffer` when the dialect cannot be
+        determined is propagated to the caller.
+        """
+        snifer = csv.Sniffer()
+        if isinstance(file, basestring):
+            # Universal-newline text mode, the same mode used when reading
+            # the preview head, so skipped lines are counted identically
+            # for "\n", "\r\n" and "\r" terminated files.
+            with open(file, "rU") as f:
+                self.skipinitiallines(f)
+                sample = f.read(2 ** 20)  # max 1MB sample
+        else:
+            sample = file.read(2 ** 20)  # max 1MB sample
+        dialect = snifer.sniff(sample)
+        has_header = snifer.has_header(sample)
+
+        return {"delimiter": dialect.delimiter,
+                "doublequote": dialect.doublequote,
+                "escapechar": dialect.escapechar,
+                "quotechar": dialect.quotechar,
+                "quoting": dialect.quoting,
+                "has_header": has_header,
+                "has_orange_header": False,
+                # Always True regardless of the sniffed value; the former
+                # duplicate `dialect.skipinitialspace` entry was dead.
+                "skipinitialspace": True,
+                "DK": None,
+                }
if __name__ == "__main__":
import sys
diff --git a/Orange/OrangeWidgets/Prototypes/icons/FileCSV.png b/Orange/OrangeWidgets/Prototypes/icons/FileCSV.png
new file mode 100644
index 000000000..cabb4cc0e
Binary files /dev/null and b/Orange/OrangeWidgets/Prototypes/icons/FileCSV.png differ
diff --git a/Orange/data/io.py b/Orange/data/io.py
index 72424b7b5..d385d85e6 100644
--- a/Orange/data/io.py
+++ b/Orange/data/io.py
@@ -630,6 +630,7 @@ def load_csv(file, create_new_on=MakeStatus.Incompatible,
"""Load an Orange.data.Table from a csv file."""
file = as_open_file(file, "rU")
+ start=file.tell()
snifer = csv.Sniffer()
# Max 5MB sample
@@ -647,7 +648,7 @@ def load_csv(file, create_new_on=MakeStatus.Incompatible,
except csv.Error:
has_header = False
- file.seek(0) # Rewind
+ file.seek(start) # Rewind
def kwparams(**kwargs):
"""Return not None kwargs.
@@ -724,7 +725,7 @@ def kwparams(**kwargs):
var_attrs += [None] * (len(header) - len(var_attrs))
# start from the beginning
- file.seek(0)
+ file.seek(start)
reader = csv.reader(file, dialect=dialect, **fmtparam)
for defined in [has_header, has_types, has_annotations]: