Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import CSV File optionally ignore some first lines, and add Orange header … #34

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 62 additions & 24 deletions Orange/OrangeWidgets/Prototypes/OWCSVFileImport.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
<name>CSV File import</name>
<icon>icons/FileCSV.png</icon>
<description>Import comma separated file</description>

"""
Expand Down Expand Up @@ -49,7 +50,7 @@ def reload_icon(self):


class OWCSVFileImport(OWWidget):
settingsList = ["recent_files", "hints"]
settingsList = ["recent_files", "hints","ignore_first_lines"]

DELIMITERS = [("Tab", "\t"),
("Comma", ","),
Expand All @@ -75,6 +76,8 @@ def __init__(self, parent=None, signalManager=None,
self.skipinitialspace = True
self.has_header = True
self.has_orange_header = True
self.ignore_first_lines = 0 #3
self.add_simple_orange_header = False #

# List of recent opened files.
self.recent_files = []
Expand Down Expand Up @@ -190,6 +193,12 @@ def __init__(self, parent=None, signalManager=None,

form.addRow(self.skipinitialspace_check)

self.spin_sk_ln= OWGUI.spin(box, self, "ignore_first_lines", label="Skip first lines", # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!
min=0, max=1000, step=1,
callback=self.ignore_first_lines_changed,
controlWidth=40,
keyboardTracking=False)

self.has_header_check = \
QCheckBox(objectName="has_header_check",
checked=self.has_header,
Expand Down Expand Up @@ -254,6 +263,16 @@ def quote_changed(self):
self.quote = str(self.quote_edit.text())
self.update_preview()

def ignore_first_lines_changed(self):
    """Refresh the cached file head and the preview after the skip count changes.

    When a file is selected, it is reopened, the configured number of
    leading lines is skipped via ``skipinitiallines`` and up to 30 of the
    following lines are stored in ``self.selected_file_head``.
    """
    if self.selected_file:
        with open(self.selected_file, "rU") as stream:
            self.skipinitiallines(stream)
            # zip() against range(30) caps the cached head at 30 lines.
            self.selected_file_head = [
                row for _, row in zip(range(30), stream)
            ]
    # NOTE(review): indentation was lost in the reviewed view; the preview
    # refresh is assumed to be unconditional, as in the other *_changed
    # handlers of this widget -- confirm against the real file.
    self.update_preview()

def missing_changed(self):
self.missing = str(self.missing_edit.text())
self.update_preview()
Expand All @@ -270,6 +289,20 @@ def skipinitialspace_changed(self):
self.skipinitialspace = self.skipinitialspace_check.isChecked()
self.update_preview()

def skipinitiallines(self, file):
    """Advance ``file`` past the first ``self.ignore_first_lines`` lines.

    Stops early when the end of the file is reached. A missing (``None``),
    zero or negative count skips nothing; previously a negative or
    non-integral count never reached zero and consumed the whole file.

    :param file: open file-like object providing ``readline()``.
    """
    remaining = max(self.ignore_first_lines or 0, 0)
    # readline() returns an empty string at EOF, which ends the loop early.
    while remaining > 0 and file.readline():
        remaining -= 1

def open_and_skiplines(self, file, mode="rb"):
    """Return an open file positioned after the lines to be skipped.

    :param file: a path to open, or an already open file-like object
        (anything exposing ``read``), which is used as is.
    :param mode: mode passed to ``open`` when ``file`` is a path.
    :return: the open file object, advanced by ``skipinitiallines``; the
        caller is responsible for closing it.
    """
    opened_here = not hasattr(file, "read")
    if opened_here:
        file = open(file, mode)
    try:
        self.skipinitiallines(file)
    except Exception:
        # Do not leak a handle this method opened itself; a caller-supplied
        # file object stays under the caller's control.
        if opened_here:
            file.close()
        raise
    return file


def set_selected_file(self, filename):
basedir, name = os.path.split(filename)
index_to_remove = None
Expand All @@ -291,7 +324,7 @@ def set_selected_file(self, filename):
hints = self.hints[filename]
else:
try:
hints = sniff_csv(filename)
hints = self.sniff_csv(filename)
except csv.Error, ex:
self.warning(1, str(ex))
hints = dict(DEFAULT_HINTS)
Expand Down Expand Up @@ -338,6 +371,7 @@ def set_selected_file(self, filename):
self.selected_file = filename
self.selected_file_head = []
with open(self.selected_file, "rU") as f:
self.skipinitiallines(f)
for i, line in zip(range(30), f):
self.selected_file_head.append(line)

Expand All @@ -357,7 +391,7 @@ def update_preview(self):
hints["skipinitialspace"] = self.skipinitialspace
hints["DK"] = self.missing or None
try:
data = Orange.data.io.load_csv(head, delimiter=self.delimiter,
data = Orange.data.io.load_csv(head, delimiter=self.delimiter,
quotechar=self.quote,
has_header=self.has_header,
has_types=self.has_orange_header,
Expand All @@ -379,7 +413,9 @@ def send_data(self):
self.error(0)
if self.selected_file:
try:
data = Orange.data.io.load_csv(self.selected_file,
with open(self.selected_file, "rb") as f:
self.skipinitiallines(f)
data = Orange.data.io.load_csv(f,
delimiter=self.delimiter,
quotechar=self.quote,
has_header=self.has_header,
Expand All @@ -397,26 +433,28 @@ def send_data(self):
self.send("Data", self.data)


def sniff_csv(file):
    """Guess CSV format hints from at most 1MB of ``file``'s content.

    :param file: a path, or an open file-like object (anything exposing
        ``read``), which is read from its current position and left open.
    :return: dict with the sniffed ``delimiter``, ``doublequote``,
        ``escapechar``, ``quotechar``, ``quoting`` and ``has_header``,
        plus fixed values for ``has_orange_header``, ``skipinitialspace``
        and ``DK``.
    :raises csv.Error: propagated from ``csv.Sniffer`` when the sample
        cannot be analysed.
    """
    sniffer = csv.Sniffer()
    if hasattr(file, "read"):
        sample = file.read(2 ** 20)  # max 1MB sample
    else:
        # Close the handle we open ourselves instead of leaking it.
        with open(file, "rU") as stream:
            sample = stream.read(2 ** 20)  # max 1MB sample

    dialect = sniffer.sniff(sample)
    has_header = sniffer.has_header(sample)

    return {
        "delimiter": dialect.delimiter,
        "doublequote": dialect.doublequote,
        "escapechar": dialect.escapechar,
        "quotechar": dialect.quotechar,
        "quoting": dialect.quoting,
        "has_header": has_header,
        "has_orange_header": False,
        # Always True: the sniffed dialect.skipinitialspace was already
        # being discarded by a duplicate dict key, so it is not read here.
        "skipinitialspace": True,
        "DK": None,
    }
def sniff_csv(self, file):
    """Guess CSV format hints from at most 1MB of ``file``'s content.

    :param file: a path, or an open file-like object (anything exposing
        ``read``). A path is opened here, its leading lines are skipped
        via ``skipinitiallines`` and it is closed again; a file-like
        object is read from its current position with no lines skipped.
    :return: dict with the sniffed ``delimiter``, ``doublequote``,
        ``escapechar``, ``quotechar``, ``quoting`` and ``has_header``,
        plus fixed values for ``has_orange_header``, ``skipinitialspace``
        and ``DK``.
    :raises csv.Error: propagated from ``csv.Sniffer`` when the sample
        cannot be analysed.
    """
    sniffer = csv.Sniffer()
    if hasattr(file, "read"):
        sample = file.read(2 ** 20)  # max 1MB sample
    else:
        with open(file, "rb") as stream:
            self.skipinitiallines(stream)
            sample = stream.read(2 ** 20)  # max 1MB sample

    dialect = sniffer.sniff(sample)
    has_header = sniffer.has_header(sample)

    return {
        "delimiter": dialect.delimiter,
        "doublequote": dialect.doublequote,
        "escapechar": dialect.escapechar,
        "quotechar": dialect.quotechar,
        "quoting": dialect.quoting,
        "has_header": has_header,
        "has_orange_header": False,
        # Always True: the sniffed dialect.skipinitialspace was already
        # being discarded by a duplicate dict key, so it is not read here.
        "skipinitialspace": True,
        "DK": None,
    }

if __name__ == "__main__":
import sys
Expand Down
Binary file added Orange/OrangeWidgets/Prototypes/icons/FileCSV.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 3 additions & 2 deletions Orange/data/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ def load_csv(file, create_new_on=MakeStatus.Incompatible,
"""Load an Orange.data.Table from a csv file."""

file = as_open_file(file, "rU")
start=file.tell()
snifer = csv.Sniffer()

# Max 5MB sample
Expand All @@ -647,7 +648,7 @@ def load_csv(file, create_new_on=MakeStatus.Incompatible,
except csv.Error:
has_header = False

file.seek(0) # Rewind
file.seek(start) # Rewind

def kwparams(**kwargs):
"""Return not None kwargs.
Expand Down Expand Up @@ -724,7 +725,7 @@ def kwparams(**kwargs):
var_attrs += [None] * (len(header) - len(var_attrs))

# start from the beginning
file.seek(0)
file.seek(start)
reader = csv.reader(file, dialect=dialect, **fmtparam)

for defined in [has_header, has_types, has_annotations]:
Expand Down