Skip to content

Commit c2c4897

Browse files
committed
Changed xml2csv's --header option to --noheader
1 parent 29c459e commit c2c4897

5 files changed

Lines changed: 13 additions & 14 deletions

File tree

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# xmlutils.py
2-
xmlutils.py is a set of Python utilities for processing xml files serially,
3-
namely converting them to other formats (SQL, CSV, JSON). The scripts use
4-
ElementTree.iterparse() to iterate through nodes in an XML file, thus not
5-
needing to load the whole DOM into memory. The scripts can be used to churn
2+
xmlutils.py is a set of Python utilities for processing xml files serially
3+
and converting them to various formats (SQL, CSV, JSON). The scripts use
4+
ElementTree.iterparse() to iterate through nodes in an XML document, thus not
5+
needing to load the entire DOM into memory. The scripts can be used to churn
66
through large XML files (albeit taking long :P) without memory hiccups.
77

88
Blind conversion of XML to CSV and SQL is not recommended.
@@ -43,11 +43,11 @@ xml2csv --input "samples/fruits.xml" --output "samples/fruits.csv" --tag "item"
4343
--output Output CSV file's filename*
4444
--tag The tag of the node that represents a single record (Eg: item, record)*
4545
--delimiter Delimiter for separating items in a row. Default is , (a comma)
46-
--ignore A space separated list of element tags in the XML document to ignore.
47-
--header Whether to print the CSV header (list of fields) in the first line; 1=yes, 0=no. Default is 1.
46+
--ignore A space separated list of element tags in the XML document to ignore
47+
--noheader Exclude CSV fields header (first line). Off by default
4848
--encoding Character encoding of the document. Default is utf-8
4949
--limit Limit the number of records to be processed from the document to a particular number. Default is no limit (-1)
50-
--buffer The number of records to be kept in memory before it is written to the output CSV file. Helps reduce the number of disk writes. Default is 1000.
50+
--buffer_size The number of records to be kept in memory before they are written to the output CSV file. Helps reduce the number of disk writes. Default is 1000
5151
```
5252

5353
## xml2sql

samples/fruits.csv

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
id,name,price,quantity
21
"1000","Apple","4","133"
32
"1001","Apricot","5","175"
43
"1002","Avocado","5","182"

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
setup(
66
name="xmlutils",
7-
version="0.94",
7+
version="1.0",
88
description="A set of utilities for processing XML documents and converting to other formats",
99
author="Kailash Nadh",
1010
author_email="kailash.nadh@gmail.com",

xmlutils/console.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ def run_xml2csv():
5050
parser.add_argument('--input', dest='input_file', required=True, help='input xml filename')
5151
parser.add_argument('--output', dest='output_file', required=True, help='output csv filename')
5252
parser.add_argument('--tag', dest='tag', required=True, help='the record tag. eg: item')
53-
parser.add_argument('--delimiter', dest='delimiter', default=',', help='delimiter character. (default=, comma-space)')
53+
parser.add_argument('--delimiter', dest='delimiter', default=',', help='delimiter character. (default=,)')
5454
parser.add_argument('--ignore', dest='ignore', default='', nargs='+', help='list of tags to ignore')
55-
parser.add_argument('--header', dest='header', action='store_false', default=True, help='print csv header (default=True)')
55+
parser.add_argument('--noheader', dest='noheader', action='store_true', help='exclude csv header (default=False)')
5656
parser.add_argument('--encoding', dest='encoding', default='utf-8', help='character encoding (default=utf-8)')
5757
parser.add_argument('--limit', type=int, dest='limit', default=-1, help='maximum number of records to process')
5858
parser.add_argument('--buffer_size', type=int, dest='buffer_size', default='1000',
@@ -62,7 +62,7 @@ def run_xml2csv():
6262

6363
converter = xml2csv(args.input_file, args.output_file, args.encoding)
6464
num = converter.convert(tag=args.tag, delimiter=args.delimiter, ignore=args.ignore,
65-
header=args.header, limit=args.limit, buffer_size=args.buffer_size)
65+
noheader=args.noheader, limit=args.limit, buffer_size=args.buffer_size)
6666

6767
print "\n\nWrote", num, "records to", args.output_file
6868

xmlutils/xml2csv.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def __init__(self, input_file, output_file, encoding='utf-8'):
3636
raise
3737

3838

39-
def convert(self, tag="item", delimiter=",", ignore=[], header=True,
39+
def convert(self, tag="item", delimiter=",", ignore=[], noheader=False,
4040
limit=-1, buffer_size=1000):
4141

4242
"""Convert the XML file to a CSV file
@@ -69,7 +69,7 @@ def convert(self, tag="item", delimiter=",", ignore=[], header=True,
6969
# if elem is an unignored child node of the record tag, it should be written to buffer
7070
should_write = elem.tag != tag and started and elem.tag not in ignore
7171
# and if a header is required and if there isn't one
72-
should_tag = not tagged and should_write and header
72+
should_tag = not tagged and should_write and not noheader
7373

7474
if event == 'start':
7575
if elem.tag == tag and not started:

0 commit comments

Comments
 (0)