Skip to content

Commit ba41645

Browse files
authored
Merge pull request #56 from MartinPacker/OPML-import
Opml import
2 parents 6c3dc89 + dfd9d59 commit ba41645

File tree

3 files changed

+156
-33
lines changed

3 files changed

+156
-33
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,12 @@ will merge any bullet whose text or note is 'A1' with its parent. The text of th
159159

160160
### Input Files
161161

162-
Input files can be in one of four formats:
162+
Input files can be in one of five formats:
163163

164164
* A CSV file that is already in a format supported by iThoughts' Import function.
165165
* A flat file where each line is a new node. Spaces and tabs can be used to indent the text. Here the level of indentation is used to control what level the line is added at.
166166
* A Markdown nested list where each line is a new node. Spaces and tabs can be used to indent the text. Here the level of indentation is used to control what level the line is added at. Only an asterisk (`*`) followed by a space is supported as a list item marker.
167+
* An OPML XML file - with or without `head` or `body` elements.
167168
* An XML file, including one with namespaces (both default and named).
168169

169170
#### Nesting Level Detection

filterCSV

Lines changed: 153 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ import xml.etree.ElementTree as ElementTree
3333

3434
# from CSVTree import CSVTree
3535

36-
filterCSV_level = "1.4"
37-
filterCSV_date = "27 June, 2020"
36+
filterCSV_level = "1.5"
37+
filterCSV_date = "13 July, 2020"
3838

3939

4040
class ParameterParser:
@@ -269,35 +269,31 @@ class TreeReader:
269269
csvRows.append(newRow)
270270

271271
return self.ensureMandatoryColumns(csvRows) + (output,)
272+
273+
def readOPMLTree(self, tree):
274+
self.XMLNamespaces = {}
272275

273-
def readXMLTree(self, inputFile):
274-
output = []
275-
276-
# Prepare the input text for namespace parsing and XML parsing
277-
XMLText = "\n".join(inputFile)
278-
279-
# Create the XML parse tree
280-
self.XMLTree = ElementTree.fromstring(XMLText)
281-
282-
# Hunt for the default namespace
283-
split1 = XMLText.split('xmlns="')
284-
if len(split1) == 1:
285-
output.append("No default namespace specification.")
286-
self.defaultXMLNamespace = ""
276+
output = ["XML is specifically 'OPML'.\n"]
277+
278+
if tree[0].tag == "head":
279+
# Level 0 node will be the contents of the title element within the
280+
# head element
281+
titleText = tree[0][0].text.strip()
282+
haveHead = True
283+
284+
if (len(tree) > 1) & (tree[1].tag == "body"):
285+
haveBody = True
286+
bodyElement = tree[1]
287+
else:
288+
haveBody = False
287289
else:
288-
self.defaultXMLNamespace = split1[1].split('"')[0]
289-
output.append(f"Default namespace is '{self.defaultXMLNamespace}'")
290-
291-
# Hunt for other namespaces
292-
self.XMLNamespaces = {}
293-
split3 = XMLText.split("xmlns:")
294-
for fragment in range(len(split3)):
295-
if fragment > 0:
296-
split4 = split3[fragment].split('="')
297-
key = split4[0]
298-
split5 = split4[1].split('"')
299-
value = split5[0]
300-
self.XMLNamespaces[key] = value
290+
haveHead = False
291+
292+
if tree[0].tag == "body":
293+
haveBody = True
294+
bodyElement = tree[0]
295+
else:
296+
haveBody = False
301297

302298
# Build array of rows
303299
csvRows = []
@@ -333,10 +329,136 @@ class TreeReader:
333329
"level20",
334330
],
335331
)
332+
333+
if haveHead:
334+
headCSVRow = [
335+
"",
336+
"",
337+
"",
338+
"0",
339+
titleText
340+
]
341+
342+
csvRows.append(headCSVRow)
343+
344+
if haveBody:
345+
# Any level 1+ elements are children of the body element
346+
for child in bodyElement:
347+
csvRows += self._readOPMLTree(child,1)
348+
else:
349+
# All top-level children of tree, except head, are level 1
350+
for child in tree:
351+
if child.tag != "head":
352+
csvRows += self._readOPMLTree(child,1)
353+
else:
354+
# Don't have a head row so have to look for body or top-level outline
355+
# elements
356+
if haveBody:
357+
# All top-level children of body element are level 0
358+
for child in bodyElement:
359+
csvRows += self._readOPMLTree(child,0)
360+
else:
361+
# All top-level children of tree are level 0
362+
for child in tree:
363+
csvRows += self._readOPMLTree(child,0)
364+
365+
return self.ensureMandatoryColumns(csvRows) + (output,)
336366

337-
csvRows += self._readXMLTree(self.XMLTree, 0)
367+
def _readOPMLTree(self,XMLNode,level):
368+
csvRows = []
369+
nodeText = XMLNode.attrib["text"]
338370

339-
return self.ensureMandatoryColumns(csvRows) + (output,)
371+
nodeRow = [
372+
"",
373+
"",
374+
"",
375+
str(level)
376+
]
377+
378+
levelBlankCells = [""] * (level)
379+
nodeRow += levelBlankCells
380+
381+
nodeRow.append(nodeText)
382+
csvRows.append(nodeRow)
383+
384+
for child in XMLNode:
385+
csvRows += self._readOPMLTree(child, level + 1)
386+
387+
return csvRows
388+
389+
def readXMLTree(self, inputFile):
    """Parse XML input into CSV rows, handing OPML documents to readOPMLTree.

    The lines of inputFile are joined with newlines and parsed into
    self.XMLTree. If the root element's tag is "opml" the result of
    self.readOPMLTree is returned directly. Otherwise the default and
    prefixed namespace declarations are recorded in
    self.defaultXMLNamespace and self.XMLNamespaces, a header row is
    created, and the rows from self._readXMLTree are appended to it.

    Args:
        inputFile: an iterable of strings, one per line of the XML text.

    Returns:
        The result of self.ensureMandatoryColumns(csvRows) with the list of
        informational message strings appended as a final tuple element.
    """
    # Imported locally so the method does not rely on the module's imports
    import re

    output = []

    # Prepare the input text for namespace parsing and XML parsing
    XMLText = "\n".join(inputFile)

    # Create the XML parse tree
    self.XMLTree = ElementTree.fromstring(XMLText)

    # OPML has its own structure so it is treated separately from other XML
    if self.XMLTree.tag == "opml":
        return self.readOPMLTree(self.XMLTree)

    # Hunt for the default namespace. Either quote style is accepted, as is
    # whitespace around the equals sign; splitting on 'xmlns="' missed both.
    defaultMatch = re.search(
        r"""\bxmlns\s*=\s*(["'])(.*?)\1""", XMLText, re.DOTALL
    )
    if defaultMatch is None:
        output.append("No default namespace specification.")
        self.defaultXMLNamespace = ""
    else:
        self.defaultXMLNamespace = defaultMatch.group(2)
        output.append(f"Default namespace is '{self.defaultXMLNamespace}'")

    # Hunt for other namespaces. Only complete prefix="value" declarations
    # are matched, so a stray "xmlns:" without a quoted value no longer
    # raises IndexError. A repeated prefix keeps its last declared value.
    self.XMLNamespaces = {
        declaration.group(1): declaration.group(3)
        for declaration in re.finditer(
            r"""\bxmlns:([^\s=]+)\s*=\s*(["'])(.*?)\2""", XMLText, re.DOTALL
        )
    }

    # Build array of rows, starting with a header row: the attribute columns
    # followed by level0 through level20
    headerRow = ["position", "colour", "shape", "level"]
    headerRow += [f"level{levelNumber}" for levelNumber in range(21)]
    csvRows = [headerRow]

    csvRows += self._readXMLTree(self.XMLTree, 0)

    return self.ensureMandatoryColumns(csvRows) + (output,)
340462

341463
def resolveNamespaces(self, textToEdit):
342464
editedText = textToEdit.replace("{" + self.defaultXMLNamespace + "}", "")

tests/iThoughts-OPML.opml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<opml version="1.0">
22
<head>
33
<title>
4-
Central Idea
4+
Central Idea - Really Level 0
55
</title>
66
</head>
77
<body>

0 commit comments

Comments
 (0)