-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathooolib.py
More file actions
1865 lines (1681 loc) · 74 KB
/
ooolib.py
File metadata and controls
1865 lines (1681 loc) · 74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"ooolib-python - Copyright (C) 2006-2008 Joseph Colton"
# ooolib-python - Python module for creating Open Document Format documents.
# Copyright (C) 2006-2008 Joseph Colton
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# You can contact me by email at josephcolton@gmail.com
# Import Standard Modules
import zipfile # Needed for reading/writing documents
import time
import sys
import glob
import os
import re
import xml.parsers.expat # Needed for parsing documents
def version():
    """Return the ooolib-python version identifier string."""
    release = "ooolib-python-0.0.16"
    return release
def clean_string(data):
    """Return an XML friendly copy of the data string.

    ``data`` is first converted to text.  The five XML special characters
    are replaced by their entity references, tabs become
    ``<text:tab-stop/>`` and newlines become ``<text:line-break/>``.
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    data = text_type(data)  # This line thanks to Chris Ender

    # '&' has to be handled first, otherwise the ampersands introduced by
    # the other entity references would be escaped a second time.
    replacements = (
        ('&', '&amp;'),
        ("'", '&apos;'),
        ('"', '&quot;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('\t', '<text:tab-stop/>'),
        ('\n', '<text:line-break/>'),
    )
    for raw, escaped in replacements:
        data = data.replace(raw, escaped)
    return data
class XML:
    """XML Class - Used to convert nested lists into XML.

    The nested lists passed to this class are only read, never modified,
    so the same structure can be converted more than once.
    """

    def __init__(self):
        "Initialize ooolib XML instance"
        pass

    def _xmldata(self, data):
        "Return the text of a ['data', value] list"
        return '%s' % data[1]

    def _xmlelement(self, data):
        "Return name=\"value\" for an ['element', name, value] list"
        return '%s="%s"' % (data[1], data[2])

    def _xmltagline(self, data):
        "Return a self-closing tag for a ['tagline', name, ...] list"
        parts = ['<%s' % data[1]]
        for child in data[2:]:
            # Only elements are expected here; anything else ends the tag.
            if child[0] != 'element':
                break
            parts.append(self._xmlelement(child))
        return '%s/>\n' % ' '.join(parts)

    def _xmltag(self, data):
        "Return opening tag, children and closing tag for a ['tag', ...] list"
        name = data[1]
        pieces = ['<%s' % name]
        # True while we are still inside the opening tag collecting elements.
        in_opening_tag = True
        for child in data[2:]:
            kind = child[0]
            if in_opening_tag:
                if kind == 'element':
                    pieces.append(' %s' % self._xmlelement(child))
                    continue
                # First non-element child: close the opening tag.  Nested
                # tags start on their own line, plain data does not.
                in_opening_tag = False
                pieces.append('>')
                if kind in ('tag', 'tagline'):
                    pieces.append('\n')
            if kind == 'tag':
                pieces.append(self._xmltag(child))
            elif kind == 'tagline':
                pieces.append(self._xmltagline(child))
            elif kind == 'data':
                pieces.append(self._xmldata(child))
            # Elements that follow content, and unknown kinds, are ignored.
        if in_opening_tag:
            # No content at all: the opening tag still has to be closed.
            pieces.append('>\n')
        pieces.append('</%s>\n' % name)
        return ''.join(pieces)

    def convert(self, data):
        """Convert nested lists into XML

        The convert method takes a nested lists and converts them
        into XML to be used in Open Document Format documents.
        There are four types of lists that are recognized at this
        time.  They are as follows:

        'tag' - Tag opens a set of data that is eventually closed
        with a similar tag.
        List: ['tag', 'xml']
        XML: <xml></xml>

        'tagline' - Taglines are similar to tags, except they open
        and close themselves.
        List: ['tagline', 'xml']
        XML: <xml/>

        'element' - Elements are pieces of information stored in an
        opening tag or tagline.
        List: ['element', 'color', 'blue']
        XML: color="blue"

        'data' - Data is plain text directly inserted into the XML
        document.
        List: ['data', 'hello']
        XML: hello

        Bring them all together for something like this.

        Lists:
        ['tag', 'xml', ['element', 'a', 'b'], ['tagline', 'xml2'],
        ['data', 'asdf']]

        XML (a newline follows every nested tag and every closing tag):
        <xml a="b">
        <xml2/>
        asdf</xml>

        Returns a list of strings.  The first entry is always the XML
        declaration; a second entry holding the generated XML is added
        only when data is a non-empty list whose first item is 'tag'.
        """
        outlines = ['<?xml version="1.0" encoding="UTF-8"?>']
        if isinstance(data, list) and data and data[0] == 'tag':
            outlines.append(self._xmltag(data))
        return outlines
class Meta:
    "Meta Data Class - stores document meta data and reads/writes meta.xml"

    # Simple metaname options and the attribute each one is stored in.
    _FIELD_ATTRS = {
        'creator': 'meta_creator',
        'editor': 'meta_editor',
        'title': 'meta_title',
        'subject': 'meta_subject',
        'description': 'meta_description',
        'user1name': 'meta_user1_name',
        'user2name': 'meta_user2_name',
        'user3name': 'meta_user3_name',
        'user4name': 'meta_user4_name',
        'user1value': 'meta_user1_value',
        'user2value': 'meta_user2_value',
        'user3value': 'meta_user3_value',
        'user4value': 'meta_user4_value',
    }

    # meta.xml elements whose text is copied into a metaname while parsing.
    _PARSED_ELEMENTS = {
        'dc:title': 'title',
        'dc:description': 'description',
        'dc:subject': 'subject',
        'meta:initial-creator': 'creator',
    }

    def __init__(self, doctype, debug=False):
        "Initialize the meta data with default values"
        self.doctype = doctype

        # Set the debug mode
        self.debug = debug

        # The generator should always default to the version number
        self.meta_generator = version()
        self.meta_title = ''
        self.meta_subject = ''
        self.meta_description = ''
        self.meta_keywords = []
        self.meta_creator = 'ooolib-python'
        self.meta_editor = ''
        self.meta_user1_name = 'Info 1'
        self.meta_user2_name = 'Info 2'
        self.meta_user3_name = 'Info 3'
        self.meta_user4_name = 'Info 4'
        self.meta_user1_value = ''
        self.meta_user2_value = ''
        self.meta_user3_value = ''
        self.meta_user4_value = ''
        self.meta_creation_date = self.meta_time()

        # Parser data
        self.parser_element_list = []
        self.parser_element = ""
        self.parser_count = 0

    def set_meta(self, metaname, value):
        """Set meta data in your document.

        Currently implemented metaname options are as follows:
        'creator', 'editor', 'title', 'subject', 'description',
        'user1name' .. 'user4name', 'user1value' .. 'user4value'
        - each stores value, replacing the previous one.
        'keyword' - value is added to the keyword list unless it is
        already there.

        Any other metaname is silently ignored.
        """
        if metaname == 'keyword':
            if value not in self.meta_keywords:
                self.meta_keywords.append(value)
            return
        attribute = self._FIELD_ATTRS.get(metaname)
        if attribute is not None:
            setattr(self, attribute, value)

    def get_meta_value(self, metaname):
        """Get meta data value for a given metaname.

        Accepts the same metaname options as set_meta.  'keyword' returns
        the list of keywords.  Unknown names return None.
        """
        if metaname == 'keyword':
            return self.meta_keywords
        attribute = self._FIELD_ATTRS.get(metaname)
        if attribute is None:
            return None
        return getattr(self, attribute)

    def meta_time(self):
        "Return the local time as a string in meta data format"
        return "%04d-%02d-%02dT%02d:%02d:%02d" % tuple(time.localtime()[:6])

    def parse_start_element(self, name, attrs):
        "Parser callback: an element has been opened"
        if self.debug:
            print('* Start element: %s' % (name,))
        self.parser_element_list.append(name)
        self.parser_element = self.parser_element_list[-1]

        # Need the meta name from the user-defined tags
        if self.parser_element == "meta:user-defined":
            self.parser_count += 1
            # Set user-defined name
            self.set_meta("user%dname" % self.parser_count, attrs['meta:name'])

        # Debugging statements
        if self.debug:
            print(" List:  %s" % (self.parser_element_list,))
            print(" Attributes:  %s" % (attrs,))

    def parse_end_element(self, name):
        "Parser callback: an element has been closed"
        if self.debug:
            print('* End element: %s' % (name,))
        if name != self.parser_element:
            print("Tag Mismatch: '%s' != '%s'" % (name, self.parser_element))
        self.parser_element_list.pop()

        # Readjust parser_element_list and parser_element
        if self.parser_element_list:
            self.parser_element = self.parser_element_list[-1]
        else:
            self.parser_element = ""

    def parse_char_data(self, data):
        "Parser callback: text found inside the current element"
        if self.debug:
            print(" Character data:  %s" % repr(data))

        current = self.parser_element

        # Collect Meta data fields
        if current in self._PARSED_ELEMENTS:
            self.set_meta(self._PARSED_ELEMENTS[current], data)

        # Try to maintain the same creation date
        if current == "meta:creation-date":
            self.meta_creation_date = data

        # The user defined fields need to be kept track of, parser_count does that
        if current == "meta:user-defined":
            self.set_meta("user%dvalue" % self.parser_count, data)

    def meta_parse(self, data):
        "Parse Meta Data from a meta.xml file"

        # Debugging statements
        if self.debug:
            # Sometimes it helps to see the document that was read from
            print(data)
            print("\n\n\n")

        # Start from a clean parser state so that the user-defined fields
        # are numbered from 1 again every time a document is parsed.
        self.parser_element_list = []
        self.parser_element = ""
        self.parser_count = 0

        # Create parser
        parser = xml.parsers.expat.ParserCreate()

        # Without buffering, expat may report the text of one element in
        # several pieces (for example around an entity such as an escaped
        # ampersand) and only the last piece would be kept.
        parser.buffer_text = True

        # Set up parser callback functions
        parser.StartElementHandler = self.parse_start_element
        parser.EndElementHandler = self.parse_end_element
        parser.CharacterDataHandler = self.parse_char_data

        # Actually parse the data
        parser.Parse(data, 1)

    def get_meta(self):
        """Generate meta.xml file data

        Returns the meta.xml contents as one string.  The nested lists,
        the generated lines and the final string are also kept on the
        instance as data, lines and filedata.
        """

        def text_tag(name, value):
            "Build a tag that only contains text"
            return ['tag', name, ['data', value]]

        def user_defined(name, value):
            "Build one user-defined field"
            return ['tag', 'meta:user-defined',
                    ['element', 'meta:name', name],
                    ['data', value]]

        self.meta_date = self.meta_time()
        office_meta = [
            'tag', 'office:meta',
            # Generator is set to the ooolib-python version.
            # Was: 'OpenOffice.org/2.0$Linux OpenOffice.org_project/680m5$Build-9011'
            text_tag('meta:generator', self.meta_generator),
            text_tag('dc:title', self.meta_title),
            text_tag('dc:description', self.meta_description),
            text_tag('dc:subject', self.meta_subject),
            text_tag('meta:initial-creator', self.meta_creator),
            # This is the original creation date of the document
            text_tag('meta:creation-date', self.meta_creation_date),
            # dc:creator holds the document editor
            text_tag('dc:creator', self.meta_editor),
            # This is the last modified date of the document
            text_tag('dc:date', self.meta_date),
            # We will probably always use en-US for language
            text_tag('dc:language', 'en-US'),
            # Edit cycles will probably always be 1 for generated documents
            text_tag('meta:editing-cycles', '1'),
            # Editing duration is modified - creation date
            text_tag('meta:editing-duration', 'PT0S'),
            user_defined(self.meta_user1_name, self.meta_user1_value),
            user_defined(self.meta_user2_name, self.meta_user2_value),
            user_defined(self.meta_user3_name, self.meta_user3_value),
            user_defined(self.meta_user4_name, self.meta_user4_value),
            # A 'meta:document-statistic' tagline (table and cell counts)
            # is not generated.
        ]
        self.data = [
            'tag', 'office:document-meta',
            ['element', 'xmlns:office', 'urn:oasis:names:tc:opendocument:xmlns:office:1.0'],
            ['element', 'xmlns:xlink', 'http://www.w3.org/1999/xlink'],
            ['element', 'xmlns:dc', 'http://purl.org/dc/elements/1.1/'],
            ['element', 'xmlns:meta', 'urn:oasis:names:tc:opendocument:xmlns:meta:1.0'],
            ['element', 'xmlns:ooo', 'http://openoffice.org/2004/office'],
            ['element', 'office:version', '1.0'],
            office_meta,
        ]

        # Generate meta.xml XML data
        converter = XML()
        self.lines = converter.convert(self.data)
        self.filedata = '\n'.join(self.lines)

        # Return generated data
        return self.filedata
class CalcStyles:
    "Calc Style Management - Used to keep track of created styles."

    # A color is only accepted in the form '#rrggbb'.  '\Z' (instead of '$')
    # makes sure a trailing newline is rejected as well.
    _HEX_COLOR = re.compile(r'#[0-9a-fA-F]{6}\Z')

    # Style family -> (style code prefix, name of the counter attribute)
    _STYLE_COUNTERS = {
        'table': ('ta', 'style_table'),
        'column': ('co', 'style_column'),
        'row': ('ro', 'style_row'),
        'cell': ('ce', 'style_cell'),
    }

    def __init__(self):
        "Initialize the style registry with default properties"
        # Maps a tuple describing a style onto its style code
        self.style_config = {}

        # Style Counters
        self.style_table = 1
        self.style_column = 1
        self.style_row = 1
        self.style_cell = 1

        # Style Properties (Defaults) - To be used later
        self.property_column_width_default = '0.8925in'  # Default Column Width
        self.property_row_height_default = '0.189in'     # Default Row Height

        # Set Defaults
        self.property_column_width = '0.8925in'   # Default Column Width
        self.property_row_height = '0.189in'      # Default Row Height
        self.property_cell_bold = False           # Bold off by default
        self.property_cell_italic = False         # Italic off by default
        self.property_cell_underline = False      # Underline off by default
        self.property_cell_fg_color = 'default'   # Text Color Default
        self.property_cell_bg_color = 'default'   # Cell Background Default
        self.property_cell_fontsize = '10'        # Cell Font Size Default
        self.property_cell_valign = 'default'     # Vertical Alignment Default
        self.property_cell_halign = 'default'     # Horizontal Alignment Default

    def get_next_style(self, style):
        """Returns the next style code for the given style

        style is one of 'table', 'column', 'row' or 'cell'.  The matching
        counter is increased.  Any other style returns an empty string.
        """
        if style not in self._STYLE_COUNTERS:
            return ""
        prefix, counter = self._STYLE_COUNTERS[style]
        number = getattr(self, counter)
        setattr(self, counter, number + 1)
        return '%s%d' % (prefix, number)

    def _checked_color(self, value):
        "Return value in lower case if it looks like '#rrggbb', else 'default'"
        if self._HEX_COLOR.match(value):
            return value.lower()
        return 'default'

    def set_property(self, style, name, value):
        """Sets a property which will later be turned into a code

        Recognized combinations:
        'column': 'style:column-width'
        'row': 'style:row-height'
        'cell': 'bold', 'italic', 'underline' (only bool values are
        accepted), 'fontsize', 'color' and 'background' (anything that is
        not '#rrggbb' resets the color to 'default'), 'valign', 'halign'

        Everything else, including every 'table' property, is ignored.
        """
        if style == 'column':
            if name == 'style:column-width':
                self.property_column_width = value
        elif style == 'row':
            if name == 'style:row-height':
                self.property_row_height = value
        elif style == 'cell':
            if name in ('bold', 'italic', 'underline'):
                if isinstance(value, bool):
                    setattr(self, 'property_cell_%s' % name, value)
            elif name == 'fontsize':
                self.property_cell_fontsize = value
            elif name == 'color':
                self.property_cell_fg_color = self._checked_color(value)
            elif name == 'background':
                self.property_cell_bg_color = self._checked_color(value)
            elif name == 'valign':
                self.property_cell_valign = value
            elif name == 'halign':
                self.property_cell_halign = value

    def _cell_style_data(self):
        "Describe the current cell properties that differ from the defaults"
        style_data = ['cell']
        if self.property_cell_bold:
            style_data.append(('bold', True))
        if self.property_cell_italic:
            style_data.append(('italic', True))
        if self.property_cell_underline:
            style_data.append(('underline', True))
        if self.property_cell_fontsize != '10':
            style_data.append(('fontsize', self.property_cell_fontsize))
        if self.property_cell_fg_color != 'default':
            style_data.append(('color', self.property_cell_fg_color))
        if self.property_cell_bg_color != 'default':
            style_data.append(('background', self.property_cell_bg_color))
        if self.property_cell_valign != 'default':
            style_data.append(('valign', self.property_cell_valign))
        if self.property_cell_halign != 'default':
            style_data.append(('halign', self.property_cell_halign))
        return tuple(style_data)

    def _lookup_or_create(self, style, style_data):
        "Return the code registered for style_data, registering it if needed"
        if style_data not in self.style_config:
            # Style does not exist, create code
            self.style_config[style_data] = self.get_next_style(style)
        return self.style_config[style_data]

    def get_style_code(self, style):
        """Return the style code matching the current properties

        A new code is created the first time a combination of properties
        is requested; later requests for the same combination return the
        same code.  'table' always returns 'ta1' and an unknown style
        returns an empty string.
        """
        if style == 'table':
            return "ta1"
        if style == 'column':
            style_data = (style, ('style:column-width', self.property_column_width))
        elif style == 'row':
            style_data = (style, ('style:row-height', self.property_row_height))
        elif style == 'cell':
            style_data = self._cell_style_data()
        else:
            return ""
        return self._lookup_or_create(style, style_data)

    def _column_style(self, style_code, properties):
        "Build the style lists for one column style"
        # unsure what break before means
        tagline = ['tagline', 'style:table-column-properties',
                   ['element', 'fo:break-before', 'auto']]
        for name, value in properties:
            if name == 'style:column-width':
                tagline.append(['element', 'style:column-width', value])
        return ['tag', 'style:style',
                ['element', 'style:name', style_code],
                ['element', 'style:family', 'table-column'],
                tagline]

    def _row_style(self, style_code, properties):
        "Build the style lists for one row style"
        tagline = ['tagline', 'style:table-row-properties']
        for name, value in properties:
            if name == 'style:row-height':
                tagline.append(['element', 'style:row-height', value])
        tagline.append(['element', 'fo:break-before', 'auto'])
        # 'style:use-optimal-row-height' is left out: it overrides settings
        return ['tag', 'style:style',
                ['element', 'style:name', style_code],
                ['element', 'style:family', 'table-row'],
                tagline]

    def _cell_style(self, style_code, properties):
        "Build the style lists for one cell style"
        style_list = ['tag', 'style:style',
                      ['element', 'style:name', style_code],
                      ['element', 'style:family', 'table-cell'],
                      ['element', 'style:parent-style-name', 'Default']]

        # Cell Properties
        tagline = ['tagline', 'style:table-cell-properties']
        for name, value in properties:
            if name == 'background':
                tagline.append(['element', 'fo:background-color', value])
            elif name == 'valign':
                if value in ('top', 'bottom', 'middle'):
                    tagline.append(['element', 'style:vertical-align', value])
            elif name == 'halign':
                repeat = 'true' if value == 'filled' else 'false'
                tagline.append(['element', 'style:text-align-source', 'fix'])
                tagline.append(['element', 'style:repeat-content', repeat])
        style_list.append(tagline)

        # Paragraph Properties - only added when halign has been set
        text_align = {'center': 'center',
                      'end': 'end', 'right': 'end',
                      'start': 'start', 'filled': 'start', 'left': 'start',
                      'justify': 'justify'}
        tagline = ['tagline', 'style:paragraph-properties']
        tagline_valid = False
        for name, value in properties:
            if name == 'halign':
                tagline_valid = True
                if value in text_align:
                    tagline.append(['element', 'fo:text-align', text_align[value]])
        if tagline_valid:
            style_list.append(tagline)

        # Text Properties
        tagline = ['tagline', 'style:text-properties']
        for name, value in properties:
            if name == 'bold':
                tagline.append(['element', 'fo:font-weight', 'bold'])
            elif name == 'italic':
                tagline.append(['element', 'fo:font-style', 'italic'])
            elif name == 'underline':
                tagline.append(['element', 'style:text-underline-style', 'solid'])
                tagline.append(['element', 'style:text-underline-width', 'auto'])
                tagline.append(['element', 'style:text-underline-color', 'font-color'])
            elif name == 'color':
                tagline.append(['element', 'fo:color', value])
            elif name == 'fontsize':
                tagline.append(['element', 'fo:font-size', '%spt' % value])
        style_list.append(tagline)
        return style_list

    def get_automatic_styles(self):
        "Return 'office:automatic-styles' lists"
        builders = {'column': self._column_style,
                    'row': self._row_style,
                    'cell': self._cell_style}
        automatic_styles = ['tag', 'office:automatic-styles']
        for style_data, style_code in self.style_config.items():
            # style_data is (style, (name, value), (name, value), ...)
            style, properties = style_data[0], style_data[1:]
            if style in builders:
                automatic_styles.append(builders[style](style_code, properties))

        # Attach ta1 style
        automatic_styles.append(['tag', 'style:style',
                                 ['element', 'style:name', 'ta1'],
                                 ['element', 'style:family', 'table'],
                                 ['element', 'style:master-page-name', 'Default'],
                                 ['tagline', 'style:table-properties',
                                  ['element', 'table:display', 'true'],
                                  ['element', 'style:writing-mode', 'lr-tb']]])
        return automatic_styles
class CalcSheet:
    "Calc Sheet Class - Used to keep track of the data for an individual sheet."

    def __init__(self, sheetname):
        "Initialize a sheet"
        self.sheet_name = sheetname
        # (col, row) -> {'annotation': ..., 'link': ..., 'value': ...}
        self.sheet_values = {}
        # ('col', n), ('row', n) or (col, row) -> style code
        self.sheet_config = {}
        self.max_col = 0
        self.max_row = 0

    def get_sheet_dimensions(self):
        "Returns the max column and row"
        return (self.max_col, self.max_row)

    def _bracket_cell_refs(self, text):
        """Wrap every cell label found in text as '[.LABEL]'

        The text is processed in a single pass, so a label that is part
        of a longer one (A1 inside A10) is never wrapped a second time.
        """
        return re.sub(r'([A-Z]+\d+)', r'[.\1]', text)

    def clean_formula(self, data):
        """Returns a formula for use in ODF

        The formula is escaped for XML first.  If it then looks like
        '=NAME(...)', the cell labels in the arguments are wrapped and
        'oooc:' is put in front.  Anything else is returned escaped only.

        Example Translations
        '=SUM(A1:A2)'        -> 'oooc:=SUM([.A1]:[.A2])'
        '=IF((A5>A4);A4;0)'  -> 'oooc:=IF(([.A5]&gt;[.A4]);[.A4];0)'
        """
        data = clean_string(str(data))
        found = re.search(r'^=([A-Z]+)(\(.*)$', data)
        if found:
            # group 1 is the function name, group 2 the rest of the string
            funct = found.group(1)
            funct_args = self._bracket_cell_refs(found.group(2))
            data = 'oooc:=%s%s' % (funct, funct_args)
        return data

    def get_name(self):
        "Returns the sheet name"
        return self.sheet_name

    def set_name(self, sheetname):
        "Resets the sheet name"
        self.sheet_name = sheetname

    def get_sheet_values(self):
        "Returns the sheet cell values"
        return self.sheet_values

    def get_sheet_value(self, col, row):
        "Get the value contents of a cell, or None if the cell is not set"
        return self.sheet_values.get((col, row))

    def get_sheet_config(self):
        "Returns the sheet cell properties"
        return self.sheet_config

    def set_sheet_config(self, location, style_code):
        "Sets Style Code for a given location"
        self.sheet_config[location] = style_code

    def set_sheet_value(self, cell, datatype, datavalue):
        """Sets the value for a specific cell

        cell must be in the format (col, row) where row and col are int.
        Example: B5 would be written as (2, 5)

        datatype must be one of 'string', 'float', 'formula',
        'annotation' or 'link'.  Any other datatype is stored as 'string'.

        datavalue should be a string, except for 'link' where it is a
        (url, description) pair.

        An invalid cell prints "Invalid Cell" and changes nothing.
        """
        # Catch invalid data
        if not (type(cell) is tuple and len(cell) == 2
                and type(cell[0]) is int and type(cell[1]) is int):
            print("Invalid Cell")
            return
        (col, row) = cell

        # Set all unknown cell types to string.  This happens before the
        # value is cleaned so that these values are XML escaped as well.
        datatype = str(datatype)
        if datatype not in ('string', 'float', 'formula', 'annotation', 'link'):
            datatype = 'string'

        if datatype in ('string', 'annotation'):
            # Fix String Data
            datavalue = clean_string(datavalue)
        elif datatype == 'link':
            # Fix Link Data.  Link's value is a tuple containing (url, description)
            datavalue = (clean_string(datavalue[0]), clean_string(datavalue[1]))
        elif datatype == 'formula':
            # Fix Formula Data
            datavalue = self.clean_formula(datavalue)

        # Adjust maximum sizes
        if col > self.max_col:
            self.max_col = col
        if row > self.max_row:
            self.max_row = row

        # HPS: Cell content is a dictionary so that a cell can hold a value,
        # an annotation and a link at the same time.
        contents = self.sheet_values.get(
            cell, {'annotation': None, 'link': None, 'value': None})
        if datatype in ('annotation', 'link'):
            contents[datatype] = (datatype, datavalue)
        else:
            contents['value'] = (datatype, datavalue)
        self.sheet_values[cell] = contents

    def _cell_lists(self, cell):
        "Returns the nested lists for a single cell"
        if cell not in self.sheet_values:
            return ['tagline', 'table:table-cell']

        cell_lists = ['tag', 'table:table-cell']
        style_code = self.sheet_config.get(cell, 'ce1')  # Default all cells to ce1
        if style_code != 'ce1':
            cell_lists.append(['element', 'table:style-name', style_code])

        # Contents, annotations, and links added by HPS
        contents = self.sheet_values[cell]  # cell contents is a dictionary
        text = None
        if contents['value']:
            (datatype, text) = contents['value']
            if datatype == 'float':
                cell_lists.append(['element', 'office:value-type', datatype])
                cell_lists.append(['element', 'office:value', text])
            elif datatype == 'string':
                cell_lists.append(['element', 'office:value-type', datatype])
            elif datatype == 'formula':
                cell_lists.append(['element', 'table:formula', text])
                cell_lists.append(['element', 'office:value-type', 'float'])
                cell_lists.append(['element', 'office:value', '0'])
                # The displayed text of a formula is written as 0
                text = '0'

        if contents['annotation']:
            annoval = contents['annotation'][1]
            cell_lists.append(['tag', 'office:annotation',
                               ['tag', 'text:p', ['data', annoval]]])

        # The paragraph holds the text first and the link after it; it is
        # left out entirely when the cell has neither.
        paragraph = ['tag', 'text:p']
        if text:
            paragraph.append(['data', text])
        if contents['link']:
            linkval = contents['link'][1]
            paragraph.append(['tag', 'text:a',
                              ['element', 'xlink:href', linkval[0]],
                              ['data', linkval[1]]])
        if len(paragraph) > 2:
            cell_lists.append(paragraph)
        return cell_lists

    def get_lists(self):
        "Returns nested lists for XML processing"
        # Base Information
        sheet_lists = ['tag', 'table:table',
                       ['element', 'table:name', self.sheet_name],  # Set the sheet name
                       ['element', 'table:style-name', 'ta1'],
                       ['element', 'table:print', 'false']]

        if self.max_col == 0 and self.max_row == 0:
            # Nothing has been set: one default column and one empty row
            sheet_lists.append(['tagline', 'table:table-column',
                                ['element', 'table:style-name', 'co1'],
                                ['element', 'table:default-cell-style-name', 'Default']])
            sheet_lists.append(['tag', 'table:table-row',
                                ['element', 'table:style-name', 'ro1'],
                                ['tagline', 'table:table-cell']])
            return sheet_lists

        # Need to add column information
        for col in range(1, self.max_col + 1):
            style_code = self.sheet_config.get(('col', col), 'co1')
            sheet_lists.append(['tagline', 'table:table-column',
                                ['element', 'table:style-name', style_code],
                                ['element', 'table:default-cell-style-name', 'Default']])

        # Need to create each row
        for row in range(1, self.max_row + 1):
            style_code = self.sheet_config.get(('row', row), 'ro1')
            rowlist = ['tag', 'table:table-row',
                       ['element', 'table:style-name', style_code]]
            for col in range(1, self.max_col + 1):
                rowlist.append(self._cell_lists((col, row)))
            sheet_lists.append(rowlist)
        return sheet_lists
class Calc:
"Calc Class - Used to create OpenDocument Format Calc Spreadsheets."
def __init__(self, sheetname=None, opendoc=None, debug=False):
    """Initialize ooolib Calc instance

    sheetname - name of the first sheet, 'Sheet1' when not given
    opendoc - if given, this document is loaded with self.load()
    debug - True to print debugging messages
    """
    # Default to no debugging
    self.debug = debug

    # The main sheet will be initially called 'Sheet1'
    self.sheets = [CalcSheet(sheetname or "Sheet1")]
    self.sheet_index = 0  # We initially start on the first sheet
    self.styles = CalcStyles()
    self.meta = Meta('ods')

    # Force generation of the default column, row, table and cell styles
    for family in ('column', 'row', 'table', 'cell'):
        self.styles.get_style_code(family)

    # Data Parsing
    self.parser_element_list = []
    self.parser_element = ""
    self.parser_sheet_num = 0
    self.parser_sheet_row = 0
    self.parser_sheet_column = 0
    self.parser_cell_repeats = 0
    self.parser_cell_string_pending = False
    self.parser_cell_string_line = ""

    # See if we need to read a document
    if opendoc:
        # Written as a call so that it works as Python 2 and Python 3 code
        if self.debug:
            print("Opening Document: %s" % opendoc)

        # Okay, now we load the file
        self.load(opendoc)
def debug_level(self, level):
"""Set debug level:
True if you want debugging messages
False if you do not.
"""
self.debug = level
def set_meta(self, metaname, value):
"Set meta data in your document."
self.meta.set_meta(metaname, value)
def get_meta_value(self, metaname):
"Get meta data value for a given metaname"
return self.meta.get_meta_value(metaname)
def get_sheet_name(self):
"Returns the sheet name"
return self.sheets[self.sheet_index].get_name()
def get_sheet_dimensions(self):
"Returns the sheet dimensions in (cols, rows)"
return self.sheets[self.sheet_index].get_sheet_dimensions()
def set_column_property(self, column, name, value):
"Set Column Properties"
if name == 'width':
# column number column needs column-width set to value
self.styles.set_property('column', 'style:column-width', value)
style_code = self.styles.get_style_code('column')
self.sheets[self.sheet_index].set_sheet_config(('col', column), style_code)
def set_row_property(self, row, name, value):
"Set row Properties"
if name == 'height':
# row number row needs row-height set to value
self.styles.set_property('row', 'style:row-height', value)
style_code = self.styles.get_style_code('row')
self.sheets[self.sheet_index].set_sheet_config(('row', row), style_code)
def set_cell_property(self, name, value):
"""Turn and off cell properties
Actual application of properties is handled by setting a value."""
self.styles.set_property('cell', name, value)
def get_sheet_index(self):
"Return the current sheet index number"
return self.sheet_index
def set_sheet_index(self, index):
"Set the sheet index"
if type(index) == type(1):
if index >= 0 and index < len(self.sheets):
self.sheet_index = index
return self.sheet_index
def get_sheet_count(self):
"Returns the number of existing sheets"
return len(self.sheets)
def new_sheet(self, sheetname):
    """Append a new sheet named sheetname and make it the current sheet.

    Returns the index of the newly created sheet.
    """
    # The new sheet goes at the end, so its index is the current count.
    self.sheet_index = position = len(self.sheets)
    created = CalcSheet(sheetname)
    self.sheets.append(created)
    return position
def set_cell_value(self, col, row, datatype, value):
"Set the value for a given cell"
self.sheets[self.sheet_index].set_sheet_value((col, row), datatype, value)
style_code = self.styles.get_style_code('cell')
self.sheets[self.sheet_index].set_sheet_config((col, row), style_code)
def get_cell_value(self, col, row):
"Get a cell value tuple (type, value) for a given cell"
sheetvalue = self.sheets[self.sheet_index].get_sheet_value(col, row)
# We stop here if there is no value for sheetvalue
if sheetvalue == None: return sheetvalue
# Now check to see if we have a value tuple
if 'value' in sheetvalue:
return sheetvalue['value']
else:
return None
def load(self, filename):
"""Load .ods spreadsheet.
The load function loads data from a document into the current cells.
"""
# Read in the important files
# meta.xml
data = self._zip_read(filename, "meta.xml")
self.meta.meta_parse(data)
# content.xml
data = self._zip_read(filename, "content.xml")
self.content_parse(data)
# settings.xml - I do not remember putting anything here
# styles.xml - I do not remember putting anything here
def parse_content_start_element(self, name, attrs):
if self.debug: print '* Start element:', name
self.parser_element_list.append(name)
self.parser_element = self.parser_element_list[-1]
# Keep track of the current sheet number
if (self.parser_element == 'table:table'):
# Move to starting cell
self.parser_sheet_row = 0
self.parser_sheet_column = 0
# Increment the sheet number count
self.parser_sheet_num += 1
if (self.parser_sheet_num - 1 != self.sheet_index):
# We are not on the first sheet and need to create a new sheet.
# We will automatically move to the new sheet
sheetname = "Sheet%d" % self.parser_sheet_num
if 'table:name' in attrs: sheetname = attrs['table:name']
self.new_sheet(sheetname)
else:
# We are on the first sheet and will need to overwrite the default name
sheetname = "Sheet%d" % self.parser_sheet_num
if 'table:name' in attrs: sheetname = attrs['table:name']
self.sheets[self.sheet_index].set_name(sheetname)
# Update the row numbers
if (self.parser_element == 'table:table-row'):
self.parser_sheet_row += 1
self.parser_sheet_column = 0
# Okay, now keep track of the sheet cell data
if (self.parser_element == 'table:table-cell'):
# By default it will repeat zero times
self.parser_cell_repeats = 0
# We must be in a new column
self.parser_sheet_column += 1
# Set some default values
datatype = ""
value = ""
# Get values from attrs hash
if 'office:value-type' in attrs: datatype = attrs['office:value-type']