-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathcpdfannot.ml
More file actions
263 lines (248 loc) · 10.2 KB
/
cpdfannot.ml
File metadata and controls
263 lines (248 loc) · 10.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
(** A loose JSON equivalent of XFDF for annotations. *)
open Pdfutil
open Cpdferror
let rec subtype_of_string = function
| "Text" -> Pdfannot.Text
| "Link" -> Pdfannot.Link
| "FreeText" -> Pdfannot.FreeText
| "Line" -> Pdfannot.Line
| "Square" -> Pdfannot.Square
| "Circle" -> Pdfannot.Circle
| "Polygon" -> Pdfannot.Polygon
| "PolyLine" -> Pdfannot.PolyLine
| "Highlight" -> Pdfannot.Highlight
| "Underline" -> Pdfannot.Underline
| "Squiggly" -> Pdfannot.Squiggly
| "StrikeOut" -> Pdfannot.StrikeOut
| "Stamp" -> Pdfannot.Stamp
| "Caret" -> Pdfannot.Caret
| "Ink" -> Pdfannot.Ink
| "FileAttachment" -> Pdfannot.FileAttachment
| "Sound" -> Pdfannot.Sound
| "Movie" -> Pdfannot.Movie
| "Widget" -> Pdfannot.Widget
| "Screen" -> Pdfannot.Screen
| "PrinterMark" -> Pdfannot.PrinterMark
| "TrapNet" -> Pdfannot.TrapNet
| "Watermark" -> Pdfannot.Watermark
| "3D" -> Pdfannot.ThreeDee
| s ->
match explode s with
| 'P'::'o'::'p'::'u'::'p'::':'::r ->
Pdfannot.Popup
{subtype = subtype_of_string (implode r);
annot_contents = None;
subject = None;
rectangle = (0., 0., 0., 0.);
border = {width = 0.; vradius = 0.; hradius = 0.; style = Pdfannot.NoStyle; dasharray = [||]};
colour = None;
annotrest = Pdf.Null}
| _ -> Pdfannot.Unknown s
(* List annotations, simple old style. *)
let get_annotation_string encoding pdf annot =
match Pdf.lookup_direct pdf "/Contents" annot with
| Some (Pdf.String s) -> Cpdfmetadata.encode_output encoding s
| _ -> ""
let print_annotation encoding pdf num s =
let s = get_annotation_string encoding pdf s in
match s with
| "" -> ()
| s ->
flprint (Printf.sprintf "Page %d: " num);
flprint s;
flprint "\n"
let list_page_annotations encoding pdf num page =
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
| Some (Pdf.Array annots) ->
iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots)
| _ -> ()
let list_annotations range encoding pdf =
Cpdfpage.iter_pages (list_page_annotations encoding pdf) pdf range
(* New, JSON style *)
let rewrite_destination f d =
match d with
| Pdf.Array (Pdf.Indirect i::r) -> (* out *)
Pdf.Array (Pdf.Integer (f i)::r)
| Pdf.Array (Pdf.Integer i::r) -> (* in *)
Pdf.Array (Pdf.Indirect (f i)::r)
| x -> x
let rewrite_destinations f pdf annot =
(* Deal with /P in annotation *)
let annot =
match Pdf.indirect_number pdf "/P" annot with
| Some i -> Pdf.add_dict_entry annot "/P" (Pdf.Integer (f i))
| None -> annot
in
(* Deal with /Dest in annotation *)
match Pdf.lookup_direct pdf "/Dest" annot with
| Some d -> Pdf.add_dict_entry annot "/Dest" (rewrite_destination f d)
| None ->
(* Deal with /A --> /D dest when /A --> /S = /GoTo *)
match Pdf.lookup_direct pdf "/A" annot with
| Some action ->
begin match Pdf.lookup_direct pdf "/D" action with
| Some d ->
Pdf.add_dict_entry
annot "/A" (Pdf.add_dict_entry action "/D" (rewrite_destination f d))
| None -> annot
end
| None -> annot
(* We exclude the same annotations as the XFDF spec does. (NB: not any more) *)
let excluded pdf annot =
match Pdf.lookup_direct pdf "/Subtype" annot with
(*| Some (Pdf.Name ("/Movie" | "/Widget" | "/Screen" | "/PrinterMark" | "/TrapNet")) -> true*)
| _ -> false
let extra = ref []
let annotations_json_page calculate_pagenumber pdf page pagenum =
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
| Some (Pdf.Array annots) ->
option_map
(fun annot ->
begin match annot with
| Pdf.Indirect objnum ->
let annot = Pdf.direct pdf annot in
if excluded pdf annot then None else
let annot =
rewrite_destinations
(fun i -> calculate_pagenumber (Pdfdest.Fit (Pdfdest.PageObject i)))
pdf annot
in
extra := annot::!extra;
Some (`List
[`Int pagenum;
`Int objnum;
Cpdfjson.json_of_object ~utf8:true ~clean_strings:true pdf (fun _ -> ())
~no_stream_data:false ~parse_content:false annot])
| _ -> Pdfe.log "Warning: annotations must be indirect\n"; None
end)
annots
| _ -> []
let get_annotations_json pdf range =
let refnums = Pdf.page_reference_numbers pdf in
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
let calculate_pagenumber = Pdfpage.pagenumber_of_target ~fastrefnums pdf in
extra := [];
let pages = Pdfpage.pages_of_pagetree pdf in
let pagenums = indx pages in
let pairs = combine pages pagenums in
let pairs = option_map (fun (p, n) -> if mem n range then Some (p, n) else None) pairs in
let pages, pagenums = split pairs in
let json = flatten (map2 (annotations_json_page calculate_pagenumber pdf) pages pagenums) in
let jsonobjnums : int list = map (function `List [_; `Int n; _] -> n | _ -> assert false) json in
(*Printf.eprintf "%i extra roots to explore\n" (length !extra);
iter (fun x -> Pdfe.log (Printf.sprintf "%s\n\n" (Pdfwrite.string_of_pdf x))) !extra;*)
let extra =
map
(fun n ->
`List
[`Int n;
Cpdfjson.json_of_object ~utf8:true ~clean_strings:true pdf (fun _ -> ())
~no_stream_data:false ~parse_content:false (Pdf.lookup_obj pdf n)])
(setify
(flatten
(map
(fun x ->
let x = Pdf.remove_dict_entry x "/Popup" in
let x = Pdf.remove_dict_entry x "/Parent" in
let r = Pdf.objects_referenced [] [] pdf x in
(*Printf.eprintf "%i extra for annot %s\n" (length r) (Pdfwrite.string_of_pdf x);*)
r)
!extra)))
in
let extra =
option_map
(function `List [`Int n; _] as json -> if mem n jsonobjnums then None else Some json | _ -> assert false)
extra
in
let header =
`List
[`Int ~-1;
Cpdfjson.json_of_object ~utf8:true ~clean_strings:true pdf (fun _ -> ())
~no_stream_data:false ~parse_content:false
(Pdf.Dictionary ["/CPDFJSONannotformatversion", Pdf.Integer 1])]
in
let json = `List ([header] @ json @ extra) in
Pdfio.bytes_of_string (Cpdfyojson.Safe.pretty_to_string json)
(* Return annotations *)
let get_annotations encoding pdf =
let pages = Pdfpage.pages_of_pagetree pdf in
flatten
(map2
(fun page pagenumber ->
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
| Some (Pdf.Array annots) ->
let strings =
map (get_annotation_string encoding pdf) (map (Pdf.direct pdf) annots)
in
combine (many pagenumber (length strings)) strings
| _ -> [])
pages
(ilist 1 (length pages)))
(** Set annotations from JSON, keeping any existing ones. *)
let set_annotations_json pdf i =
match Cpdfyojson.Safe.from_string (Pdfio.string_of_input i) with
| `List entries ->
(* Renumber the PDF so everything has bigger object numbers than that. *)
let maxobjnum =
fold_left max min_int
(map
(function
| `List [_; `Int i; _] | `List [`Int i; _] -> i
| _ -> error "Bad annots entry")
entries)
in
let pdf_objnums = map fst (list_of_hashtbl pdf.Pdf.objects.Pdf.pdfobjects) in
let change_table =
hashtable_of_dictionary (map2 (fun f t -> (f, t)) pdf_objnums (ilist (maxobjnum + 1) (maxobjnum + length pdf_objnums)))
in
let pdf' = Pdf.renumber change_table pdf in
pdf.root <- pdf'.root;
pdf.objects <- pdf'.objects;
pdf.trailerdict <- pdf'.trailerdict;
(* Add the extra objects back in and build the annotations. *)
let extras = option_map (function `List [`Int i; o] -> Some (i, o) | _ -> None) entries in
let annots = option_map (function `List [`Int pagenum; `Int i; o] -> Some (pagenum, i, o) | _ -> None) entries in
iter (fun (i, o) -> Pdf.addobj_given_num pdf (i, Cpdfjson.object_of_json o)) extras;
let pageobjnummap =
let refnums = Pdf.page_reference_numbers pdf in
combine (indx refnums) refnums
in
let pages = Pdfpage.pages_of_pagetree pdf in
let annotsforeachpage = collate compare (sort compare annots) in
let newpages =
map2
(fun pagenum page ->
let forthispage = flatten (keep (function (p, _, _)::t when p = pagenum -> true | _ -> false) annotsforeachpage) in
iter
(fun (_, i, o) ->
let f = fun pnum -> match lookup pnum pageobjnummap with Some x -> x | None -> pnum in
Pdf.addobj_given_num pdf (i, rewrite_destinations f pdf (Cpdfjson.object_of_json o)))
forthispage;
if forthispage = [] then page else
let annots =
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with | Some (Pdf.Array annots) -> annots | _ -> []
in
let newannots = map (fun (_, i, _) -> Pdf.Indirect i) forthispage in
{page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest "/Annots" (Pdf.Array (annots @ newannots))})
(indx pages)
pages
in
let pdf' = Pdfpage.change_pages true pdf newpages in
pdf.root <- pdf'.root;
pdf.objects <- pdf'.objects;
pdf.trailerdict <- pdf'.trailerdict
| _ -> error "Bad Annotations JSON file"
let copy_annotations range frompdf topdf =
set_annotations_json topdf (Pdfio.input_of_bytes (get_annotations_json frompdf range))
(* Remove annotations *)
let remove_annotations range pdf =
let remove_annotations_page pagenum page =
if mem pagenum range then
let rest' =
Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"
in
{page with Pdfpage.rest = rest'}
else
page
in
Cpdfpage.process_pages (Pdfpage.ppstub remove_annotations_page) pdf range