tpac-schedule/ics-to-html.awk at main · bert-github/tpac-schedule · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env -S awk -f
#
# Read one or more icalendar files, optional files with tags and an
# HTML file, and output the HTML file with certain placeholders
# replaced by corresponding events from the calendars.
#
# Call as
#
#   awk -f ics-to-html.awk ics-file [ics-file...] [tags-file...] [ecosystem-file...] [id-file...] html-file
#
# To include an HTML comment in the output, set the variable "comment"
# as follows:
#
#   awk -v comment="Generated by ics-to-html" -f ics-to-html.awk ...
#
# e.g.:
#
#   curl https://www.w3.org/calendar/tpac2023/export/ | \
#    awk -f ics-to-html.awk - schedule.html
#
# To include some text in case a calendar is empty, set the
# "emptytext" variable to an HTML fragment, e.g.:
#
#   awk -v emptytext="<span>There are currently no events.</span>" ...
#
# Note: The TPAC pages are in US English and encoded in UTF-8. If your
# system is not, set at least LC_TIME=en_US.UTF-8 before calling awk.
# (The ics-to-html.cron script does that.)
#
# If there are any files with tags, they must be TSV files. Only lines
# with at least five columns and that have a URL in the fifth column
# that starts with "https://www.w3.org/events/meetings/" are used.
#
# The first column is interpreted as an extra category to add to the
# event identified by the URL in the fifth column. If the text in the
# first column is empty, it is assumed to be the same as the previous
# line. The category is transformed to contain only lowercase letters,
# digits and dashes before being added to the event's categories. The
# text is only added if the fourth column is empty or contains the
# text "ok" (in upper- or lowercase). The second and third columns and
# any columns after the fifth are not used.
#
# (The script makes use of the fact that URLs of the form above
# contain the UID of an event and it assumes that it is the same UID
# as used in the icalendar files passed on the command line.)
#
# Ecosystem files are files that assign groups (rather than events)
# to tracks. If an event has as attendee a group that belongs to a
# track, then the event is labeled with that track. (Two track names
# are considered the same if they are equal after being
# transformed to contain only lowercase letters, digits and dashes.)
# The files must contain lines with three tab-separated values: the
# literal string "__ECOSYSTEM__", a track name, and a group
# name. Other lines are ignored. E.g.:
#
#   __ECOSYSTEM__<TAB>Web Advertising<TAB>Privacy Interest Group
#
# The html-file will be copied to stdout, except for any text between
#
#   <!-- begin-include KEYWORD OPTIONAL-ARGUMENTS -->
#
# and
#
#  <!-- end-include -->
#
# The KEYWORD can be a date or time in the form yyyymmdd or
# yyyymmddThhmmss, in which case the text between will be replaced by
# the events from the ics-file that start within 24 hours from that
# time. If there is more text after the keyword (e.g. "<!--
# begin-include 20230912 20230912T120000 web-payments -->") it is
# interpreted as follows: If the first word is a time in the form
# yyyymmddThhmmss, it restricts events to those that start before that
# time, rather than within 24 hours. If there is other text, it is
# interpreted as a tag, a category or an ecosystem and only events
# that are listed in the tags-file(s) under that tag, that have a
# corresponding category, or that have attendees that belong to that
# ecosystem are included. If the tag is preceded by "-", the filter is
# inverted: only events that do *not* match are included.
#
# The KEYWORD can also be the word "last-mod" (without quotes), in which
# case the text between will be replaced by the date and time of the
# most recent modification in the ics-file (expressed in UTC).
#
# The KEYWORD can be the word "desc", followed by two date-times,
# in which case the text between the comments will be replaced by a
# list of descriptions of all breakout sessions that start between
# those two date-times. (To be precise: at or after the first time and
# before the second time.)
#
# The KEYWORD can be the word "local", followed by a date YYYYMMDD
# and possibly more text. The behavior is the same as without the word
# "local" (i.e., when the keyword is a date), except that all
# generated events will be links to a local file instead of to the W3C
# Calendar. Each event will link to "filename#b-UID", where UID is the
# UID of the event. (To know which file contains a target anchor with
# id=b-UID, the script needs id-files, see below.)
#
# The keyword can be the word "next", followed optionally by the
# word "local" and followed by a time HH:MM (hours and minutes) and
# possibly more text. This will output all events that start within
# the next HH hours and MM minutes from the time the script is run. If
# there is more text after the time, it is interpreted as a tag or an
# ecosystem and only events that are listed in the tags-file(s) under
# that tag, or that have attendees that belong to that ecosystem are
# included. Each event is printed as an A element containing the
# title, location and time of the event, and a class attribute with
# the type of events. The link will point to the event in the W3C
# calendar, or, if the word "local" is present, to a local anchor of
# the form "filename#b-UID". Where "UID" is the UID of the event.
#
# The keyword can be "legend", followed by a date or time in the form
# yyyymmdd or yyyymmddThhmmss, optionally followed by another time
# and/or other text, in which case the text between the two comments
# will be replaced by a list of all the track names that occur in
# events within 24 hours after the first time. If there is
# another time, that is the end time, instead of 24 hours. And if
# there is other text, it is a tag (track or ecosystem) and restricts
# the events that are considered to those that belong to that track or
# ecosystem. The list will have entries such as: <abbr class="legend
# faicon fa-robot" title="robot icon"></abbr> AI
#
# And the keyword can be the word "now", optionally followed by a
# pattern. If there is a pattern, it will be interpreted as a pattern
# for strftime(3) and the text between the two comments will be
# replaced by the current time in the format of that pattern. E.g., if
# the pattern is "%H:%M on %B %d, %Y", this will output something like
# "13:36 on October 29, 2023". If there is no pattern, the pattern is
# assumed to be "%d %B %Y, %l:%M %p", i.e., it will output the current
# time in the form "29 October 2023, 1:36 PM".
#
# The html-file may also contain comments of the form
#
#   <!-- TZ = time-zone -->
#
# The time-zone must be something like Europe/Paris, America/New_York
# or UTC. This sets the time zone for interpreting any dates and times
# in subsequent <!-- begin-include... --> comments.
#
# A comment of the form
#
#   <!-- sort = ORDER -->
#
# specifies the sort order for events in calendars. ORDER must be one
# of "time", "name" or "room". Spaces are ignored. The first such
# comment sets the default sort order for all calendars. Subsequent
# ones set the sort order only for the next calendar, i.e., after that
# calendar, the sort order reverts to the default. If there is no
# comment before a calendar and no default, the calendar will be
# sorted on name. (Note that this works differently from how
# calendar-sort.js finds the comment, but it is possible to place the
# comments such that they work the same.) Example: "<!-- sort=time
# -->"
#
# If there are any id-files, they must be files that start with the line
#
#   ## IDs ## FILENAME
#
# followed by values of ID attributes in FILENAME, one per line.
# (These are used when the "local" keyword is present in the comments
# in the HTML file, to know which target anchor occurs in which file.)
#
# Testing:
#
# To test the markdown parser, set the environment variable ICSTOHTML
# before calling the script:
#
# ICSTOHTML="test markdown": The script will read the first input
#   file, parse it as markdown and output the HTML.
#
# ICSTOHTML="test inline markdown": The script will read the first
#   input file, parse it as inline markdown (no block elements) and
#   output the HTML.
#
# ICSTOHTML="test flatten markdown": The script will read the first
#   input file, interpret it as markdown and output just the text
#   content, i.e., HTML without any tags (but with character
#   entities).
#
#
# TODO: An easy way for people to run this script after they update
# the W3C calendar, so they don't have to wait for the cron job. A
# button on the schedule page that queues a run in the next three
# minutes?
#
# TODO: Style the events in the calendar, especially in the list view,
# to make it clearer that they are links. Making them blue and
# underlined makes the list unreadable.
#
# TODO: Different classes for reception, AC and developer meetup?
# (instead of single class=special)
#
# TODO: This does not handle arbitrary ICS files, only the one for
# TPAC. (No repeating events, DTEND instead of DURATION, etc.)
#
# TODO: Turn the names of breakout proposers into mailto links. This
# could be done by splitting the list of names at each comma and
# finding corresponding email addresses in the various ATTENDEE fields
# in the icalendar files. (This assumes that each comma is a separator
# and that each name is unique and spelled the same under "Chairs" and
# inside ATTENDEE.)
#
# TODO: Instead of simple local links with the "local" keyword, allow
# an arbitrary URL prefix? (E.g., "prefix=#b-")
#
# TODO: The track labels for breakouts are not fixed, but determined
# by the breakout day organizers based on the proposed breakouts. We
# should have a way to automatically import those labels (and icons
# for them).
#
# TODO: A link to easily subscribe to a meeting directly from the
# calendar grid and the breakout descriptions, so people don't have to
# go to the calendar? And a link to the
# https://www.w3.org/users/myprofile/calendar/ ? Maybe copy the "my
# account" menu from the W3C home page?
#
# TODO: Allow somewhat more readable dates (e.g., ISO dates
# YYYY-MM-DDTHH:MM:SS or HTML5 dates YYYY-MM-DD HH:MM:SS) in addition
# to icalendar dates (YYYYMMDDTHHMMSS).
#
# TODO: Allow a syntax with elements instead of comments? E.g., <meta
# data-tz="America/Los_Angeles"> and <div data-start="2024-09-24"
# data-local>...</div>
#
# TODO: The "local" keyword allows generating links to anchors earlier
# in the file, but not to anchors later on in the file.
#
# Created: 15 January 2025
# based on https://www.w3.org/2024/09/TPAC/ics-to-html.awk
# Author: Bert Bos <bert@w3.org>

@include "markdown.awk"

BEGIN {
  # Test modes for the markdown parser (see "Testing" above). Each one
  # reads the first input as a single record, prints the converted
  # text and exits. Setting mode and copying before the exit keeps the
  # END rule from reporting a missing HTML file or an unclosed include.
  if (ENVIRON["ICSTOHTML"] == "test markdown") {
    # Parse the input as (block-level) markdown and output HTML.
    RS = "^$"
    getline
    printf "%s", markdown::to_html($0)
    mode = "html"		# Tell END that we're done
    copying = 1			# Tell END that we're done
    exit			# Jump to END
  }
  if (ENVIRON["ICSTOHTML"] == "test inline markdown") {
    # Parse the input as inline markdown (no block elements) and
    # output HTML.
    RS = "\0"
    getline
    printf "%s", markdown::to_inline_html($0)
    mode = "html"		# Tell END that we're done
    copying = 1			# Tell END that we're done
    exit			# Jump to END
  }
  if (ENVIRON["ICSTOHTML"] == "test flatten markdown") {
    # Parse the input as markdown and output only its text content.
    RS = "\0"
    getline
    printf "%s", markdown::to_text($0)
    mode = "html"		# Tell END that we're done
    copying = 1			# Tell END that we're done
    exit			# Jump to END
  }

  # Events in the tags files must be URLs that start with this:
  cal_prefix = "https://www.w3.org/events/meetings/"
  cal_prefix_len = length(cal_prefix)

  # The following tags in the tag files have an icon from the
  # Iconmonstr Iconic Font, and a dedicated schedule page.
  icon["automotive-and-transportation"] = "picto im-car"
  link["automotive-and-transportation"] = "track-automotive.html"
  icon["e-commerce"] = "picto im-coin"
  link["e-commerce"] = "track-web-payments.html"
  icon["media-and-entertainment"] = "picto im-audio"
  link["media-and-entertainment"] = "track-media-entertainment.html"
  icon["network-and-communications"] = "picto im-radio"
  link["network-and-communications"] = "track-networks-communications.html"
  icon["publishing"] = "picto im-book"
  link["publishing"] = "track-publishing.html"
  icon["smart-cities"] = "picto im-apartment"
  link["smart-cities"] = "track-smart-cities.html"
  icon["web-advertising"] = "picto im-shopping-cart"
  link["web-advertising"] = "track-web-advertising.html"
  icon["web-of-things"] = "picto im-factory"
  link["web-of-things"] = "track-web-of-things.html"

  # These tags are used by the breakouts. They don't have a linked
  # page. The icons come from the Font Awesome or Iconmonstr fonts.
  # The values are inserted verbatim into class attributes.
  icon["accessibility"] = "faicon fa-universal-access"
  icon["ai"] = "faicon fa-robot"
  icon["developer-experience"] = "faicon fa-user-astronaut"
  icon["feature-lifecycle"] = "picto im-sync"
  icon["getting-work-done"] = "faicon fa-list-check"
  icon["identity"] = "faicon fa-passport" # or "picto im-check-mark"?
  icon["performance"] = "faicon fa-person-running"
  icon["permissions"] = "picto im-forbidden"
  icon["privacy"] = "faicon fa-user-shield"
  icon["real-time-web"] = "picto im-timer"
  icon["standards"] = "picto im-shield"
  icon["trust"] = "faicon fa-handshake"
  icon["ux"] = "faicon fa-chalkboard-user"
  icon["wallets"] = "faicon fa-wallet"
  icon["web-apps"] = "picto im-windows-o"
  icon["web-components"] = "faicon fa-gears"

  # When looking for events that start now, also look for events that
  # started this long ago (in seconds).
  slack = 60 * 15;

  # The time of the most recently modified calendar entry, in seconds
  # after the epoch. Anything before is considered unknown.
  last_mod = -1

  # Which kind of input is being read: "cal", "tag", "ecosystem",
  # "ids" or "html". Used to check the order of the command line
  # arguments; the END rule requires that it ends up as "html".
  mode = "cal"

  # Whether to copy input lines to output. Will be set to 1 at the
  # start of the HTML input.
  copying = 0

  # If the "comment" variable is set on the command line, make sure it
  # does not contain "-->".
  gsub(/-->/, "-- >", comment)

  # Make sure "emptytext" is a string, also when it was not set on the
  # command line. (Concatenating rather than testing its truth value
  # keeps a text such as "0" intact.)
  emptytext = emptytext ""

  # The current time.
  now = systime()

  # Generate times in UTC, unless there is a TZ=... in the HTML input.
  ENVIRON["TZ"] = "UTC"
}

# Runs for every input line of every file, before all other rules.
# Note that sub() removes only the first CR on the line, which is
# enough to turn a CRLF line ending into LF.
{sub(/\r/, "")}			# Remove CR characters

#
# Rules to parse ICS files.
#

/^BEGIN:VEVENT/ {
  # A new event starts: reset everything that was collected for the
  # previous one.
  #
  # Plain text properties start out empty. "categories" will get a
  # "," before each CATEGORIES value (turned into \a (BEL) when the
  # event is stored) and "attendees" a \n before each ATTENDEE value.
  uid = summary = description = url = status = ""
  categories = attendees = ""

  # Start and end time, in seconds since the epoch.
  dtstart = dtend = 0

  # Used for events that have no LOCATION property.
  location = "Room TBD"
}
# Every line that does not start with a space begins a new property.
# Remember its name (the part before the first ";" or ":"), so that
# continuation lines can be appended to the right variable.
/^[^ ]/ {field = gensub(/[;:].*/, "", 1)}

# The first line of each property. The substr() offsets skip the
# property name and the ":" after it. (For ATTENDEE, the parameters
# after the name are kept.)
/^DTSTAMP/ {if ((h = datetime_to_time(substr($0, 9))) > last_mod) last_mod = h}
/^DTSTART/ {dtstart = datetime_to_time(substr($0, 9))}
/^DTEND/ {dtend = datetime_to_time(substr($0, 7))}
/^SUMMARY/ {summary = substr($0, 9)}
/^DESCRIPTION/ {description = substr($0, 13)}
/^UID/ {uid = substr($0, 5)}
/^URL/ {url = substr($0, index($0, ":") + 1)}
/^STATUS/ {status = substr($0, 8)}
/^LOCATION/ {location = substr($0, 10)}
/^CATEGORIES/ {categories = categories "," substr($0, 12)}
/^ATTENDEE/ {attendees = attendees "\n" substr($0, 9)}

# Continuation lines: a long property is folded over several lines,
# each of which starts with a space that is not part of the value.
# UID and URL are included, because a URL easily exceeds the 75
# octets after which icalendar lines are normally folded.
/^ / && field=="SUMMARY" {summary = summary substr($0, 2)}
/^ / && field=="DESCRIPTION" {description = description substr($0,2)}
/^ / && field=="CATEGORIES" {categories = categories substr($0, 2)}
/^ / && field=="LOCATION" {location = location substr($0, 2)}
/^ / && field=="ATTENDEE" {attendees = attendees substr($0, 2)}
/^ / && field=="UID" {uid = uid substr($0, 2)}
/^ / && field=="URL" {url = url substr($0, 2)}
/^END:VEVENT/ && mode == "cal"{
  # The event is complete. Store it in the event_* arrays under a new
  # index n, unless it was cancelled.
  status = unesc(status)
  if (status != "CANCELLED") {
    # Some locations are just the name of the venue, which means that
    # no room has been assigned yet. Treat them as "Room TBD".
    location = unesc(location)
    if (location == "Sheraton Vancouver Wall Centre" ||
	location == "Sheraton Wall Centre")
      location = "Room TBD"

    # Two events without a room must not look as if they are in the
    # same room, so make each "Room TBD" unique with a hidden element
    # that carries the UID. (The location is a MarkDown string.)
    if (location == "Room TBD")
      location = "Room TBD<span class=\"tbd " esc(unesc(uid)) "\"></span>"

    # Online-only events get a unique hidden element, too.
    if (location ~ /^([Oo]nline|[Vv]irtual [Mm]eeting)/)
      location = location " <span class=\"online " esc(unesc(uid)) \
	"\"></span>"

    # Store the event.
    ++n
    event_uid[n] = unesc(uid)
    event_status[n] = status
    event_summary[n] = unesc(summary)
    event_location[n] = location
    event_dtstart[n] = dtstart
    event_dtend[n] = dtend
    # "categories" has a "," before each category. Replace each
    # unescaped "," by \a (BEL), so that every category starts with \a.
    # TODO: "foo\\,bar" should be split into "foo\" and "bar".
    event_categories[n] = unesc(gensub(/(^|[^\\]),/, "\\1\a", "g", categories))
    event_description[n] = unesc(description)
    event_attendees[n] = store_attendees(attendees)
    parse_description(n)	# Results in event_info[n,...]

    # The link for the event: the URL property if there is one, else
    # the URL found in the description, else the W3C calendar entry
    # for this UID.
    event_url[n] = unesc(url)
    if (! event_url[n]) event_url[n] = event_info[n,"self"]
    if (! event_url[n]) event_url[n] = cal_prefix event_uid[n]
  }
}

#
# Rules to parse tags files.
#

# The interesting lines from the tags files are of the form
# LABEL<tab>...<tab>...<tab>STATUS<tab>URL
# where the URL starts with cal_prefix.
#
/\t.*\t.*\t.*\t/ {
  # A line with at least five columns. It is a tag line if the fifth
  # column is the URL of an event in the W3C calendar.
  split($0, col, "\t")

  # The first tag line marks the start of the tags files.
  if (substr(col[5], 1, cal_prefix_len) == cal_prefix && mode == "cal")
    mode = "tag"

  if (substr(col[5], 1, cal_prefix_len) == cal_prefix && mode == "tag") {
    # An empty first column repeats the title of the previous line.
    if (col[1]) prev_title = col[1]
    else col[1] = prev_title

    # Only use the line if the status column is empty or says "ok".
    if (tolower(col[4]) == "ok" || col[4] == "") {
      # Canonicalize the title to lowercase letters, digits and
      # dashes, but remember the original in track_title.
      label = tolower(col[1])
      label = gensub(/\s*&\s*/, " and ", "g", label)
      label = gensub(/\W+/, "-", "g", label)
      track_title[label] = col[1]

      # The UID is what follows the prefix in the URL, without any
      # fragment ID or query.
      uid = substr(col[5], cal_prefix_len + 1)
      sub(/[#?].*/, "", uid)

      # Append the label, prefixed with \a (BEL), to the labels that
      # were already stored for this UID.
      extra_tags[uid] = extra_tags[uid] "\a" label
    }
    next
  }
}

#
# Rules to parse ecosystem files (group-to-track mappings).
#

/^__ECOSYSTEM__\t/ && split($0, x, /\t/) == 3 {
  # A line with exactly three columns: the marker, a track (ecosystem)
  # name and a group name.

  # Ecosystem files come after the calendars and the tags files.
  if (mode == "cal" || mode == "tag")
    mode = "ecosystem"

  if (mode == "ecosystem") {
    # Canonicalize the name of the ecosystem to lowercase letters,
    # digits and dashes, but remember the original in track_title.
    ecosystem = tolower(x[2])
    ecosystem = gensub(/\s*&\s*/, " and ", "g", ecosystem)
    ecosystem = gensub(/\W+/, "-", "g", ecosystem)
    track_title[ecosystem] = x[2]

    # group_track_membership[g][t] is true iff group g belongs to
    # ecosystem (track) t.
    group_track_membership[x[3]][ecosystem] = 1
    next
  }
}

#
# Rules to parse the HTML file.
#

# The "<!DOCTYPE" means that we started reading the HTML file.
#
/<!DOCTYPE/ {
  # A second "<!DOCTYPE" means a second HTML file (or a second
  # document in the same file), which is not supported.
  if (mode == "html") {
    printf "%s:%d: Only one HTML file is allowed\n",
      FILENAME, FNR > "/dev/stderr"
    exit 1
  }
  mode = "html"
  copying = 1			# From here on, copy input lines to the output
  IGNORECASE = 1		# Match the comments in the HTML case-insensitively
}
# A comment "<!-- TZ = time-zone -->" sets the time zone for the dates
# and times in the comments that follow.
copying && match($0, /<!--\s*TZ\s*=(([^-]|-[^-]|--[^>])*)-->/, a) {
  ENVIRON["TZ"] = trim(a[1])
}
# A comment "<!-- sort = ORDER -->" sets the sort order for the next
# calendar. The first such comment also sets the default order, to
# which "order" is reset after each "<!-- end-include -->".
/<!--\s*sort\s*=([^-]|-[^-]|--[^>])*-->/ && copying {
  if (! match($0, /<!--\s*sort\s*=\s*(name|time|room)\s*-->/, a)) {
    # A failed match leaves the array empty, so match again, loosely,
    # to be able to show the offending text in the error message.
    match($0, /<!--\s*sort\s*=(([^-]|-[^-]|--[^>])*)-->/, a)
    printf "%s:%d: Sort order must be name, time or room, but found %s\n",
      FILENAME, FNR, trim(a[1]) > "/dev/stderr"
    exit 1
  }
  order = a[1]
  if (! default_order) default_order = a[1]
}
# "<!-- begin-include desc START END -->": insert the descriptions of
# the breakout sessions that start at or after START and before END.
# On an error, a message is printed and the line is treated as
# ordinary text.
/<!--\s*begin-include\s+desc\>([^-]|-[^-]|--[^>])*-->/ && copying {
  if (match($0, /<!--\s*begin-include\s+desc\s+([0-9]{8}T[0-9]{6})\s+([0-9]{8}T[0-9]{6})\s*-->/, a)) {
    if (a[1] <= a[2]) {
      startlineno = FNR
      # Copy the line up to and including the comment.
      printf "%s", substr($0, 1, RSTART + RLENGTH - 1)
      if (comment)
	printf "\n<!-- %s -->\n", comment
      print_breakouts(datetime_to_time(a[1]), datetime_to_time(a[2]))
      copying = 0		# Do not copy lines until <!-- end-include -->
    } else {
      printf "%s:%d: Start time (%s) is later than end time (%s)\n",
	FILENAME, FNR, a[1], a[2] > "/dev/stderr"
    }
  } else {
    printf "%s:%d: Incorrect date-times after \"<!-- begin-include desc\"\n",
      FILENAME, FNR > "/dev/stderr"
  }
}
# "<!-- begin-include last-mod -->": insert the time of the most
# recent modification of the calendars.
/<!--\s*begin-include\s+last-mod\s*-->/ && copying {
  startlineno = FNR
  match($0, /<!--\s*begin-include\s*last-mod\s*-->/)
  # Copy the line up to and including the comment, followed by the
  # optional comment from the command line.
  printf "%s%s", substr($0, 1, RSTART + RLENGTH - 1),
    (comment ? "<!-- " comment " -->" : "")
  print_last_mod()
  copying = 0			# Do not copy lines until <!-- end-include -->
}
# "<!-- begin-include [local] START [END] [TAG] -->": insert a
# calendar with the events that start between START and END (or
# within 24 hours from START, if there is no END).
/<!--\s*begin-include\s+(local\s+)?[0-9]{8}(T[0-9]{6})?(\s+([^-]|-[^-]|--[^>])*)?\s*-->/ && copying {
  # a[1] = "local", a[2] = start, a[4] = end, a[5] = the remaining text
  match($0, /<!--\s*begin-include\s+(local\s+)?([0-9]{8}(T[0-9]{6})?)(\s+[0-9]{8}T[0-9]{6})?(\s+([^-]|-[^-]|--[^>])*)?\s*-->/, a)
  startlineno = FNR
  # The generated lines get the same indentation as the current line.
  indent = gensub(/[^ 	].*/, "", 1, $0)
  # Copy the line up to and including the comment.
  print substr($0, 1, RSTART + RLENGTH - 1)
  if (comment)
    printf "%s<!-- %s -->\n", indent, comment
  starttime = datetime_to_time(a[2])
  if (a[4])
    endtime = datetime_to_time(trim(a[4]))
  else
    endtime = starttime + 24 * 60 * 60
  print_calendar(trim(a[5]), trim(a[1]) == "local", starttime, endtime, order,
		 indent)
  copying = 0			# Do not copy lines (until <!-- end-include -->)
}
# "<!-- begin-include next [local] HH:MM [TAG] -->": insert the events
# that start within the next HH hours and MM minutes, or that started
# at most "slack" seconds ago, sorted by time.
/<!--\s*begin-include\s+next\>([^-]|-[^-]|--[^>])*-->/ && copying {
  # a[1] = "local", a[2] = hours, a[3] = minutes, a[4] = remaining text
  if (match($0, /<!--\s*begin-include\s+next\s+(local\s+)?([0-9]*):([0-9][0-9]?)(\s+([^-]|-[^-]|--[^>])*)?\s*-->/, a)) {
    startlineno = FNR
    # The generated lines get the same indentation as the current line.
    indent = gensub(/[^ 	].*/, "", 1, $0)
    # Copy the line up to and including the comment.
    print substr($0, 1, RSTART + RLENGTH - 1)
    if (comment)
      printf "%s<!-- %s -->\n", indent, comment
    start = now - slack
    end = now + 60 * (60 * a[2] + a[3])
    print_calendar(trim(a[4]), trim(a[1]) == "local", start, end, "time",
		   indent)
    copying = 0			# Do not copy lines (until <!-- end-include -->)
  } else {
    printf "%s:%d: Expected hours:minutes after \"<!-- begin-include next\"\n",
      FILENAME, FNR > "/dev/stderr"
  }
}
# "<!-- begin-include now [PATTERN] -->": insert the current time,
# formatted with the strftime pattern PATTERN, or with a default
# pattern if there is none.
/<!--\s*begin-include\s+now(\s+([^-]|-[^-]|--[^>])+)?\s*-->/ && copying {
  match($0, /<!--\s*begin-include\s+now(\s+([^-]|-[^-]|--[^>])+)?\s*-->/, a)
  printf "%s", substr($0, 1, RSTART + RLENGTH - 1)
  startlineno = FNR
  if (comment) printf "<!-- %s -->", comment
  # The pattern is the whole of the outer group a[1], minus the white
  # space around it. (a[2] is the repeated inner group and only holds
  # what its last repetition matched, i.e., the last character.)
  a[1] = trim(a[1])
  if (a[1] != "") print strftime(a[1], now)
  else printf "%s", strftime("%d %B %Y, %l:%M %p", now)
  copying = 0			# Do not copy lines until <!-- end-include -->
}
# "<!-- begin-include legend START [END] [TAG] -->": insert a legend
# for the events between START and END (or within 24 hours from
# START, if there is no END).
/<!--\s*begin-include\s+legend\>([^-]|-[^-]|--[^>])*-->/ && copying {
  # a[1] = start, a[3] = end, a[4] = the remaining text
  if (match($0, /<!--\s*begin-include\s+legend\s+([0-9]{8}(T[0-9]{6})?)(\s+[0-9]{8}T[0-9]{6})?(\s+([^-]|-[^-]|--[^>])*)?\s*-->/, a)) {
    startlineno = FNR
    # The generated lines get the same indentation as the current line.
    indent = gensub(/[^ 	].*/, "", 1, $0)
    # Copy the line up to and including the comment.
    print substr($0, 1, RSTART + RLENGTH - 1)
    if (comment)
      printf "%s<!-- %s -->\n", indent, comment
    starttime = datetime_to_time(a[1])
    if (a[3])
      endtime = datetime_to_time(trim(a[3]))
    else
      endtime = starttime + 24 * 60 * 60
    print_legend(trim(a[4]), starttime, endtime, indent)
    copying = 0			# Do not copy lines (until <!-- end-include -->)
  } else {
    printf "%s:%d:  Incorrect date-times after \"<!-- begin-include legend\"\n",
      FILENAME, FNR > "/dev/stderr"
  }
}
# "<!-- end-include -->" ends the text that is being replaced.
/<!--\s*end-include\s*-->/ && !copying {
  # The next calendar is sorted in the default order again, unless
  # another "sort" comment comes before it.
  order = default_order
  # Resume copying, starting with this line. But omit what is before
  # the comment (and the white space in front of it) on this line,
  # because that still belongs to the replaced text.
  copying = 1
  match($0, /\s*<!--\s*end-include\s*-->/)
  $0 = substr($0, RSTART)
}
copying {			# Find existing IDs in the HTML file.
  # Record the value of every ID attribute (id=foo, id="foo", id='foo',
  # with optional white space around the "=") on the line, so that
  # "local" links know that the target is in the HTML file itself.
  s = $0
  while (match(s, /\<id\s*=\s*["']?([^ "'<>]+)/, x)) {
    targetfile[x[1]] = ""	# "" is the relative URL of the file itself
    s = substr(s, RSTART + RLENGTH)	# Continue after this attribute
  }
}
copying {print}			# Copy input to output when copying == 1

#
# Rules to parse files with lists of IDs.
#

# The first line of an id-file: the fourth field is the name of the
# file in which the IDs on the following lines occur.
/^## IDs ## / {
  mode = "ids"
  id_filename = $4
}

# Any other line of an id-file that looks like an ID (no spaces,
# quotes or angle brackets): remember in which file the ID occurs.
mode == "ids" && /^[^ "'<>]+$/ {
  targetfile[$0] = id_filename
}


END {
  # Final checks. The exit status is 1 if an assertion failed, if no
  # HTML file was seen, or if the HTML file ended inside a
  # "begin-include" without "end-include".
  if (_assert_exit) {
    exit 1
  } else if (mode != "html") {
    printf "%s: Last argument must be an HTML file\n", FILENAME > "/dev/stderr"
    exit 1
  } else if (!copying) {
    printf "%s:%d: \"begin-include\" without matching \"end-include\"\n",
      FILENAME, startlineno > "/dev/stderr"
    exit 1
  }
}


# unesc -- remove iCalendar escape sequences
#
# Returns s with every backslash-escaped character replaced by the
# character itself, except that "\n" and "\N" are replaced by a
# newline. A backslash at the very end of s is left as it is.
#
# t and x are local variables. (x is declared as such, so that calls
# do not overwrite the global array x that is used by the rules.)
function unesc(s,	t, x)
{
  t = ""			# The part of the result that is already done
  while (match(s, /\\(.)/, x)) {
    t = t substr(s, 1, RSTART - 1) (x[1] == "n" || x[1] == "N" ? "\n" : x[1])
    s = substr(s, RSTART + 2)	# Continue after the escaped character
  }
  return t s
}


# trim -- remove leading and trailing white space
#
# Returns a copy of s without the white space (spaces, tabs,
# newlines, etc.) at its start and at its end. White space inside s
# is kept. (The local variable t is not used.)
function trim(s,	t)
{
  sub(/^[[:space:]]+/, "", s)	# Leading white space
  sub(/[[:space:]]+$/, "", s)	# Trailing white space
  return s
}


# print_breakouts -- print breakouts that start between starttime and endtime
#
# Prints an <article> element for every stored event that starts at
# or after starttime and before endtime and that has the category
# "Breakout Sessions". Both times are compared with event_dtstart,
# which holds seconds since the epoch.
#
# Each article has the ID "b-" followed by the event's UID. That ID
# is also recorded in targetfile, as occurring in the HTML file
# itself.
#
# The remaining parameters are local variables. ("categories" is
# declared, but not used.)
#
# NOTE(review): the events are visited with "for (i in ...)", so the
# order of the articles depends on awk's array traversal order,
# unless that order is fixed elsewhere -- confirm.
function print_breakouts(starttime, endtime,	categories, x, i, j, class, s)
{
  # printf ">print_breakouts(%s \"%s\", %s \"%s\")\n",
  #   starttime, strftime("%H:%M", starttime),
  #   endtime, strftime("%H:%M", endtime) > "/dev/stderr"
  for (i in event_dtstart) {
    # Skip events outside the interval and events that are not breakouts.
    if (event_dtstart[i] < starttime) continue
    if (event_dtstart[i] >= endtime) continue
    if (event_categories[i] !~ /\aBreakout Sessions(\a|$)/) continue
    # printf " Found %s\n", event_summary[i] > "/dev/stderr"

    # The inferred type will be put in a class attribute.
    delete class
    class[meeting_type(i)] = 1

    # Add classes from extra_tags, if any. The key of the extra_tags
    # array is the UID. And if there is a value, it starts with a "\a" (BEL).
    # (So x[1] is always empty, which is why the loop stops at j > 1.)
    for (j = split(extra_tags[event_uid[i]], x, /\a/); j > 1; j--)
      class[x[j]] = 1

    # Print a breakout.
    printf "\n<article id=\"b-%s\" class=\"%s\">\n",
      esc(event_uid[i]), esc(ajoini(class, " "))
    targetfile["b-" event_uid[i]] = "" # Remember that the ID occurs in the file
    printf "<h3>%s</h3>\n", markdown::to_inline_html(event_summary[i])
    printf "<dl>\n"
    # if (event_status[i] == "TENTATIVE")
    #   printf "<dt class=status>Status\n<dd>Tentative\n"

    # Collect the tracks: the classes that have an icon and/or a
    # dedicated page. A track with a page becomes a link to that page.
    # Classes that have neither are not listed.
    s = ""
    for (j in class)
      if (j in icon && j in link)
	s = s sprintf("\n<a href=\"%s\"><abbr class=\"tr-%s %s\">%s</abbr></a>",
	  esc(link[j]), esc(j), icon[j], esc(track_title[j]))
      else if (j in icon)
	s = s sprintf("\n<abbr class=\"tr-%s %s\">%s</abbr>",
	  esc(j), icon[j], esc(track_title[j]))
      else if (j in link)
	s = s sprintf("\n<a href=\"%s\" class=\"tr-%s\">%s</a>",
	  esc(link[j]), esc(j), esc(track_title[j]))
    if (s)
      printf "<dt>Tracks\n<dd>%s\n", s

    # The sections that parse_description() stored in event_info are
    # markdown. Each section is omitted if it is empty.
    if (event_info[i,"chairs"])
      printf "<dt>Proposer\n<dd>\n%s",
	markdown::to_html(event_info[i,"chairs"])
    if (event_info[i,"description"])
      printf "<dt>Description\n<dd>\n%s",
	markdown::to_html(event_info[i,"description"])
    if (event_info[i,"goals"])
      printf "<dt>Goals\n<dd>\n%s",
	markdown::to_html(event_info[i,"goals"])
    if (event_info[i,"agenda"])
      printf "<dt>Agenda\n<dd>\n%s",
	markdown::to_html(event_info[i,"agenda"])

    # The "Links" entry is always printed. It contains the event's
    # own URL from the description (if any), the event's URL if that
    # is a different one, and the materials (if any).
    printf "<dt>Links\n<dd>\n"
    if (event_info[i,"self"] || event_url[i]) {
      printf "<ul>\n"
      if (event_info[i,"self"])
	printf "<li><a href=\"%s\">W3C Calendar</a>\n", esc(event_info[i,"self"])
      if (event_url[i] && event_url[i] != event_info[i,"self"])
	printf "<li><a href=\"%s\">More info</a>\n", esc(event_url[i])
      printf "</ul>\n"
    }
    if (event_info[i,"materials"])
      printf "%s", markdown::to_html(event_info[i,"materials"])
    if (event_info[i,"joining"])
      printf "<dt>Dial-in\n<dd>\n%s",
	markdown::to_html(event_info[i,"joining"])
    if (event_info[i,"chat"]) {
      printf "<dt>IRC\n<dd>\n%s", markdown::to_html(event_info[i,"chat"])
    }

    # The room and the start and end times. The datetime attributes
    # are in UTC (third argument of strftime is 1), the visible times
    # are in the current time zone.
    printf "<dt class=room>Room\n<dd>%s,\n",
      markdown::to_inline_html(event_location[i])
    printf "<time datetime=\"%s\">%s</time>&#x200B;–&#x200B;",
      esc(strftime("%Y-%m-%d %H:%M:%SZ", event_dtstart[i], 1)),
      esc(strftime("%H:%M", event_dtstart[i]))
    printf "<time datetime=\"%s\">%s</time>\n",
      esc(strftime("%Y-%m-%d %H:%M:%SZ", event_dtend[i], 1)),
      esc(strftime("%H:%M", event_dtend[i]))
    printf "</dl>\n"
    printf "</article>\n"
  }
  # printf "<print_breakouts(%s, %s)\n", starttime, endtime > "/dev/stderr"
}


# parse_description -- split a description into goals, proposer, dial-in, etc.
function parse_description(n,	rest, m, agenda_text, bodies, heads, idx, key,
			   taglist, chat, items, count)
{
  # Input: event_description[n], the DESCRIPTION text of event n. It
  # is expected to hold, in this order: the URL of the event itself,
  # free-text description, an optional agenda link, an optional
  # agenda, an optional joining link, optional joining instructions
  # and optional chat instructions.
  #
  # Output: entries event_info[n,*] ("self", "description", "agenda",
  # "joining", "chat", "tracks" and one entry per "**title:**" section
  # of the agenda), track_title[] for every track found, and the
  # track tags appended to extra_tags[event_uid[n]].

  rest = event_description[n]

  # The leading URL (anything that looks like "scheme:...").
  if (match(rest, /^([a-zA-Z]+:\S*)\s*/, m)) {
    event_info[n,"self"] = m[1]
    # NOTE(review): this starts at RLENGTH rather than RLENGTH + 1, so
    # the last matched character (normally a newline) stays at the
    # front of the remaining text. The "\n+..." patterns below may
    # depend on that -- confirm before changing it.
    rest = substr(rest, RLENGTH)
  }

  # The remaining sections are peeled off from the end towards the
  # start. (The chat and joining-instructions sections are currently
  # only present in personalized rather than public calendars.)

  if (match(rest, /\n+Chat\n\n\s*(.*)$/, m)) {
    rest = substr(rest, 1, RSTART - 1)
    chat = m[1]
    # Three cases: a raw irc.w3.org URL becomes a link whose anchor is
    # the channel name; a bare "#channel" becomes a link to
    # irc.w3.org; anything else is passed on unchanged and assumed to
    # be markdown.
    if (match(chat,
	      /^\s*(https:\/\/irc\.w3\.org\/\?channels=)(%23|#)(\S+)\s*$/, m))
      chat = "[#" m[3] "](" m[1] "%23" m[3] ")"
    else if (match(chat, /^\s*#(\S+)\s*$/, m))
      chat = "[#" m[1] "](https://irc.w3.org/?channels=%23" m[1] ")"
    event_info[n,"chat"] = chat
  }

  # Free-form joining instructions (a section of their own).
  if (match(rest, /\n+Joining Instructions\n\n(.*)$/, m)) {
    event_info[n,"joining"] = m[1]
    rest = substr(rest, 1, RSTART - 1)
  }

  # A joining link on a single line. It is put in front of whatever
  # instructions were found above.
  if (match(rest, /\n+Joining Instructions: \s*(.*)$/, m)) {
    # TODO: Handle "(" and ")" in the URL.
    event_info[n,"joining"] = "[Join the meeting](" m[1] ")\n\n" event_info[n,"joining"]
    rest = substr(rest, 1, RSTART - 1)
  }

  # The agenda section. It is kept aside (including its leading blank
  # line) to be split into subsections further down.
  if (match(rest, /\n+Agenda(\n\n.*)$/, m)) {
    agenda_text = m[1]
    rest = substr(rest, 1, RSTART - 1)
  }

  # An agenda link on a single line.
  if (match(rest, /\n+Agenda: \s*(.*)$/, m)) {
    # TODO: Handle "(" and ")" in the URL.
    event_info[n,"agenda"] = "[Agenda](" m[1] ")"
    rest = substr(rest, 1, RSTART - 1)
  }

  # What is left over is the description proper.
  event_info[n,"description"] = rest

  # For breakouts the agenda is itself divided into sections that each
  # start with a "**title:**" line. heads[] gets those title lines and
  # bodies[] the text after each of them. Every body is stored under
  # the title reduced to lowercase alphanumerics. One of the sections
  # is probably "description", which then replaces the value stored
  # just above.
  patsplit(agenda_text, heads, /\n\n\*\*[^\n]+:\*\*\n/, bodies)
  for (idx in heads) {
    key = tolower(gensub(/[^[:alnum:]]/, "", "g", heads[idx]))
    event_info[n,key] = bodies[idx]
  }

  # Turn the list of tracks into extra tags. First replace the list
  # markers ("- " at the start of a line) and a final newline by BEL,
  # so that the items are the runs of non-BEL characters.
  rest = gensub(/(^-\s+|\n-\s+|\n$)/, "\a", "g", event_info[n,"tracks"])
  taglist = ""
  count = patsplit(rest, items, /[^\a]+/)
  while (count > 0) {
    # Normalize the item the same way as the ecosystem names: "&"
    # becomes "and" and each run of non-word characters becomes "-".
    key = tolower(items[count])
    key = gensub(/-\s+([^\n]+)\n*/, "\a\\1", "g", key)
    key = gensub(/\s*&\s*/, " and ", "g", key)
    key = gensub(/[^[:alnum:]_\a]+/, "-", "g", key)
    track_title[key] = items[count]	# Remember the readable title
    taglist = taglist "\a" key
    count--
  }
  event_info[n,"tracks"] = taglist
  extra_tags[event_uid[n]] = extra_tags[event_uid[n]] taglist
}


# esc -- escape HTML delimiters
function esc(s)
{
  # Returns s with &, <, > and " replaced by the corresponding
  # entities. The ampersand is handled first, so that the ampersands
  # of the entities inserted afterwards are not escaped again. ("\\&"
  # in a replacement text stands for a literal "&".)
  s = gensub(/&/, "\\&amp;", "g", s)
  s = gensub(/</, "\\&lt;", "g", s)
  s = gensub(/>/, "\\&gt;", "g", s)
  return gensub(/"/, "\\&quot;", "g", s)
}


# datetime_to_time -- convert icalendar datetime to seconds since epoch
function datetime_to_time(s,	x, tz, t, datespec, had_tz)
{
  # s is an iCalendar date or date-time, optionally preceded by a
  # time zone parameter: "[TZID=zone:]YYYYMMDD[Thhmmss[Z]]". A date
  # without a time is taken to be at 00:00:00. A trailing "Z" means
  # the time is in UTC; otherwise it is interpreted in the zone given
  # by TZID, or in the current time zone if there is no TZID.
  #
  # Returns the value of mktime() for that moment. On a syntax error,
  # prints a message to stderr and exits the program with status 1.

  if (!match(s, /^(TZID=([^:]+):)?([0-9]{4})([0-9]{2})([0-9]{2})(T([0-9]{2})([0-9]{2})([0-9]{2})(Z)?)?$/, x)) {
    printf "%s:%d: Invalid date-time: %s\n", FILENAME, FNR, s > "/dev/stderr"
    exit 1
  }

  # x[3], x[4], x[5] = year, month, day; x[6] = the whole time part, if
  # any; x[7], x[8], x[9] = hours, minutes, seconds; x[10] = "Z" or empty.
  if (!x[6]) datespec = x[3] " " x[4] " " x[5] " 00 00 00"
  else datespec = x[3] " " x[4] " " x[5] " " x[7] " " x[8] " " x[9]

  # Temporarily switch to the time zone of the TZID parameter. Remember
  # whether TZ was set at all: restoring an unset TZ as an empty
  # string would leave TZ="" behind, which is not the same as unset
  # (C libraries commonly treat an empty TZ as UTC) and would change
  # all later local time conversions.
  if (x[2]) {
    had_tz = ("TZ" in ENVIRON)
    if (had_tz) tz = ENVIRON["TZ"]
    ENVIRON["TZ"] = x[2]		# Set TZ
  }

  # The second argument is the UTC flag: non-empty ("Z") means UTC.
  t = mktime(datespec, x[10])

  if (x[2]) {				# Restore TZ
    if (had_tz) ENVIRON["TZ"] = tz
    else delete ENVIRON["TZ"]
  }
  # printf "=datetime_to_time(%s) -> %d (%s)\n", s, t, strftime("%d %b %Y %H:%M", t) > "/dev/stderr"
  return t
}


# print_last_mod -- print the date of last modification of the calendar
function print_last_mod()
{
  # Prints one line: the global last_mod formatted with strftime's
  # "%c", or the word "unknown" when last_mod is negative.
  printf "%s\n", (last_mod < 0 ? "unknown" : strftime("%c", last_mod))
}


# print_calendar -- print the HTML grid of events in a given time range
function print_calendar(tag, locallinks, starttime, endtime, sort, indent,
			i, j, location, locationnr, found,
			m, x, y, earliest, latest, h, k, events, a, r)
{
  # If tag is set, it filters the events to those with this tag. If
  # locallinks is set, each event is a link to a local anchor #b-<UID>
  # instead of to the W3C calendar entry. Only events that start at
  # or after starttime and before endtime are printed. sort is the
  # sort order: "name" (default), "time" or "room". indent is a string
  # of zero or more spaces and tabs (empty by default) to put at the
  # start of each printed line.
  #
  # If no event matches, the global emptytext (if not empty) is
  # printed instead of a grid. It is prefixed with indent, unless it
  # already starts with a space or tab.
  #
  # The grid has one column per location (column 1 holds the times)
  # and one row per quarter of an hour (row 1 holds the location
  # names).

  # printf ">print_calendar(%s)\n", tag > "/dev/stderr"

  # Find all events that start between starttime and endtime and that
  # belong to track or ecosystem "tag" (if tag is not empty). Store
  # their indexes in the array "found".
  for (i in event_dtstart)
    if (event_dtstart[i] >= starttime &&
	event_dtstart[i] < endtime &&
	filter(i, tag))
      found[i] = i

  if (length(found) == 0) {	# No events found at all
    if (emptytext ~ /^[ \t]/) printf "%s\n", emptytext
    else if (emptytext) printf "%s%s\n", indent, emptytext
    return
  }

  # Find the earliest start time and the latest end time of all found
  # events.
  earliest = endtime
  latest = starttime
  for (i in found) {
    if (event_dtstart[i] < earliest) earliest = event_dtstart[i]
    if (event_dtend[i] > latest) latest = event_dtend[i]
  }

  # Round earliest and latest down, resp. up to the nearest quarter hours.
  h = 0 + strftime("%M", earliest)
  earliest -= (h - 15 * int(h / 15)) * 60
  h = 0 + strftime("%M", latest)
  latest += (15 * int((h + 14) / 15) - h) * 60
  # printf "earliest = %d latest = %d\n", earliest, latest > "/dev/stderr"

  # Collect all locations used in the list "location".
  # Sort the list alphabetically. Then invert that list to get a
  # mapping location -> number in the array "locationnr".
  for (i in found) location[event_location[i]] = 1
  asorti(location)
  for (i in location) locationnr[location[i]] = i

  # printf " Found locations:" > "/dev/stderr"
  # for (i in location) printf " %d = %s", i, location[i] > "/dev/stderr"
  # printf "\n" > "/dev/stderr"

  # Put all events in a grid. m[y][x] is the slot at time y
  # at location x. The value can be empty, a number n (event n starts
  # at time y in location x) or an "x" (the slot is the continuation
  # of an event that started earlier). The grid is only used to
  # detect two events that start in the same slot; what gets printed
  # is determined by the "events" array.
  for (i in found) {
    y = time_to_row(event_dtstart[i] - earliest, 0)
    x = locationnr[event_location[i]]
    if (m[y][x])
      printf "Conflicting events on %s for %s: \"%s\" and \"%s\". Ignoring the former.\n",
	strftime("%d %b %H:%M", event_dtstart[i]), event_location[i],
	event_summary[m[y][x]], event_summary[i] > "/dev/stderr"
    m[y][x] = i;
    for (j = time_to_row(event_dtend[i] - earliest, 1) - 1; j > y; j--)
      m[j][x] = "x"
    # The events array will be used to sort the events. Each entry is
    # a sort key followed by the row, the column and the event number,
    # separated by BEL characters.
    if (sort == "room")
      events[++k] = event_location[i] event_summary[i] "\a" y "\a" x "\a" i
    else if (sort == "time")
      events[++k] = event_dtstart[i] event_summary[i] "\a" y "\a" x "\a" i
    else
      events[++k] = event_summary[i] "\a" y "\a" x "\a" i
  }

  # Make a row along the top of the grid with all locations used. The
  # classes "cX", "rY" and "hZ" mean that the element is
  # to be displayed in column X and row Y and spans Z rows.
  for (i = 1; i <= length(location); i++)
     printf "%s<b class=\"r%02d c%02d\" aria-hidden=true>%s</b>\n",
       indent, 1, i + 1, markdown::to_inline_html(location[i])
  printf "%s<br>\n", indent

  # Print times along the left edge of the grid. Only print hh:00 and
  # hh:30, not hh:15 and hh:45. All of these cells are in column 1.
  i = earliest
  r = 2
  while (i < latest) {
    if (((i - earliest) / (15 * 60)) % 2 != 0)
      # The class must be the literal "c01", like in the other branch.
      # (It used to be "c%01", which is not a valid printf conversion
      # and resulted in a broken class attribute.)
      printf "%s<time class=\"r%02d c01\" aria-hidden=true></time>\n", indent, r
    else
      printf "%s<time class=\"r%02d c01\" aria-hidden=true>%s</time>\n",
	indent, r, hours_and_minutes(i, starttime)
    i += 15 * 60
    r++
  }
  # printf "printed rows 2 (%s) to %d (%d)\n", earliest, r - 1, latest >"/dev/stderr"

  # Print all slots. These empty slots can be used to attach borders
  # and backgrounds.
  printf "%s<i class=\"r01 c01\" aria-hidden=true>&nbsp;</i>\n", indent # Top left corner
  i = earliest
  r = 2
  while (i < latest) {
    for (j in location)
      printf "%s<i class=\"r%02d c%02d\" aria-hidden=true>&nbsp;</i>\n", indent, r, j + 1
    i += 15 * 60
    r++
  }
  printf "%s<br>\n", indent

  # Print all slots in the matrix.
  # We first sort the events.
  asort(events)
  for (i = 1; i <= k; i++) {
    split(events[i], a, /\a/)	# a[2] = row, a[3] = column, a[4] is event nr.
    # Rows are shifted by 2 and columns by 1, because row 1 holds the
    # location names and column 1 holds the times.
    print_event(a[4], a[2] + 2, a[3] + 1, locallinks, starttime, indent)
  }

  # # Make a row along the bottom of the grid with all locations used.
  # for (i = 1; i <= length(location); i++)
  #    printf "%s<b class=\"r%02d c%02d\">%s</b>\n", indent,
  #      latest - earliest + 2, i + 1, markdown::to_inline_html(location[i])

  # printf "<print_calendar(%s)\n", tag > "/dev/stderr"
}


# print_legend -- print the HTML for the icon legends for a day (YYYYMMDD)
function print_legend(tag, starttime, endtime, indent,
		      i, j, n, found, class, legend, label)
{
  # If tag is set, it filters the events to those with this tag. If
  # starttime and endtime are > 0, only events that start between
  # starttime and endtime are considered.

  # printf ">print_legend(%s, %s, %s)\n", tag,
  #   strftime("%Y%m%dT%H%M%S", starttime),
  #   strftime("%Y%m%dT%H%M%S", endtime) > "/dev/stderr"

  # Find all events that start between starttime and endtime and that
  # belong to track or ecosystem "tag" (if tag is not empty). Store
  # their indexes in the array "found".
  for (n in event_dtstart)
    if (event_dtstart[n] >= starttime &&
	event_dtstart[n] < endtime &&
	filter(n, tag))
      found[n] = 1

  # Collect all track types of the found events in the array "legend".
  for (n in found) {
    delete class

    # Add classes from extra_tags, if any. The key of the extra_tags
    # array is the UID. And if there is a value, it starts with a "\a"
    # (BEL).
    for (i = split(extra_tags[event_uid[n]], x, /\a/); i > 1; i--)
      class[x[i]] = 1

    # If any of the attendees are groups that belong to known industry
    # tracks ("ecosystems"), add those tracks to the class attribute.
    # event_attendees[n] is a list of names where each name starts
    # with \a.
    for (i = split(event_attendees[n], x, /\a/); i > 1; i--)
    if (x[i] in group_track_membership)
      for (j in group_track_membership[x[i]]) class[j] = 1

    # If any of the keywords in the class is a special tag, add it to
    # the legend to print.
    for (i in class) if (i in icon) legend[i] = 1

    # printf " print_legend event=%d class=", n > "/dev/stderr"
    # for (i in class) printf "%s ", i > "/dev/stderr"