# Source: app/discovery_stats.py (rlseaman/CSS_MPC_toolkit, branch main)
"""
NEO Discovery Statistics — Interactive Dash Explorer

Stacked bar chart of NEO discoveries by year and survey, with drill-down
by size class, survey grouping, and cumulative views.  Data sourced from
the MPC/SBN PostgreSQL database (mpc_orbits + obs_sbn).

Usage:
    source venv/bin/activate.csh
    python app/discovery_stats.py

Then open http://127.0.0.1:8050/ in a browser.
"""

import hashlib
import os
import sys
import threading
import time

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Must be set before lib imports so their module-level cache ages see it
if "--serve-only" in sys.argv:
    os.environ["CSS_SERVE_ONLY"] = "1"

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import dash
from dash import ALL, Dash, Input, Output, State, ctx, dcc, html, no_update
from dash.dcc import send_data_frame
from dash.exceptions import PreventUpdate
from plotly.subplots import make_subplots

from lib.db import connect, timed_query
from lib.mpec_parser import (fetch_recent_mpecs, fetch_mpec_detail,
                              mpec_id_to_url, lookup_mpecs_by_designation)
from mpc_designation import pack as pack_designation, unpack as unpack_designation
from lib.nea_catalog import load_nea_h_lookup
from lib.pha_catalog import load_pha_set
from lib.sbdb_moid import load_sbdb_moid_lookup
from lib.solar import (sun_altitude, classify_twilight,
                       _observer_latitude, TWILIGHT_ORDER)
from lib.identifications import resolve_designation
from lib.api_clients import (
    fetch_sbdb, fetch_sentry, fetch_neofixer_orbit, fetch_neocc_risk,
    fetch_neofixer_ephem, fetch_neofixer_ades, resolve_mps_url,
    _load_mps_bundles,
)
try:
    from lib.api_clients import check_service_health
except ImportError:
    # check_service_health could not be imported from lib.api_clients:
    # define a local stand-in with the same name so later calls still work.
    def check_service_health():
        """Fallback health check returning a fixed status dict.

        Only "MPC" is reported as True; every other service key is False.
        """
        return {"NEOfixer": False, "MPC": True, "JPL": False,
                "Sentry": False, "NEOCC": False}

# ---------------------------------------------------------------------------
# Data constants
# ---------------------------------------------------------------------------

# Cache file includes a hash of the SQL so it auto-invalidates on query changes
_APP_DIR = os.path.dirname(os.path.abspath(__file__))

# Station code -> readable name (top discovery sites only)
STATION_NAMES = {
    "703": "Catalina",
    "G96": "Mt. Lemmon",
    "E12": "Siding Spring",
    "I52": "Mt. Lemmon-Steward",
    "V06": "CSS-Kuiper",
    "G84": "Mt. Lemmon SkyCenter",
    "693": "Catalina Station",
    "V00": "Kitt Peak-Bok",
    "X05": "Rubin",
    "F51": "Pan-STARRS 1",
    "F52": "Pan-STARRS 2",
    "T05": "ATLAS-HKO",
    "T08": "ATLAS-MLO",
    "T03": "ATLAS-Sutherland",
    "M22": "ATLAS-El Sauce",
    "W68": "ATLAS-Río Hurtado",
    "R17": "ATLAS-TDO",
    "704": "LINEAR",
    "699": "LONEOS",
    "691": "Spacewatch",
    "291": "Spacewatch II",
    "644": "NEAT-Palomar",
    "608": "NEAT-Haleakala",
    "I41": "ZTF",
    "C51": "WISE/NEOWISE",
    "W84": "DECam",
    "U68": "SynTrack",
    "U74": "SynTrack 2",
    "675": "Palomar Mountain",
    "K88": "GINOP-KHK",
    "W94": "MAPS",
    "L51": "MARGO",
    "W16": "Pleasant Groves",
    "O18": "WFST",
    "N94": "Altay",
    "L87": "Moonbase South",
}

# Station code -> project (for grouped view)
# Core groupings match CNEOS site_all.json definitions.
# Extended with Palomar Mountain, Independent Surveys (dynamic in
# yearly_breakdown.py, static here), Historical, Other Surveys,
# and Other Follow-up.
STATION_TO_PROJECT = {
    "704": "LINEAR", "G45": "LINEAR", "P07": "LINEAR",
    "566": "NEAT", "608": "NEAT", "644": "NEAT",
    "691": "Spacewatch", "291": "Spacewatch",
    "699": "LONEOS",
    "703": "Catalina Survey", "E12": "Catalina Survey",
    "G96": "Catalina Survey",
    "I52": "Catalina Follow-up", "V06": "Catalina Follow-up",
    "G84": "Catalina Follow-up",
    "693": "Catalina Survey",
    "V00": "Bok NEO Survey",
    "F51": "Pan-STARRS", "F52": "Pan-STARRS",
    "C51": "NEOWISE",
    "T05": "ATLAS", "T07": "ATLAS", "T08": "ATLAS",
    "T03": "ATLAS", "M22": "ATLAS", "W68": "ATLAS", "R17": "ATLAS",
    "X05": "Rubin/LSST",
    "I41": "Other-US", "U68": "Other-US", "U74": "Other-US",
    "W84": "Other-US",
    # Palomar Mountain — single station, historical significance
    "675": "Palomar Mountain",
    # Independent Surveys — active NEO discoverers not affiliated
    # with a major survey project (>10 NEOs in recent years)
    "K88": "Independent Surveys", "W94": "Independent Surveys",
    "L51": "Independent Surveys", "W16": "Independent Surveys",
    "O18": "Independent Surveys", "N94": "Independent Surveys",
    "L87": "Independent Surveys",
    # Historical — stations whose last NEO discovery was 1999 or earlier
    "010": "Historical", "024": "Historical", "026": "Historical",
    "029": "Historical", "045": "Historical", "046": "Historical",
    "071": "Historical", "074": "Historical", "078": "Historical",
    "104": "Historical", "120": "Historical", "327": "Historical",
    "372": "Historical", "385": "Historical", "391": "Historical",
    "399": "Historical", "400": "Historical", "402": "Historical",
    "411": "Historical", "500": "Historical", "511": "Historical",
    "548": "Historical", "561": "Historical", "595": "Historical",
    "662": "Historical", "688": "Historical", "690": "Historical",
    "734": "Historical", "760": "Historical", "805": "Historical",
    "808": "Historical", "883": "Historical", "888": "Historical",
    "896": "Historical", "910": "Historical",
    # Other Surveys — stations with NEO discoveries from the MPC
    # YearlyBreakdown page, not in a named project above
    "012": "Other Surveys", "033": "Other Surveys",
    "049": "Other Surveys", "069": "Other Surveys",
    "095": "Other Surveys", "106": "Other Surveys",
    "113": "Other Surveys", "114": "Other Surveys",
    "118": "Other Surveys", "119": "Other Surveys",
    "152": "Other Surveys", "185": "Other Surveys",
    "198": "Other Surveys", "221": "Other Surveys",
    "240": "Other Surveys", "246": "Other Surveys",
    "247": "Other Surveys", "290": "Other Surveys",
    "300": "Other Surveys", "304": "Other Surveys",
    "309": "Other Surveys", "333": "Other Surveys",
    "381": "Other Surveys", "408": "Other Surveys",
    "413": "Other Surveys", "428": "Other Surveys",
    "446": "Other Surveys", "461": "Other Surveys",
    "493": "Other Surveys", "557": "Other Surveys",
    "568": "Other Surveys", "599": "Other Surveys",
    "620": "Other Surveys", "621": "Other Surveys",
    "661": "Other Surveys", "673": "Other Surveys",
    "678": "Other Surveys", "683": "Other Surveys",
    "695": "Other Surveys", "705": "Other Surveys",
    "807": "Other Surveys", "809": "Other Surveys",
    "823": "Other Surveys", "858": "Other Surveys",
    "926": "Other Surveys", "941": "Other Surveys",
    "950": "Other Surveys",
    "A44": "Other Surveys", "A50": "Other Surveys",
    "A77": "Other Surveys", "B01": "Other Surveys",
    "B74": "Other Surveys",
    "C41": "Other Surveys", "C55": "Other Surveys",
    "C57": "Other Surveys", "C85": "Other Surveys",
    "C94": "Other Surveys", "C95": "Other Surveys",
    "D00": "Other Surveys", "D29": "Other Surveys",
    "D35": "Other Surveys",
    "F84": "Other Surveys",
    "G03": "Other Surveys", "G32": "Other Surveys",
    "G37": "Other Surveys", "G78": "Other Surveys",
    "G89": "Other Surveys", "G92": "Other Surveys",
    "H15": "Other Surveys", "H21": "Other Surveys",
    "H27": "Other Surveys", "H36": "Other Surveys",
    "H55": "Other Surveys",
    "I08": "Other Surveys", "I16": "Other Surveys",
    "I93": "Other Surveys",
    "J04": "Other Surveys", "J13": "Other Surveys",
    "J43": "Other Surveys", "J75": "Other Surveys",
    "K19": "Other Surveys", "K95": "Other Surveys",
    "L96": "Other Surveys",
    "M11": "Other Surveys", "M57": "Other Surveys",
    "N56": "Other Surveys", "N86": "Other Surveys",
    "N87": "Other Surveys", "N89": "Other Surveys",
    "O75": "Other Surveys",
    "Q57": "Other Surveys", "Q60": "Other Surveys",
    "Q62": "Other Surveys", "Q66": "Other Surveys",
    "T09": "Other Surveys", "T14": "Other Surveys",
    "U63": "Other Surveys",
    "V03": "Other Surveys", "V11": "Other Surveys",
    "W57": "Other Surveys", "W76": "Other Surveys",
    "W86": "Other Surveys", "W93": "Other Surveys",
    "W95": "Other Surveys",
    "X07": "Other Surveys", "X19": "Other Surveys",
    "X74": "Other Surveys",
    "Y00": "Other Surveys", "Y01": "Other Surveys",
    "Y05": "Other Surveys", "Y66": "Other Surveys",
    "Y89": "Other Surveys",
    "Z84": "Other Surveys",
}
# Any station not in STATION_TO_PROJECT falls into "Other Follow-up"

# Invert STATION_TO_PROJECT: each project name maps to the list of station
# codes assigned to it, in STATION_TO_PROJECT insertion order.
PROJECT_STATIONS = {}
for _stn, _proj in STATION_TO_PROJECT.items():
    if _proj not in PROJECT_STATIONS:
        PROJECT_STATIONS[_proj] = []
    PROJECT_STATIONS[_proj].append(_stn)

# Per-station colors: every mapped station starts out as None here and is
# given its parent project's color once PROJECT_COLORS is defined below.
STATION_COLORS = dict.fromkeys(STATION_TO_PROJECT)

# Stacking order matches CNEOS (bottom to top in the bar chart).
# Plotly stacks traces in list order, so first entry = bottom of stack.
PROJECT_ORDER = [
    "LINEAR",
    "NEAT",
    "Spacewatch",
    "LONEOS",
    "Catalina Survey",
    "Catalina Follow-up",
    "Pan-STARRS",
    "NEOWISE",
    "ATLAS",
    "Bok NEO Survey",
    "Rubin/LSST",
    "Other-US",
    "Palomar Mountain",
    "Independent Surveys",
    "Historical",
    "Other Surveys",
    "Other Follow-up",
]

# Colors match CNEOS site_all.json exactly for core groups.
# Extended groups use distinguishable muted tones.
PROJECT_COLORS = {
    "LINEAR": "#4363d8",
    "NEAT": "#f58231",
    "Spacewatch": "#e6194B",
    "LONEOS": "#ffe119",
    "Catalina Survey": "#3cb44b",
    "Catalina Follow-up": "#aaffc3",
    "Pan-STARRS": "#f032e6",
    "NEOWISE": "#469990",
    "ATLAS": "#42d4f4",
    "Bok NEO Survey": "#dcbeff",
    "Rubin/LSST": "#800000",
    "Other-US": "#9A6324",
    "Palomar Mountain": "#e6beff",
    "Independent Surveys": "#fabebe",
    "Historical": "#c0c0c0",
    "Other Surveys": "#d4bc9a",
    "Other Follow-up": "#b0b0b0",
}

# Give each mapped station the color of its parent project (dark gray
# "#a9a9a9" when the project has no entry in PROJECT_COLORS), then add the
# catch-all "Other Follow-up" key used for stations outside the mapping.
STATION_COLORS.update(
    (code, PROJECT_COLORS.get(project, "#a9a9a9"))
    for code, project in STATION_TO_PROJECT.items()
)
STATION_COLORS["Other Follow-up"] = PROJECT_COLORS["Other Follow-up"]

# Station codes for the dropdown: walk the listed projects in display
# order and append each project's stations sorted by code.  Projects not
# named here contribute no individual stations.
_SURVEY_STATIONS = []
for _proj in ("LINEAR", "NEAT", "Spacewatch", "LONEOS",
              "Catalina Survey", "Pan-STARRS", "NEOWISE",
              "ATLAS", "Bok NEO Survey", "Rubin/LSST", "Other-US",
              "Palomar Mountain", "Independent Surveys"):
    _SURVEY_STATIONS.extend(sorted(PROJECT_STATIONS.get(_proj, [])))

# H magnitude size classes (standard p_v = 0.14 boundaries).
# Each entry is (label, lower H bound, upper H bound); None marks an
# open-ended side.  As the labels state, the lower bound is inclusive and
# the upper bound exclusive.
H_BINS = [
    ("H < 17.75 (~1 km+)", None, 17.75),
    ("17.75 \u2264 H < 22 (~140 m\u20131 km)", 17.75, 22),
    ("22 \u2264 H < 24.25 (~50\u2013140 m)", 22, 24.25),
    ("24.25 \u2264 H < 27.75 (~10\u201350 m)", 24.25, 27.75),
    ("H \u2265 27.75 (< 10 m)", 27.75, None),
]

# Colors for size-class stacking (viridis palette, matching size histogram).
# Five colors for the five H_BINS entries -- presumably paired by index;
# verify at the use site.
SIZE_COLORS = ["#440154", "#31688e", "#35b779", "#90d743", "#fde725"]

# ---------------------------------------------------------------------------
# Ecliptic and galactic plane coordinates for sky map overlays
# ---------------------------------------------------------------------------

# Ecliptic plane in equatorial coordinates: sample ecliptic longitude at
# one-degree steps from 0 to 360 and rotate about the x-axis by the
# obliquity to obtain (RA, Dec) in degrees, with RA wrapped by mod 360.
_ECL_LON = np.linspace(0, 360, 361)
_OBLIQUITY = 23.44  # degrees
_ecl_lon_rad = np.radians(_ECL_LON)
_obliquity_rad = np.radians(_OBLIQUITY)
_ECL_RA_360 = np.mod(
    np.degrees(np.arctan2(
        np.sin(_ecl_lon_rad) * np.cos(_obliquity_rad),
        np.cos(_ecl_lon_rad),
    )),
    360,
)
_ECL_DEC = np.degrees(np.arcsin(
    np.sin(_ecl_lon_rad) * np.sin(_obliquity_rad)
))

# Galactic plane (b = 0) via the standard J2000 galactic-to-equatorial
# rotation matrix (Hipparcos/IAU).  Its columns are the galactic x, y and
# z (= north galactic pole) unit vectors expressed in equatorial axes.
# Sanity check: GC at (266.4 deg, -28.9 deg), NGP at (192.86 deg, 27.13 deg).
_R_GAL_TO_EQ = np.array([
    [-0.05487554,  0.49410943, -0.86766615],
    [-0.87343711, -0.44482963, -0.19807637],
    [-0.48383502,  0.74698224,  0.45598378],
])
_GAL_L_RAD = np.radians(np.linspace(0, 360, 361))
# Unit vectors along the galactic equator, one column per longitude sample.
_gal_xyz = np.array([
    np.cos(_GAL_L_RAD),
    np.sin(_GAL_L_RAD),
    np.zeros_like(_GAL_L_RAD),
])
_eq_xyz = np.matmul(_R_GAL_TO_EQ, _gal_xyz)
_eq_x, _eq_y, _eq_z = _eq_xyz
# Clip guards arcsin against round-off pushing z marginally outside [-1, 1].
_GAL_DEC = np.degrees(np.arcsin(np.clip(_eq_z, -1, 1)))
_GAL_RA_360 = np.mod(np.degrees(np.arctan2(_eq_y, _eq_x)), 360)

# Convert RA from [0,360) to centered (-180,180] for sky map display
# Convention: 180° (East) on left, 0° center, -180° (West) on right


def _ra_to_centered(ra):
    """Map RA from [0, 360) to (-180, 180]: values > 180 become negative."""
    return np.where(ra > 180, ra - 360, ra)


def _split_at_wraparound(ra, dec, threshold=90):
    """Insert NaN where RA jumps by more than threshold degrees."""
    out_ra, out_dec = [ra[0]], [dec[0]]
    for i in range(1, len(ra)):
        if abs(ra[i] - ra[i - 1]) > threshold:
            out_ra.append(np.nan)
            out_dec.append(np.nan)
        out_ra.append(ra[i])
        out_dec.append(dec[i])
    return np.array(out_ra), np.array(out_dec)


# Overlay curves for the sky map: RA re-centred onto (-180, 180], then NaN
# breaks inserted wherever consecutive samples jump across the map edge
# (default threshold of _split_at_wraparound).
ECL_RA, ECL_DEC = _split_at_wraparound(
    ra=_ra_to_centered(_ECL_RA_360), dec=_ECL_DEC)
GAL_RA, GAL_DEC = _split_at_wraparound(
    ra=_ra_to_centered(_GAL_RA_360), dec=_GAL_DEC)

# ---------------------------------------------------------------------------
# NEOMOD3 population model (Nesvorny et al. 2024, Icarus 411, Table 3)
# Half-magnitude bins: (H1, H2, dN, N_cumulative, N_min, N_max)
# dN = estimated NEOs in bin; N = cumulative N(H < H2)
# N_min/N_max = 1-sigma bounds on cumulative
# ---------------------------------------------------------------------------

NEOMOD3_BINS = [
    # H1      H2     dN       N(H2)    N_min    N_max
    (15.25, 15.75,    61,      130,      124,      137),
    (15.75, 16.25,   104,      234,      219,      250),
    (16.25, 16.75,   156,      390,      365,      416),
    (16.75, 17.25,   218,      608,      579,      639),
    (17.25, 17.75,   328,      936,      898,      977),
    (17.75, 18.25,   513,     1450,     1400,     1510),
    (18.25, 18.75,   790,     2240,     2170,     2320),
    (18.75, 19.25,  1170,     3410,     3310,     3500),
    (19.25, 19.75,  1640,     5050,     4920,     5170),
    (19.75, 20.25,  2160,     7210,     7030,     7370),
    (20.25, 20.75,  2720,     9920,     9700,    10100),
    (20.75, 21.25,  3500,    13400,    13100,    13700),
    (21.25, 21.75,  4710,    18100,    17800,    18500),
    (21.75, 22.25,  6730,    24900,    24400,    25400),
    (22.25, 22.75, 10400,    35300,    34500,    36000),
    (22.75, 23.25, 17300,    52500,    51400,    53600),
    (23.25, 23.75, 31100,    83600,    81800,    85300),
    (23.75, 24.25, 60800,   144000,   142000,   147000),
    (24.25, 24.75,121000,   266000,   260000,   272000),
    (24.75, 25.25,229000,   494000,   482000,   506000),
    (25.25, 25.75,411000,   905000,   882000,   928000),
    (25.75, 26.25,728000,  1630000,  1590000,  1680000),
    (26.25, 26.75,1290000, 2920000,  2840000,  3000000),
    (26.75, 27.25,2250000, 5170000,  5000000,  5340000),
    (27.25, 27.75,3950000, 9120000,  8750000,  9490000),
]

# Tabular form of NEOMOD3_BINS with two derived columns: the bin midpoint
# and a human-readable "h1-h2" label (two decimals, en-dash separator).
NEOMOD3_DF = pd.DataFrame(
    NEOMOD3_BINS,
    columns=["h1", "h2", "dn_model", "n_cumul", "n_min", "n_max"],
)
NEOMOD3_DF["h_center"] = NEOMOD3_DF["h1"].add(NEOMOD3_DF["h2"]).div(2)
NEOMOD3_DF["bin_label"] = [
    f"{lo:.2f}\u2013{hi:.2f}"
    for lo, hi in zip(NEOMOD3_DF["h1"], NEOMOD3_DF["h2"])
]

# Half-magnitude bin edges (15.25 up to 27.75) used to digitize the H
# values of discovered NEOs, plus the midpoint of each adjacent edge pair.
H_BIN_EDGES = np.arange(15.25, 28.25, 0.5)
_lower_edges, _upper_edges = H_BIN_EDGES[:-1], H_BIN_EDGES[1:]
H_BIN_CENTERS = (_lower_edges + _upper_edges) / 2

# ---------------------------------------------------------------------------
# Size reference lines: H magnitude for selected diameter thresholds
# Standard uses fixed p_v = 0.14 (Harris & Chodas 2021).
# NEOMOD3 uses size-dependent debiased albedos (Nesvorny et al. 2024,
#   arXiv:2404.18805): p_v,ref ~ 0.15 for H<18, ~0.16 for 18<H<22,
#   ~0.18 for H>22.
# ---------------------------------------------------------------------------
SIZE_REFS = {
    "standard": [
        (16.25, "2 km"),
        (17.75, "1 km"),
        (19.25, "500 m"),
        (20.75, "250 m"),
        (22.0,  "140 m"),
        (22.75, "100 m"),
        (24.25, "50 m"),
        (26.25, "20 m"),
        (27.75, "10 m"),
    ],
    "neomod3": [
        (16.2, "2 km"),
        (17.7, "1 km"),
        (19.1, "500 m"),
        (20.6, "250 m"),
        (21.9, "140 m"),
        (22.5, "100 m"),
        (24.0, "50 m"),
        (26.0, "20 m"),
        (27.5, "10 m"),
    ],
}
# Index of the 140m entry in SIZE_REFS lists (for completeness annotation)
_140M_IDX = 4

# ---------------------------------------------------------------------------
# Theme helpers
# ---------------------------------------------------------------------------

THEMES = {
    "dark": dict(
        template="plotly_dark",
        paper="#1e1e1e",
        plot="#1e1e1e",
        page="#1e1e1e",
        text="#e0e0e0",
        subtext="#888888",
        control_text="#cccccc",
        input_text="#dddddd",
        mark_color="#aaaaaa",
        table_header="#333333",
        table_cell="#1e1e1e",
        table_font="#dddddd",
        model_outline="white",
        hr_color="#444444",
    ),
    "light": dict(
        template="plotly_white",
        paper="white",
        plot="white",
        page="#f5f5f5",
        text="#222222",
        subtext="#555555",
        control_text="#333333",
        input_text="#222222",
        mark_color="#555555",
        table_header="#e0e0e0",
        table_cell="#ffffff",
        table_font="#222222",
        model_outline="#333333",
        hr_color="#cccccc",
    ),
}


def theme(name):
    """Return the THEMES entry for ``name``, or the "light" entry if the
    name is not a key of THEMES."""
    if name in THEMES:
        return THEMES[name]
    return THEMES["light"]


# Customize Plotly templates: dark-mode hover labels and spike lines
pio.templates["plotly_dark"].layout.update(
    hoverlabel=dict(bgcolor="#2a2a2a", font_color="#e0e0e0",
                    bordercolor="#555555"),
    xaxis_spikecolor="#888888",
    yaxis_spikecolor="#888888",
)
pio.templates["plotly_white"].layout.update(
    hoverlabel=dict(bgcolor="white", font_color="#222222",
                    bordercolor="#cccccc"),
    xaxis_spikecolor="#999999",
    yaxis_spikecolor="#999999",
)


# Plotly modebar config — enable PNG download with 2x resolution
GRAPH_CONFIG = {
    "toImageButtonOptions": {
        "format": "png",
        "scale": 2,
    },
    "displaylogo": False,
}

# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------

LOAD_SQL = """
WITH neo_list AS (
    SELECT
        mo.packed_primary_provisional_designation AS packed_desig,
        mo.unpacked_primary_provisional_designation AS unpacked_desig,
        ni.permid IS NOT NULL AS is_numbered,
        ni.permid AS asteroid_number,
        CASE WHEN ni.permid IS NULL
             THEN mo.unpacked_primary_provisional_designation
        END AS provisional_desig,
        ni.unpacked_primary_provisional_designation AS num_provid,
        mo.h,
        mo.orbit_type_int,
        mo.q, mo.e, mo.i
    FROM mpc_orbits mo
    LEFT JOIN numbered_identifications ni
        ON ni.packed_primary_provisional_designation
         = mo.packed_primary_provisional_designation
    WHERE mo.q <= 1.30
),
discovery_obs_all AS (
    SELECT neo.unpacked_desig, obs.stn, obs.obsid, obs.trkid, obs.obstime
    FROM neo_list neo
    INNER JOIN obs_sbn obs ON obs.permid = neo.asteroid_number
    WHERE neo.is_numbered AND obs.disc = '*'
    UNION ALL
    SELECT neo.unpacked_desig, obs.stn, obs.obsid, obs.trkid, obs.obstime
    FROM neo_list neo
    INNER JOIN obs_sbn obs ON obs.provid = neo.provisional_desig
    WHERE NOT neo.is_numbered AND obs.disc = '*'
    UNION ALL
    SELECT neo.unpacked_desig, obs.stn, obs.obsid, obs.trkid, obs.obstime
    FROM neo_list neo
    INNER JOIN obs_sbn obs ON obs.provid = neo.num_provid
    WHERE neo.num_provid IS NOT NULL AND obs.disc = '*'
),
discovery_info AS (
    SELECT DISTINCT ON (unpacked_desig)
        unpacked_desig, stn, obsid, NULLIF(trkid, '') AS trkid, obstime
    FROM discovery_obs_all
    ORDER BY unpacked_desig, obstime
),
tracklet_obs_all AS (
    SELECT di.unpacked_desig, obs.obstime, obs.ra, obs.dec, obs.mag, obs.band
    FROM discovery_info di
    INNER JOIN obs_sbn obs ON obs.trkid = di.trkid
    WHERE di.trkid IS NOT NULL
      AND ABS(EXTRACT(EPOCH FROM (obs.obstime - di.obstime))) / 3600.0 <= 12.0
    UNION ALL
    SELECT di.unpacked_desig, obs.obstime, obs.ra, obs.dec, obs.mag, obs.band
    FROM discovery_info di
    INNER JOIN obs_sbn obs ON obs.obsid = di.obsid
    WHERE di.trkid IS NULL
),
discovery_tracklet_stats AS (
    SELECT
        unpacked_desig,
        AVG(ra) AS avg_ra_deg,
        AVG(dec) AS avg_dec_deg,
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY
            mag + CASE band
                WHEN 'V' THEN 0.0
                WHEN 'v' THEN 0.0
                WHEN 'B' THEN -0.8
                WHEN 'U' THEN -1.3
                WHEN 'R' THEN 0.4
                WHEN 'I' THEN 0.8
                WHEN 'g' THEN -0.35
                WHEN 'r' THEN 0.14
                WHEN 'i' THEN 0.32
                WHEN 'z' THEN 0.26
                WHEN 'y' THEN 0.32
                WHEN 'u' THEN 2.5
                WHEN 'w' THEN -0.13
                WHEN 'c' THEN -0.05
                WHEN 'o' THEN 0.33
                WHEN 'G' THEN 0.28
                WHEN 'J' THEN 1.2
                WHEN 'H' THEN 1.4
                WHEN 'K' THEN 1.7
                WHEN 'C' THEN 0.4
                WHEN 'W' THEN 0.4
                WHEN 'L' THEN 0.2
                WHEN 'Y' THEN 0.7
                WHEN '' THEN -0.8
                ELSE 0.0
            END
        ) FILTER (WHERE mag IS NOT NULL) AS median_v_mag,
        COUNT(*) AS nobs,
        EXTRACT(EPOCH FROM (MAX(obstime) - MIN(obstime))) / 86400.0
            AS span_days,
        (array_agg(ra  ORDER BY obstime ASC))[1]  AS first_ra,
        (array_agg(dec ORDER BY obstime ASC))[1]  AS first_dec,
        (array_agg(ra  ORDER BY obstime DESC))[1] AS last_ra,
        (array_agg(dec ORDER BY obstime DESC))[1] AS last_dec
    FROM tracklet_obs_all
    GROUP BY unpacked_desig
)
SELECT
    di.unpacked_desig AS designation,
    EXTRACT(YEAR FROM di.obstime)::int AS disc_year,
    EXTRACT(MONTH FROM di.obstime)::int AS disc_month,
    di.obstime::date AS disc_date,
    di.obstime AS disc_obstime,
    di.stn AS station_code,
    neo.h,
    neo.orbit_type_int,
    neo.q, neo.e, neo.i,
    oc.longitude::double precision AS stn_longitude,
    oc.rhocosphi::double precision AS stn_rhocosphi,
    oc.rhosinphi::double precision AS stn_rhosinphi,
    oc.observations_type AS stn_type,
    dts.avg_ra_deg,
    dts.avg_dec_deg,
    dts.median_v_mag,
    dts.nobs AS tracklet_nobs,
    CASE WHEN dts.span_days > 0 THEN
        2.0 * DEGREES(ASIN(SQRT(
            SIN(RADIANS(dts.last_dec - dts.first_dec) / 2.0) ^ 2
            + COS(RADIANS(dts.first_dec)) * COS(RADIANS(dts.last_dec))
              * SIN(RADIANS(dts.last_ra - dts.first_ra) / 2.0) ^ 2
        ))) / dts.span_days
    END AS rate_deg_per_day,
    CASE WHEN dts.span_days > 0 THEN
        (360.0 + DEGREES(ATAN2(
            SIN(RADIANS(dts.last_ra - dts.first_ra))
                * COS(RADIANS(dts.last_dec)),
            COS(RADIANS(dts.first_dec)) * SIN(RADIANS(dts.last_dec))
            - SIN(RADIANS(dts.first_dec)) * COS(RADIANS(dts.last_dec))
              * COS(RADIANS(dts.last_ra - dts.first_ra))
        )))::numeric % 360.0
    END AS position_angle_deg
FROM discovery_info di
JOIN neo_list neo ON neo.unpacked_desig = di.unpacked_desig
LEFT JOIN discovery_tracklet_stats dts
    ON dts.unpacked_desig = di.unpacked_desig
LEFT JOIN obscodes oc ON oc.obscode = di.stn
ORDER BY di.obstime
"""


APPARITION_SQL = """
WITH neo_list AS MATERIALIZED (
    SELECT
        mo.unpacked_primary_provisional_designation AS unpacked_desig,
        ni.permid IS NOT NULL AS is_numbered,
        ni.permid AS asteroid_number,
        CASE WHEN ni.permid IS NULL
             THEN mo.unpacked_primary_provisional_designation
        END AS provisional_desig,
        ni.unpacked_primary_provisional_designation AS num_provid
    FROM mpc_orbits mo
    LEFT JOIN numbered_identifications ni
        ON ni.packed_primary_provisional_designation
         = mo.packed_primary_provisional_designation
    WHERE mo.q <= 1.30
),
discovery_obs_all AS (
    SELECT neo.unpacked_desig, obs.stn, obs.obstime
    FROM neo_list neo
    INNER JOIN obs_sbn obs ON obs.permid = neo.asteroid_number
    WHERE neo.is_numbered AND obs.disc = '*'
    UNION ALL
    SELECT neo.unpacked_desig, obs.stn, obs.obstime
    FROM neo_list neo
    INNER JOIN obs_sbn obs ON obs.provid = neo.provisional_desig
    WHERE NOT neo.is_numbered AND obs.disc = '*'
    UNION ALL
    SELECT neo.unpacked_desig, obs.stn, obs.obstime
    FROM neo_list neo
    INNER JOIN obs_sbn obs ON obs.provid = neo.num_provid
    WHERE neo.num_provid IS NOT NULL AND obs.disc = '*'
),
discovery_info AS (
    SELECT DISTINCT ON (unpacked_desig)
        unpacked_desig, stn, obstime
    FROM discovery_obs_all
    ORDER BY unpacked_desig, obstime
),
neo_discovery AS MATERIALIZED (
    SELECT
        di.unpacked_desig AS designation,
        di.obstime AS disc_obstime,
        neo.asteroid_number,
        COALESCE(neo.provisional_desig, neo.num_provid) AS provid_key
    FROM discovery_info di
    JOIN neo_list neo ON neo.unpacked_desig = di.unpacked_desig
)
SELECT nd.designation, o.station_code, nd.disc_obstime,
       o.first_obs, o.first_post_disc
FROM neo_discovery nd
CROSS JOIN LATERAL (
    SELECT stn AS station_code,
           MIN(obstime) AS first_obs,
           MIN(CASE WHEN obstime >= nd.disc_obstime
                    THEN obstime END) AS first_post_disc
    FROM obs_sbn
    WHERE (permid = nd.asteroid_number OR provid = nd.provid_key)
      AND obstime BETWEEN nd.disc_obstime - INTERVAL '200 days'
                    AND nd.disc_obstime + INTERVAL '200 days'
    GROUP BY stn
) o
"""

# ---------------------------------------------------------------------------
# Boxscore: object catalog from mpc_orbits (all objects, ~1.5M rows)
# ---------------------------------------------------------------------------
BOXSCORE_SQL = """
SELECT
    mo.unpacked_primary_provisional_designation AS provid,
    mo.packed_primary_provisional_designation AS packed_provid,
    ni.permid AS permid,
    mo.orbit_type_int,
    mo.q::double precision,
    mo.e::double precision,
    mo.i::double precision,
    CASE WHEN mo.e < 1 THEN (mo.q / (1 - mo.e))::double precision END AS a,
    mo.h::double precision,
    mo.earth_moid::double precision,
    mo.epoch_mjd::double precision,
    mo.arc_length_total::integer,
    mo.nobs_total::integer
FROM mpc_orbits mo
LEFT JOIN numbered_identifications ni
    ON ni.packed_primary_provisional_designation
     = mo.packed_primary_provisional_designation
"""

CACHE_MAX_AGE_SEC = 86400  # 1 day

# Parse flags once at import time (prevent reloader from re-querying).
# Each recognised flag (and its value, if any) is removed from sys.argv
# after being read.
_REFRESH_ONLY = "--refresh-only" in sys.argv
if _REFRESH_ONLY:
    sys.argv.remove("--refresh-only")
# --refresh-only implies a forced refresh.
_FORCE_REFRESH = _REFRESH_ONLY or "--refresh" in sys.argv
if "--refresh" in sys.argv:
    sys.argv.remove("--refresh")
_HOST = "127.0.0.1"
if "--host" in sys.argv:
    idx = sys.argv.index("--host")
    # --host must be followed by a value.  Without this guard a trailing
    # "--host" died with a bare IndexError, and "--host --debug" silently
    # swallowed the next flag as the host name.
    if idx + 1 >= len(sys.argv) or sys.argv[idx + 1].startswith("--"):
        raise SystemExit(
            "error: --host requires an address argument "
            "(e.g. --host 0.0.0.0)")
    _HOST = sys.argv[idx + 1]
    del sys.argv[idx:idx + 2]
_SERVE_ONLY = "--serve-only" in sys.argv
if _SERVE_ONLY:
    sys.argv.remove("--serve-only")
_DEBUG = "--debug" in sys.argv
if _DEBUG:
    sys.argv.remove("--debug")
_MAINTENANCE_MSG = None
if "--maintenance" in sys.argv:
    idx = sys.argv.index("--maintenance")
    # The message is optional: use the next argument only when it is not
    # itself another flag, otherwise fall back to the default banner text.
    if idx + 1 < len(sys.argv) and not sys.argv[idx + 1].startswith("--"):
        _MAINTENANCE_MSG = sys.argv[idx + 1]
        del sys.argv[idx:idx + 2]
    else:
        _MAINTENANCE_MSG = "System maintenance in progress. Some features may be temporarily unavailable."
        del sys.argv[idx:idx + 1]


def _cache_refresh_label():
    """Build the 'Caches refreshed <UTC timestamp>' footer text.

    Scans the app directory for the NEO, apparition and boxscore parquet
    caches and reports the modification time of the *oldest* one, since
    the dashboard can be no fresher than its slowest cache.  Returns an
    empty string when no cache file is found.  Rsync -a preserves mtimes,
    so the Mini reports MBP query time.
    """
    import glob
    from datetime import datetime, timezone

    cache_globs = (
        ".neo_cache_*.parquet",
        ".apparition_cache_*.parquet",
        ".boxscore_cache_*.parquet",
    )
    oldest = None
    for cache_glob in cache_globs:
        for path in glob.glob(os.path.join(_APP_DIR, cache_glob)):
            modified = os.path.getmtime(path)
            # Keep a running minimum instead of collecting every mtime.
            if oldest is None or modified < oldest:
                oldest = modified

    if oldest is None:
        return ""
    when = datetime.fromtimestamp(oldest, tz=timezone.utc)
    return "Caches refreshed " + when.strftime("%Y-%m-%d %H:%M UTC")


def _load_cached_query(sql, prefix, label):
    """Return a query result, served from the on-disk cache when allowed.

    Cache files live in the app directory and are named from *prefix*
    plus the first 8 hex digits of the MD5 of *sql*, so a different query
    text maps to a different cache.  Parquet is the cache format; a
    legacy CSV with the same stem is consulted only when no Parquet file
    exists.

    Behaviour by mode:
      --serve-only  load whichever cache exists regardless of age and
                    never touch the database; RuntimeError if none.
      --refresh     skip the caches and re-query.
      (default)     use a cache younger than CACHE_MAX_AGE_SEC,
                    otherwise re-query.

    Returns (DataFrame, meta_file_path).  The .meta file is written only
    when the database is actually queried and holds that query's UTC time.
    *label* is used in progress messages only.
    """
    digest = hashlib.md5(sql.encode()).hexdigest()[:8]
    stem = os.path.join(_APP_DIR, f".{prefix}_{digest}")
    cache_file = stem + ".parquet"
    meta_file = stem + ".meta"
    legacy_csv = stem + ".csv"

    def _age_sec(path):
        # Seconds elapsed since *path* was last modified.
        return time.time() - os.path.getmtime(path)

    hit = None  # path of the cache file to load; None means query the DB
    if _SERVE_ONLY:
        # --serve-only: always use existing cache, never query DB
        if os.path.exists(cache_file):
            hit = cache_file
            hours = _age_sec(cache_file) / 3600
            print(f"Loading cached {label} from {cache_file} "
                  f"(age: {hours:.1f} h, serve-only)")
        elif os.path.exists(legacy_csv):
            hit = legacy_csv
            hours = _age_sec(legacy_csv) / 3600
            print(f"Loading legacy CSV cache for {label} "
                  f"(age: {hours:.1f} h, serve-only)")
        else:
            raise RuntimeError(
                f"--serve-only: no cache found for {label}. "
                "Run deploy_to_mini.sh to sync caches first.")
    elif _FORCE_REFRESH:
        print(f"--refresh: re-querying {label}")
    elif os.path.exists(cache_file):
        age = _age_sec(cache_file)
        if age >= CACHE_MAX_AGE_SEC:
            print(f"{label} cache is {age/3600:.1f} h old "
                  "\u2014 refreshing")
        else:
            hit = cache_file
            print(f"Loading cached {label} from {cache_file} "
                  f"(age: {age/3600:.1f} h)")
    elif os.path.exists(legacy_csv):
        age = _age_sec(legacy_csv)
        if age >= CACHE_MAX_AGE_SEC:
            print(f"{label} legacy cache is {age/3600:.1f} h old "
                  "\u2014 refreshing")
        else:
            hit = legacy_csv
            print(f"Loading legacy CSV cache for {label} "
                  f"(age: {age/3600:.1f} h)")

    if hit is not None:
        reader = pd.read_parquet if hit.endswith(".parquet") else pd.read_csv
        return reader(hit), meta_file

    # No usable cache: hit the database and (re)write the Parquet cache.
    print(f"Querying database for {label}...")
    from datetime import datetime, timezone
    query_time = datetime.now(timezone.utc)  # taken before the query runs
    with connect() as conn:
        result = timed_query(conn, sql, label=label)
    result.to_parquet(cache_file, index=False)
    with open(meta_file, "w") as f:
        f.write(query_time.strftime("%Y-%m-%d %H:%M UTC"))
    print(f"Cached {len(result):,} rows to {cache_file}")
    return result, meta_file


def load_data():
    """Load NEO discovery data from DB or cache (refreshed daily).

    Runs LOAD_SQL through _load_cached_query and then augments the
    result in place with derived columns:

      h, h_mpc, h_nea, in_nea_catalog  H magnitude after the NEA.txt
                                       override (h_mpc keeps the
                                       sanitized mpc_orbits value)
      station_name, project            looked up from station_code
      size_class, h_bin_idx            H-magnitude binning
      solar_elong_deg                  signed solar elongation (only if
                                       disc_date and avg_ra_deg exist)
      sun_alt_deg, twilight_class      only if disc_obstime and
                                       stn_longitude exist

    The NEA.txt override is best-effort: if it fails for any reason a
    warning is printed and the mpc_orbits H values are kept.

    Returns (DataFrame, timestamp) where timestamp is the text of the
    cache's .meta file, or "unknown" if that file does not exist.
    """
    raw, meta_file = _load_cached_query(
        LOAD_SQL, "neo_cache", "NEO discoveries")

    # Sanitize H magnitude: sentinel values (0, -9.99) in mpc_orbits
    # represent missing data, not real measurements.  Treat as unknown.
    raw.loc[raw["h"] <= 0, "h"] = np.nan

    # Override H with NEA.txt values where available.
    # NEA.txt is the MPC's curated NEA catalog and is currently the most
    # reliable H source.  When MPC finishes mpc_orbits cleanup, this
    # override can be removed.
    #
    # Three cases for each object:
    #   - In NEA.txt with valid H  -> use NEA.txt H
    #   - In NEA.txt with H=99.99  -> set H to NaN (unreliable)
    #   - Not in NEA.txt at all    -> NaN for asteroids; keep mpc_orbits H
    #     only for comets and borderline objects (q > 1.30) that aren't
    #     expected to appear in the curated NEA catalog
    try:
        h_lookup = load_nea_h_lookup(_APP_DIR, force_refresh=_FORCE_REFRESH)
        raw["h_mpc"] = raw["h"]  # preserve original for reference
        raw["in_nea_catalog"] = raw["designation"].isin(h_lookup)

        def _resolve_h(row):
            desig = row["designation"]
            if desig in h_lookup:
                val = h_lookup[desig]
                if val is not None:
                    return val  # NEA.txt has a valid H
                return np.nan  # NEA.txt says H=99.99 (unreliable)
            # Not in NEA.txt — keep H only for comets and borderline
            q = row.get("q", 0)
            desig_str = str(desig)
            is_comet = desig_str.startswith(("C/", "P/", "D/"))
            is_borderline = q > 1.30
            if is_comet or is_borderline:
                return row["h_mpc"]
            return np.nan  # asteroid not in NEA catalog

        raw["h_nea"] = raw.apply(_resolve_h, axis=1)
        raw["h"] = raw["h_nea"]

        n_in_nea = raw["in_nea_catalog"].sum()
        # NaN != NaN evaluates True, so a plain != would count rows whose H
        # was unknown both before and after the override as "changed".
        # Exclude those explicitly; NaN <-> value transitions still count.
        both_missing = raw["h"].isna() & raw["h_mpc"].isna()
        n_changed = ((raw["h"] != raw["h_mpc"]) & ~both_missing).sum()
        n_nulled = (raw["h"].isna() & raw["h_mpc"].notna()).sum()
        print(f"NEA.txt H applied: {n_in_nea:,} in catalog, "
              f"{n_changed:,} values changed, "
              f"{n_nulled:,} set to unknown")
    except Exception as e:
        # Deliberate best-effort: the dashboard still works with the
        # (sanitized) mpc_orbits H values.
        print(f"Warning: NEA.txt H override skipped: {e}")

    # Derived columns
    # Unknown station codes fall back to the code itself / a catch-all
    # project name.
    raw["station_name"] = (raw["station_code"].map(STATION_NAMES)
                           .fillna(raw["station_code"]))
    raw["project"] = (raw["station_code"].map(STATION_TO_PROJECT)
                      .fillna("Other Follow-up"))

    def h_bin(h):
        # Map an H value to the label of the first H_BINS entry whose
        # half-open range [lo, hi) contains it; None means unbounded.
        if pd.isna(h):
            return "Unknown H"
        for label, lo, hi in H_BINS:
            if (lo is None or h >= lo) and (hi is None or h < hi):
                return label
        return "Unknown H"

    raw["size_class"] = raw["h"].apply(h_bin)

    # Compute signed solar elongation at discovery
    if "disc_date" in raw.columns and "avg_ra_deg" in raw.columns:
        obs_dates = pd.to_datetime(raw["disc_date"])
        # Whole days since 2000-01-01, scaled to centuries, feed a
        # low-precision series for the Sun's ecliptic longitude.
        jd_offset = (obs_dates - pd.Timestamp("2000-01-01")).dt.days.values
        T = jd_offset / 36525.0
        L0 = (280.466 + 36000.77 * T) % 360
        M = (357.529 + 35999.05 * T) % 360
        C = 1.915 * np.sin(np.radians(M))
        sun_lon = (L0 + C) % 360
        obliquity = 23.439 - 0.013 * T
        # Ecliptic longitude -> equatorial RA/Dec of the Sun.
        sun_ra = np.degrees(np.arctan2(
            np.cos(np.radians(obliquity)) * np.sin(np.radians(sun_lon)),
            np.cos(np.radians(sun_lon)))) % 360
        sun_dec = np.degrees(np.arcsin(
            np.sin(np.radians(obliquity)) * np.sin(np.radians(sun_lon))))
        ra = raw["avg_ra_deg"].astype(float).values
        dec = raw["avg_dec_deg"].astype(float).values
        # Angular separation between object and Sun (spherical law of
        # cosines); clip guards arccos against rounding just outside [-1, 1].
        cos_elong = (np.sin(np.radians(dec)) * np.sin(np.radians(sun_dec))
                     + np.cos(np.radians(dec)) * np.cos(np.radians(sun_dec))
                       * np.cos(np.radians(ra - sun_ra)))
        elong = np.degrees(np.arccos(np.clip(cos_elong, -1, 1)))
        # Sign: positive when the object's RA leads the Sun's by 0-180 deg,
        # negative otherwise.
        dra = (ra - sun_ra + 360) % 360
        sign = np.where(dra <= 180, 1, -1)
        raw["solar_elong_deg"] = np.where(
            raw["avg_ra_deg"].notna(), sign * elong, np.nan)

    # Compute solar altitude and twilight class at discovery
    if "disc_obstime" in raw.columns and "stn_longitude" in raw.columns:
        # Solar altitude is blanked for satellite stations.
        is_sat = (raw["stn_type"] == "satellite").values
        lon = raw["stn_longitude"].values.astype(float)
        lat = _observer_latitude(
            raw["stn_rhocosphi"].values.astype(float),
            raw["stn_rhosinphi"].values.astype(float))
        alt = sun_altitude(raw["disc_obstime"], lon, lat)
        raw["sun_alt_deg"] = np.where(is_sat, np.nan,
                                      np.round(alt, 2))
        raw["twilight_class"] = classify_twilight(alt, is_sat)

    # Pre-compute half-magnitude bin index
    # (-1 marks rows with unknown H).
    raw["h_bin_idx"] = np.where(
        raw["h"].notna(),
        np.digitize(raw["h"], H_BIN_EDGES) - 1,
        -1,
    ).astype(int)

    # Read query timestamp
    if os.path.exists(meta_file):
        with open(meta_file) as f:
            timestamp = f.read().strip()
    else:
        timestamp = "unknown"

    return raw, timestamp


# ---------------------------------------------------------------------------
# Apparition data: lazy-loaded on first Tab 3 access
# ---------------------------------------------------------------------------

def _postprocess_apparition(df_raw):
    """Add derived columns to station-level apparition data from SQL."""
    df_raw = df_raw.copy()
    df_raw["disc_obstime"] = pd.to_datetime(df_raw["disc_obstime"])
    df_raw["first_obs"] = pd.to_datetime(df_raw["first_obs"])
    df_raw["first_post_disc"] = pd.to_datetime(df_raw["first_post_disc"])
    df_raw["project"] = (df_raw["station_code"].map(STATION_TO_PROJECT)
                         .fillna("Other Follow-up"))
    df_raw["days_from_disc"] = (
        (df_raw["first_obs"] - df_raw["disc_obstime"])
        .dt.total_seconds() / 86400
    )
    df_raw["post_disc_days"] = (
        (df_raw["first_post_disc"] - df_raw["disc_obstime"])
        .dt.total_seconds() / 86400