-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
1491 lines (1308 loc) · 68.2 KB
/
app.py
File metadata and controls
1491 lines (1308 loc) · 68.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from flask import Flask, jsonify, render_template, request, redirect, url_for, flash, session
import requests
import pymysql
from pymysql import Error
import json
from datetime import datetime, timedelta
import threading
import time
import re
from threading import Lock
import random
app = Flask(__name__)
# NOTE(review): hardcoded session secret — should be loaded from an
# environment variable in production so sessions cannot be forged.
app.secret_key = 'flipkart-secret-key-2024'
# Database connection function
def get_db_connection():
    """Open a new MySQL connection to the disasters database.

    Returns:
        A live pymysql connection on success, or None if the connection
        attempt fails (callers must check for None before using it).
    """
    try:
        # Use the shared DB_CONFIG so credentials live in exactly one
        # place (the original duplicated host/user/password here).
        conn = pymysql.connect(**DB_CONFIG)
        return conn
    except Exception as e:
        # Best-effort behavior preserved: log and signal failure with
        # None instead of raising to the caller.
        print(f"Database connection error: {e}")
        return None
# NOTE(review): API keys are hardcoded in source — move to environment
# variables or a config file before publishing this repository.
NEWS_API_KEY = "b352d15e97cb4328aacf55eb74c3dd3e"
WEATHER_API_KEY = "b020f18983b41a0d411e14d2c9850d68"
# MySQL connection settings shared by the query helpers below.
DB_CONFIG = {
    'host': 'localhost',
    'user': 'root',  # Replace with your MySQL username
    'password': 'root',  # Replace with your MySQL password
    'database': 'disasters_db'  # Database name
}
# Serializes all database access (held by execute_db_query and init_db).
db_lock = Lock()
# Cap on stored disaster rows; cleanup_old_events prunes past this.
MAX_EVENTS = 50
# cleanup_old_news purges rows older than this many days.
MAX_NEWS_AGE_DAYS = 4
# Presumably toggles mock-event generation instead of live API calls —
# TODO confirm against the code that reads this flag.
USE_MOCK_DATA = True
# Optional NLP dependency: prefer spaCy NER for location extraction and
# fall back to regex when spaCy or its English model is unavailable.
try:
    import spacy
    nlp = spacy.load("en_core_web_sm")
    USE_NER = True
    print("Using spaCy NER for location extraction")
except (ImportError, OSError):
    # OSError covers the case where spaCy is installed but the
    # "en_core_web_sm" model has not been downloaded.
    USE_NER = False
    print("spaCy not available, using regex for location extraction")
    print("To install spaCy: pip install spacy && python -m spacy download en_core_web_sm")
# Keyword lists per disaster category. Presumably used to classify news
# articles by keyword matching — confirm against the classification code.
DISASTER_TYPES = {
    'earthquake': [
        'earthquake', 'seismic', 'tremor', 'quake', 'magnitude', 'richter',
        'tectonic', 'aftershock', 'epicenter', 'seismology', 'fault line',
        'ground shaking', 'seismic activity', 'geological', 'crustal movement'
    ],
    'flood': [
        'flood', 'flooding', 'deluge', 'inundation', 'overflow', 'waterlogged',
        'flash flood', 'river overflow', 'dam burst', 'levee breach', 'monsoon',
        'heavy rainfall', 'water surge', 'submersion', 'aquatic disaster'
    ],
    'hurricane': [
        'hurricane', 'cyclone', 'typhoon', 'storm', 'wind speed', 'landfall',
        'tropical storm', 'eye wall', 'storm surge', 'category', 'wind damage',
        'meteorological', 'atmospheric pressure', 'weather system', 'gale force'
    ],
    'wildfire': [
        'wildfire', 'forest fire', 'bushfire', 'blaze', 'burning', 'fire outbreak',
        'brush fire', 'grass fire', 'conflagration', 'fire spread', 'smoke',
        'evacuation fire', 'fire danger', 'combustion', 'fire suppression'
    ],
    'tornado': [
        'tornado', 'twister', 'funnel cloud', 'wind damage', 'vortex',
        'supercell', 'tornado alley', 'rotation', 'mesocyclone', 'downdraft',
        'debris field', 'tornado warning', 'severe weather', 'wind shear'
    ],
    'tsunami': [
        'tsunami', 'tidal wave', 'sea surge', 'ocean wave', 'seismic sea wave',
        'underwater earthquake', 'wave height', 'coastal flooding', 'marine disaster',
        'wave propagation', 'tsunami warning', 'wave impact', 'coastal evacuation'
    ],
    'volcanic': [
        'volcano', 'volcanic', 'eruption', 'lava', 'ash cloud', 'magma',
        'pyroclastic flow', 'volcanic ash', 'crater', 'molten rock', 'tephra',
        'volcanic activity', 'lava flow', 'volcanic explosion', 'geological hazard'
    ],
    'drought': [
        'drought', 'water shortage', 'dry spell', 'arid', 'water crisis',
        'precipitation deficit', 'water scarcity', 'agricultural drought',
        'meteorological drought', 'hydrological drought', 'desertification'
    ],
    'landslide': [
        'landslide', 'mudslide', 'rockslide', 'slope failure', 'debris flow',
        'mass wasting', 'soil erosion', 'geological hazard', 'slope instability',
        'rock fall', 'earth movement', 'terrain collapse', 'hillside collapse'
    ],
    'avalanche': [
        'avalanche', 'snow slide', 'snow avalanche', 'snow mass', 'alpine hazard',
        'snow pack', 'snow instability', 'mountain hazard', 'snow debris'
    ],
    'pandemic': [
        'pandemic', 'epidemic', 'outbreak', 'virus', 'disease spread', 'contagion',
        'infectious disease', 'public health emergency', 'viral outbreak',
        'health crisis', 'pathogen', 'transmission', 'quarantine', 'isolation'
    ],
    'terrorist': [
        'terrorist', 'bombing', 'attack', 'explosion', 'shooting', 'terrorism',
        'security threat', 'extremist', 'militant', 'insurgent', 'violence',
        'armed attack', 'security incident', 'threat assessment'
    ],
    'accident': [
        'accident', 'crash', 'collision', 'derailment', 'plane crash', 'incident',
        'transportation accident', 'industrial accident', 'vehicle accident',
        'train accident', 'maritime accident', 'aviation accident', 'mishap'
    ]
}
# News-API style boolean search strings, grouped by disaster category.
# Presumably fed to the news API one at a time — confirm against the
# fetch code that consumes this list.
SEARCH_QUERIES = [
    # Earthquake queries
    'earthquake OR seismic activity OR tremor',
    'magnitude earthquake OR richter scale',
    'aftershock OR tectonic OR fault line',
    'ground shaking OR seismic event',
    'earthquake damage OR seismic disaster',
    # Flood queries
    'flood OR flooding OR flash flood',
    'river overflow OR dam burst OR levee breach',
    'heavy rainfall OR monsoon flooding',
    'water surge OR inundation OR deluge',
    'flood damage OR flood warning OR flood emergency',
    # Hurricane/Storm queries
    'hurricane OR tropical storm OR cyclone',
    'typhoon OR storm surge OR landfall',
    'category hurricane OR wind speed',
    'storm damage OR hurricane warning',
    'meteorological disaster OR severe weather',
    # Wildfire queries
    'wildfire OR forest fire OR bushfire',
    'fire outbreak OR blaze OR conflagration',
    'fire evacuation OR fire danger OR fire spread',
    'smoke OR fire suppression OR fire damage',
    'brush fire OR grass fire OR fire emergency',
    # Tornado queries
    'tornado OR twister OR funnel cloud',
    'tornado warning OR severe weather OR wind damage',
    'supercell OR mesocyclone OR tornado alley',
    'tornado outbreak OR tornado damage',
    'severe thunderstorm OR wind shear',
    # Tsunami queries
    'tsunami OR tidal wave OR seismic sea wave',
    'tsunami warning OR ocean wave OR sea surge',
    'coastal flooding OR marine disaster',
    'underwater earthquake OR wave impact',
    'tsunami evacuation OR wave height',
    # Volcanic queries
    'volcano OR volcanic eruption OR lava',
    'ash cloud OR pyroclastic flow OR magma',
    'volcanic ash OR volcanic activity',
    'lava flow OR volcanic explosion',
    'crater OR volcanic hazard OR tephra',
    # Other disaster queries
    'landslide OR mudslide OR rockslide',
    'drought OR water shortage OR water crisis',
    'avalanche OR snow slide OR alpine hazard',
    'pandemic OR epidemic OR disease outbreak',
    'terrorist attack OR bombing OR security threat',
    'accident OR crash OR collision OR derailment',
    'disaster OR emergency OR catastrophe OR crisis',
    'natural disaster OR man-made disaster',
    'emergency response OR disaster relief'
]
# Mock data for generating realistic disaster events
#
# Per disaster type: 'titles' and 'descriptions' are templates containing
# {placeholder} tokens that get substituted at generation time (see
# generate_mock_disaster). Plural keys like 'magnitudes'/'names' are the
# value pools for those placeholders, and 'additional_info' maps field
# names to zero-argument lambdas producing randomized values.
MOCK_DATA = {
    'earthquake': {
        'titles': [
            "Magnitude {magnitude} earthquake strikes {location}",
            "Strong earthquake hits {location}, buildings damaged",
            "Powerful {magnitude} quake shakes {location}",
            "{magnitude} earthquake reported near {location}",
            "Seismic activity: {magnitude} earthquake in {location}"
        ],
        'descriptions': [
            "A magnitude {magnitude} earthquake has struck {location}, causing {damage_level} damage to buildings and infrastructure. {casualty_info}",
            "Officials report a {magnitude} earthquake in {location}. {casualty_info} Emergency services are responding to the affected areas.",
            "A powerful earthquake measuring {magnitude} on the Richter scale has hit {location}. {damage_level} damage reported across the region. {casualty_info}",
            "Residents of {location} experienced strong shaking as a {magnitude} earthquake struck the area. {damage_level} damage has been reported. {casualty_info}",
            "Seismologists recorded a {magnitude} earthquake near {location}. {damage_level} damage to infrastructure reported. {casualty_info}"
        ],
        'magnitudes': ["4.5", "5.2", "5.8", "6.1", "6.7", "7.2", "7.9"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(5, 10000)),
            'damage_estimate': lambda: f"${random.randint(1, 500)} million"
        }
    },
    'flood': {
        'titles': [
            "Severe flooding affects {location} after heavy rainfall",
            "Flash floods force evacuations in {location}",
            "{location} underwater as rivers overflow",
            "Monsoon floods devastate communities in {location}",
            "Flood emergency declared in {location}"
        ],
        'descriptions': [
            "Heavy rainfall has caused severe flooding in {location}, with water levels rising to {water_level} meters. {casualty_info} Thousands have been evacuated.",
            "Flash floods have swept through {location}, causing {damage_level} damage to homes and businesses. {casualty_info}",
            "Rivers have overflowed in {location}, flooding residential areas and farmland. {casualty_info} Emergency services are conducting rescue operations.",
            "Continuous rainfall has led to widespread flooding across {location}. {damage_level} damage reported to infrastructure. {casualty_info}",
            "Authorities in {location} have declared a flood emergency as water levels continue to rise. {casualty_info} Evacuation centers have been established."
        ],
        'water_levels': ["1.5", "2", "3", "4", "5"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(100, 50000)),
            'damage_estimate': lambda: f"${random.randint(5, 800)} million"
        }
    },
    'hurricane': {
        'titles': [
            "Hurricane {name} makes landfall in {location}",
            "Category {category} hurricane {name} batters {location}",
            "{location} braces for Hurricane {name}",
            "Hurricane {name} causes widespread damage in {location}",
            "Tropical Storm {name} upgraded to hurricane as it approaches {location}"
        ],
        'descriptions': [
            "Hurricane {name}, a Category {category} storm with winds of {wind_speed} mph, has made landfall in {location}. {casualty_info}",
            "{location} is experiencing {damage_level} damage as Hurricane {name} moves through the region with sustained winds of {wind_speed} mph. {casualty_info}",
            "Authorities have ordered evacuations in {location} as Hurricane {name} approaches with wind speeds of {wind_speed} mph. {casualty_info}",
            "Hurricane {name} has caused widespread destruction in {location}. The Category {category} storm brought winds of {wind_speed} mph and heavy rainfall. {casualty_info}",
            "Residents of {location} are dealing with the aftermath of Hurricane {name}. The Category {category} storm caused {damage_level} damage across the region. {casualty_info}"
        ],
        'names': ["Maria", "Irma", "Harvey", "Dorian", "Florence", "Michael", "Laura", "Ida", "Katrina", "Sandy"],
        'categories': ["1", "2", "3", "4", "5"],
        'wind_speeds': ["75", "90", "110", "130", "155", "175"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(1000, 100000)),
            'damage_estimate': lambda: f"${random.randint(100, 5000)} million",
            'wind_speed': lambda: f"{random.randint(75, 175)} mph"
        }
    },
    'wildfire': {
        'titles': [
            "Massive wildfire spreads across {location}",
            "Forest fire forces evacuations in {location}",
            "Wildfire destroys homes in {location}",
            "Firefighters battle blaze in {location}",
            "Uncontained wildfire threatens communities in {location}"
        ],
        'descriptions': [
            "A fast-moving wildfire has spread across {location}, burning {acres} acres of land. {casualty_info} Evacuation orders have been issued for nearby communities.",
            "Firefighters are battling a large forest fire in {location} that has already consumed {acres} acres. {casualty_info}",
            "A wildfire in {location} has destroyed numerous homes and forced thousands to evacuate. The fire has burned {acres} acres and is {containment}% contained. {casualty_info}",
            "High winds are fueling a wildfire in {location} that has grown to {acres} acres. {casualty_info} Fire crews are working to establish containment lines.",
            "Authorities have declared a state of emergency in {location} as a wildfire continues to spread. The fire has burned {acres} acres and is {containment}% contained. {casualty_info}"
        ],
        'acres': ["500", "1,200", "5,000", "10,000", "25,000", "50,000", "100,000"],
        'containment': ["0", "5", "10", "25", "40", "60"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(100, 10000)),
            'damage_estimate': lambda: f"${random.randint(10, 1000)} million"
        }
    },
    'tornado': {
        'titles': [
            "Tornado tears through {location}",
            "Deadly tornado strikes {location}",
            "{location} devastated by powerful tornado",
            "Multiple tornadoes reported in {location}",
            "Tornado outbreak causes destruction in {location}"
        ],
        'descriptions': [
            "A {ef_rating} tornado has torn through {location}, causing {damage_level} damage to homes and businesses. {casualty_info}",
            "A powerful tornado struck {location} yesterday, leaving a path of destruction {path_width} miles wide. {casualty_info}",
            "Residents of {location} are assessing the damage after a {ef_rating} tornado hit the area. {casualty_info}",
            "Multiple tornadoes were reported in {location}, with one confirmed {ef_rating} tornado causing significant damage. {casualty_info}",
            "A tornado outbreak has caused widespread destruction across {location}. The strongest was rated {ef_rating}. {casualty_info}"
        ],
        'ef_ratings': ["EF-1", "EF-2", "EF-3", "EF-4", "EF-5"],
        'path_widths': ["0.5", "0.75", "1", "1.5", "2"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(10, 5000)),
            'damage_estimate': lambda: f"${random.randint(1, 200)} million"
        }
    },
    'tsunami': {
        'titles': [
            "Tsunami hits coastal areas of {location}",
            "Underwater earthquake triggers tsunami in {location}",
            "Tsunami warning issued for {location} after strong earthquake",
            "Coastal communities in {location} devastated by tsunami",
            "{location} on alert as tsunami approaches"
        ],
        'descriptions': [
            "A tsunami with waves up to {wave_height} meters high has struck coastal areas of {location}. {casualty_info}",
            "An underwater earthquake has triggered a tsunami that hit {location} with waves reaching {wave_height} meters. {casualty_info}",
            "Authorities have issued a tsunami warning for {location} after a strong offshore earthquake. Waves of up to {wave_height} meters are expected. {casualty_info}",
            "Coastal communities in {location} have been devastated by a tsunami with waves as high as {wave_height} meters. {casualty_info}",
            "A tsunami has caused {damage_level} damage along the coast of {location}. Wave heights of {wave_height} meters were recorded. {casualty_info}"
        ],
        'wave_heights': ["2", "3", "5", "8", "10", "15"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(100, 50000)),
            'damage_estimate': lambda: f"${random.randint(50, 2000)} million"
        }
    },
    'volcanic': {
        'titles': [
            "Volcano erupts in {location}",
            "{location} volcano shows increased activity",
            "Volcanic eruption forces evacuations in {location}",
            "Ash cloud from {location} volcano disrupts air travel",
            "Scientists monitor volcanic activity in {location}"
        ],
        'descriptions': [
            "The volcano in {location} has erupted, sending ash {ash_height} kilometers into the atmosphere. {casualty_info} Nearby communities have been evacuated.",
            "Increased volcanic activity has been reported at the {location} volcano. Authorities have raised the alert level to {alert_level}. {casualty_info}",
            "A volcanic eruption in {location} has forced thousands to evacuate. The volcano is spewing ash {ash_height} kilometers high. {casualty_info}",
            "An ash cloud from the {location} volcano has disrupted air travel in the region. The eruption sent ash {ash_height} kilometers into the sky. {casualty_info}",
            "Scientists are closely monitoring the {location} volcano after it showed signs of increased activity. The alert level has been raised to {alert_level}. {casualty_info}"
        ],
        'ash_heights': ["2", "5", "8", "10", "15", "20"],
        'alert_levels': ["Yellow", "Orange", "Red"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(100, 20000)),
            'damage_estimate': lambda: f"${random.randint(5, 500)} million"
        }
    },
    'drought': {
        'titles': [
            "Severe drought affects {location}",
            "Water shortage crisis worsens in {location}",
            "{location} declares emergency amid worst drought in decades",
            "Drought conditions intensify across {location}",
            "Agricultural sector in {location} devastated by prolonged drought"
        ],
        'descriptions': [
            "{location} is experiencing its worst drought in {years} years, with water reservoirs at {percentage}% capacity. {casualty_info}",
            "Authorities in {location} have implemented water restrictions as the drought enters its {months} month. {casualty_info}",
            "The agricultural sector in {location} has been severely affected by the ongoing drought, with crop losses estimated at {percentage}%. {casualty_info}",
            "Water shortages in {location} have reached critical levels as the drought continues. Reservoirs are at {percentage}% of normal levels. {casualty_info}",
            "Officials in {location} have declared a state of emergency due to the severe drought conditions that have persisted for {months} months. {casualty_info}"
        ],
        'years': ["10", "20", "30", "50", "100"],
        'months': ["3", "6", "9", "12", "18", "24"],
        'percentages': ["10", "20", "30", "40", "50"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(1000, 1000000)),
            'damage_estimate': lambda: f"${random.randint(10, 5000)} million"
        }
    },
    'landslide': {
        'titles': [
            "Landslide buries homes in {location}",
            "Heavy rains trigger deadly landslide in {location}",
            "Massive landslide blocks highway in {location}",
            "Mudslide destroys village in {location}",
            "Landslide risk high in {location} after continuous rainfall"
        ],
        'descriptions': [
            "A landslide has buried several homes in {location} following days of heavy rainfall. {casualty_info}",
            "Heavy rains have triggered a deadly landslide in {location}, causing {damage_level} damage to infrastructure. {casualty_info}",
            "A massive landslide has blocked a major highway in {location}, stranding hundreds of travelers. {casualty_info}",
            "A village in {location} has been partially destroyed by a mudslide that occurred after {rainfall} mm of rain fell in 24 hours. {casualty_info}",
            "Authorities in {location} have warned of high landslide risk after {rainfall} mm of continuous rainfall in the region. {casualty_info}"
        ],
        'rainfalls': ["100", "150", "200", "250", "300", "400"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(10, 1000)),
            'damage_estimate': lambda: f"${random.randint(1, 100)} million"
        }
    },
    'avalanche': {
        'titles': [
            "Avalanche strikes ski resort in {location}",
            "Massive snow avalanche reported in {location}",
            "Skiers caught in avalanche in {location}",
            "Avalanche warning issued for {location} mountains",
            "Search and rescue operations underway after {location} avalanche"
        ],
        'descriptions': [
            "An avalanche has struck a ski resort in {location}, burying several people under {snow_depth} meters of snow. {casualty_info}",
            "A massive snow avalanche has been reported in the mountains of {location}. Search and rescue teams have been deployed. {casualty_info}",
            "Several skiers were caught in an avalanche in {location}. The avalanche was triggered after heavy snowfall of {snow_depth} meters. {casualty_info}",
            "Authorities have issued an avalanche warning for the mountains in {location} following recent heavy snowfall. {casualty_info}",
            "Search and rescue operations are underway after an avalanche in {location} buried a group of mountaineers under {snow_depth} meters of snow. {casualty_info}"
        ],
        'snow_depths': ["1.5", "2", "3", "4", "5"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(2, 50)),
            'damage_estimate': lambda: f"${random.randint(1, 20)} million"
        }
    },
    'pandemic': {
        'titles': [
            "New outbreak of {disease} reported in {location}",
            "{location} implements measures to contain {disease} spread",
            "Health emergency declared in {location} amid {disease} outbreak",
            "{disease} cases surge in {location}",
            "{location} begins vaccination campaign against {disease}"
        ],
        'descriptions': [
            "Health officials in {location} have reported a new outbreak of {disease}, with {cases} cases confirmed so far. {casualty_info}",
            "Authorities in {location} have implemented strict measures to contain the spread of {disease} after {cases} new cases were reported. {casualty_info}",
            "A health emergency has been declared in {location} as {disease} cases reach {cases}. {casualty_info}",
            "{location} is experiencing a surge in {disease} cases, with {cases} new infections reported in the past 24 hours. {casualty_info}",
            "A vaccination campaign against {disease} has begun in {location} after the number of cases reached {cases}. {casualty_info}"
        ],
        'diseases': ["COVID-19", "Influenza", "Dengue fever", "Ebola", "Zika virus", "Cholera", "Measles"],
        'cases': ["50", "100", "500", "1,000", "5,000", "10,000", "50,000"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(50, 1000000)),
            'damage_estimate': lambda: f"${random.randint(1, 10000)} million"
        }
    },
    'terrorist': {
        'titles': [
            "Explosion reported in {location}",
            "Security incident under investigation in {location}",
            "Authorities respond to incident in {location}",
            "Emergency services deployed to {location} following incident",
            "Security alert in {location} after incident"
        ],
        'descriptions': [
            "An explosion has been reported in {location}. Emergency services are at the scene. {casualty_info}",
            "Security forces are investigating an incident in {location}. The area has been cordoned off. {casualty_info}",
            "Authorities have responded to a security incident in {location}. {casualty_info} The public is advised to avoid the area.",
            "Emergency services have been deployed to {location} following a security incident. {casualty_info}",
            "A security alert has been issued in {location} after an incident. {casualty_info} Investigations are ongoing."
        ],
        'additional_info': {
            'affected_people': lambda: str(random.randint(1, 500)),
            'damage_estimate': lambda: f"${random.randint(1, 100)} million"
        }
    },
    'accident': {
        'titles': [
            "Major traffic accident on highway near {location}",
            "Train derailment reported in {location}",
            "Plane crash near {location} airport",
            "Industrial accident at factory in {location}",
            "Multi-vehicle collision causes chaos on {location} roads"
        ],
        'descriptions': [
            "A major traffic accident involving {vehicles} vehicles has occurred on the highway near {location}. {casualty_info}",
            "A train has derailed in {location}, causing significant disruption to rail services. {casualty_info}",
            "A plane carrying {passengers} passengers has crashed near {location} airport. Emergency services are at the scene. {casualty_info}",
            "An industrial accident has occurred at a factory in {location}. {casualty_info} Authorities are investigating the cause.",
            "A multi-vehicle collision has caused chaos on roads in {location}. {casualty_info} Traffic diversions are in place."
        ],
        'vehicles': ["2", "3", "4", "5", "multiple"],
        'passengers': ["10", "50", "100", "150", "200"],
        'additional_info': {
            'affected_people': lambda: str(random.randint(1, 200)),
            'damage_estimate': lambda: f"${random.randint(1, 50)} million"
        }
    }
}
# Location data for mock generation
# "City, Country/State" strings; generate_mock_disaster picks one at random
# and derives the country from the text after the last comma.
LOCATIONS = [
    # North America
    "Los Angeles, California", "New York City, New York", "Miami, Florida", "Chicago, Illinois",
    "Houston, Texas", "Toronto, Canada", "Mexico City, Mexico", "Vancouver, Canada",
    "Seattle, Washington", "Denver, Colorado", "Phoenix, Arizona", "Atlanta, Georgia",
    # South America
    "Rio de Janeiro, Brazil", "Buenos Aires, Argentina", "Lima, Peru", "Santiago, Chile",
    "Bogotá, Colombia", "Caracas, Venezuela", "Quito, Ecuador", "La Paz, Bolivia",
    # Europe
    "London, United Kingdom", "Paris, France", "Berlin, Germany", "Rome, Italy",
    "Madrid, Spain", "Amsterdam, Netherlands", "Brussels, Belgium", "Vienna, Austria",
    "Athens, Greece", "Stockholm, Sweden", "Oslo, Norway", "Helsinki, Finland",
    "Warsaw, Poland", "Prague, Czech Republic", "Budapest, Hungary", "Istanbul, Turkey",
    # Asia
    "Tokyo, Japan", "Beijing, China", "Shanghai, China", "Seoul, South Korea",
    "Mumbai, India", "Delhi, India", "Bangkok, Thailand", "Manila, Philippines",
    "Jakarta, Indonesia", "Kuala Lumpur, Malaysia", "Singapore", "Hanoi, Vietnam",
    "Dhaka, Bangladesh", "Karachi, Pakistan", "Dubai, UAE", "Riyadh, Saudi Arabia",
    # Africa
    "Cairo, Egypt", "Lagos, Nigeria", "Nairobi, Kenya", "Cape Town, South Africa",
    "Johannesburg, South Africa", "Casablanca, Morocco", "Tunis, Tunisia", "Algiers, Algeria",
    "Addis Ababa, Ethiopia", "Dar es Salaam, Tanzania", "Accra, Ghana", "Dakar, Senegal",
    # Oceania
    "Sydney, Australia", "Melbourne, Australia", "Brisbane, Australia", "Perth, Australia",
    "Auckland, New Zealand", "Wellington, New Zealand", "Port Moresby, Papua New Guinea",
    "Suva, Fiji", "Honolulu, Hawaii"
]
# Outlet names attributed to generated mock articles.
NEWS_SOURCES = [
    "Associated Press", "Reuters", "BBC News", "CNN", "Al Jazeera",
    "The Guardian", "The New York Times", "Washington Post", "Fox News",
    "CNBC", "Bloomberg", "AFP", "NPR", "CBS News", "NBC News", "ABC News"
]
def execute_db_query(query, params=None, fetch=False):
    """Execute database query with proper locking.

    Args:
        query: SQL string, optionally containing %s placeholders.
        params: tuple of values bound to the placeholders, or None.
        fetch: False for write statements (returns True on success),
               'all' for cursor.fetchall(), any other truthy value for
               cursor.fetchone().

    Returns:
        The fetched rows/row, True for a successful write, or None if a
        database error occurred.
    """
    with db_lock:
        conn = None
        try:
            conn = pymysql.connect(**DB_CONFIG)
            with conn.cursor() as cursor:
                if params:
                    cursor.execute(query, params)
                else:
                    cursor.execute(query)
                if fetch:
                    # 'all' -> every row; any other truthy value -> one row.
                    if fetch == 'all':
                        result = cursor.fetchall()
                    else:
                        result = cursor.fetchone()
                else:
                    result = True
            conn.commit()
            return result
        except Error as e:
            print(f"Database error: {e}")
            return None
        finally:
            # Fix: the original leaked the connection (and cursor) if
            # execute/commit raised; always release it.
            if conn is not None:
                conn.close()
def get_event_count():
    """Return the number of rows currently stored in the disasters table."""
    row = execute_db_query("SELECT COUNT(*) FROM disasters", fetch='one')
    if not row:
        return 0
    return row[0]
def cleanup_old_events():
    """Drop the oldest rows so the disasters table stays under MAX_EVENTS."""
    current_count = get_event_count()
    if current_count < MAX_EVENTS:
        return
    # Delete 5 extra rows beyond the overage so cleanup doesn't have to
    # run again on every subsequent insert.
    events_to_delete = current_count - MAX_EVENTS + 5
    # Subquery wrapper is required: MySQL forbids deleting from a table
    # referenced directly in the same statement's subquery.
    execute_db_query("""
        DELETE FROM disasters
        WHERE id IN (
            SELECT id FROM (
                SELECT id FROM disasters
                ORDER BY timestamp ASC
                LIMIT %s
            ) as temp
        )
    """, (events_to_delete,))
    print(f"Cleaned up {events_to_delete} old events. Current count: {get_event_count()}")
def cleanup_old_news():
    """Purge disaster rows whose timestamp is older than MAX_NEWS_AGE_DAYS."""
    # Timestamps are stored as ISO-8601 text, so a lexicographic string
    # comparison in SQL matches chronological order.
    cutoff = datetime.now() - timedelta(days=MAX_NEWS_AGE_DAYS)
    cutoff_date = cutoff.isoformat()
    execute_db_query("""
        DELETE FROM disasters
        WHERE timestamp < %s
    """, (cutoff_date,))
    print(f"Removed news articles older than {MAX_NEWS_AGE_DAYS} days")
# Initialize database
def init_db():
    """Create the disasters database and table if missing, and add any
    columns that older schema versions lacked.

    Raises:
        pymysql.Error: re-raised after logging so startup fails loudly
        when the database cannot be initialized.
    """
    with db_lock:
        conn = None
        try:
            # Connect without selecting a database so we can create it first.
            conn = pymysql.connect(
                host=DB_CONFIG['host'],
                user=DB_CONFIG['user'],
                password=DB_CONFIG['password']
            )
            cursor = conn.cursor()
            # Identifiers can't be bound as parameters; the database name
            # comes from trusted local config, not user input.
            cursor.execute(f"CREATE DATABASE IF NOT EXISTS {DB_CONFIG['database']}")
            cursor.execute(f"USE {DB_CONFIG['database']}")
            # Check if table exists
            cursor.execute("SHOW TABLES LIKE 'disasters'")
            if not cursor.fetchone():
                # Create new table with all columns
                cursor.execute('''CREATE TABLE disasters
                    (id INT AUTO_INCREMENT PRIMARY KEY,
                    title TEXT,
                    description TEXT,
                    location TEXT,
                    country TEXT,
                    severity TEXT,
                    disaster_type TEXT,
                    source TEXT,
                    timestamp TEXT,
                    url TEXT,
                    affected_people TEXT,
                    damage_estimate TEXT,
                    additional_info TEXT)''')
                print("Created disasters table")
            else:
                # Table exists: migrate older schemas by adding any column
                # that is missing (MySQL has no ADD COLUMN IF NOT EXISTS
                # on older versions, so inspect SHOW COLUMNS instead).
                cursor.execute("SHOW COLUMNS FROM disasters")
                columns = [column[0] for column in cursor.fetchall()]
                required_columns = ['country', 'disaster_type', 'url', 'affected_people',
                                    'damage_estimate', 'additional_info']
                for col in required_columns:
                    if col not in columns:
                        cursor.execute(f"ALTER TABLE disasters ADD COLUMN {col} TEXT")
                        print(f"Added missing column: {col}")
            conn.commit()
            cursor.close()
            print("Database initialized successfully")
        except Error as e:
            print(f"Error initializing database: {e}")
            raise
        finally:
            # Fix: the original leaked the connection when any statement
            # raised; always release it, success or failure.
            if conn is not None:
                conn.close()
# Generate mock disaster data with recent dates only
def generate_mock_disaster():
    """Build one synthetic disaster record dated within MAX_NEWS_AGE_DAYS.

    Picks a random disaster type from MOCK_DATA, fills that type's title and
    description templates with randomized placeholder values, and returns a
    dict shaped like a row of the `disasters` table.
    """
    kind = random.choice(list(MOCK_DATA.keys()))
    data = MOCK_DATA[kind]

    # Location; the country is the last comma-separated component.
    place = random.choice(LOCATIONS)
    country = place.split(", ")[-1] if ", " in place else place

    # Weighted severity: 50% Low, 30% Medium, 20% High.
    severity_options = ["Low", "Medium", "High"]
    severity_weights = [0.5, 0.3, 0.2]
    severity = random.choices(severity_options, weights=severity_weights)[0]

    # Damage adjective scales with severity.
    damage_levels = {
        "Low": ["minor", "limited", "minimal", "slight"],
        "Medium": ["moderate", "significant", "considerable", "substantial"],
        "High": ["severe", "extensive", "major", "catastrophic", "devastating"]
    }
    damage_level = random.choice(damage_levels[severity])

    # Casualty sentence also scales with severity.
    if severity == "Low":
        # 70% of Low-severity events report no casualties at all.
        if random.random() < 0.7:
            casualty_info = "No casualties have been reported."
        else:
            casualty_info = f"{random.randint(1, 5)} minor injuries have been reported."
    elif severity == "Medium":
        # 50% of Medium-severity events report injuries only.
        if random.random() < 0.5:
            casualty_info = f"{random.randint(5, 50)} people have been injured."
        else:
            casualty_info = f"{random.randint(1, 10)} people have died and {random.randint(10, 100)} have been injured."
    else:
        casualty_info = f"{random.randint(10, 1000)} people have died and {random.randint(50, 5000)} have been injured."

    title = random.choice(data['titles'])
    description = random.choice(data['descriptions'])

    # Placeholders common to every disaster type.
    replacements = {
        '{location}': place,
        '{damage_level}': damage_level,
        '{casualty_info}': casualty_info
    }

    # Per-type placeholders, table-driven: (template placeholder, MOCK_DATA key).
    extra_fields = {
        'earthquake': [('{magnitude}', 'magnitudes')],
        'flood': [('{water_level}', 'water_levels')],
        'hurricane': [('{name}', 'names'), ('{category}', 'categories'),
                      ('{wind_speed}', 'wind_speeds')],
        'wildfire': [('{acres}', 'acres'), ('{containment}', 'containment')],
        'tornado': [('{ef_rating}', 'ef_ratings'), ('{path_width}', 'path_widths')],
        'tsunami': [('{wave_height}', 'wave_heights')],
        'volcanic': [('{ash_height}', 'ash_heights'), ('{alert_level}', 'alert_levels')],
        'drought': [('{years}', 'years'), ('{months}', 'months'),
                    ('{percentage}', 'percentages')],
        'landslide': [('{rainfall}', 'rainfalls')],
        'avalanche': [('{snow_depth}', 'snow_depths')],
        'pandemic': [('{disease}', 'diseases'), ('{cases}', 'cases')],
        'accident': [('{vehicles}', 'vehicles'), ('{passengers}', 'passengers')],
    }
    for placeholder, key in extra_fields.get(kind, []):
        replacements[placeholder] = random.choice(data[key])

    # Substitute every placeholder into both templates.
    for placeholder, value in replacements.items():
        title = title.replace(placeholder, value)
        description = description.replace(placeholder, value)

    # Optional per-type generator callables produce extra structured fields.
    additional_info = {}
    for key, func in data.get('additional_info', {}).items():
        additional_info[key] = func()

    # Timestamp strictly within the retention window.
    offset = timedelta(
        days=random.randint(0, MAX_NEWS_AGE_DAYS - 1),
        hours=random.randint(0, 23),
        minutes=random.randint(0, 59),
    )
    timestamp = (datetime.now() - offset).isoformat()

    source = random.choice(NEWS_SOURCES)
    return {
        'title': title,
        'description': description,
        'location': place,
        'country': country,
        'severity': severity,
        'disaster_type': kind.title(),
        'source': source,
        'timestamp': timestamp,
        'url': f"https://example.com/news/{int(time.time())}-{kind.replace(' ', '-')}",
        'affected_people': additional_info.get('affected_people', ''),
        'damage_estimate': additional_info.get('damage_estimate', ''),
        'additional_info': json.dumps(additional_info)
    }
# Generate and add mock disasters to database
def add_mock_disasters(count=10):
    """Insert up to `count` generated mock disasters, honoring MAX_EVENTS.

    Returns the number of rows actually inserted (0 when the event cap has
    already been reached).
    """
    current_count = get_event_count()
    if current_count >= MAX_EVENTS:
        print(f"Event limit reached ({current_count}/{MAX_EVENTS}). Not adding more mock data.")
        return 0

    # Never push the table past the configured cap.
    to_add = min(count, MAX_EVENTS - current_count)

    # Column order must match the INSERT's placeholder order.
    fields = ('title', 'description', 'location', 'country', 'severity',
              'disaster_type', 'source', 'timestamp', 'url',
              'affected_people', 'damage_estimate', 'additional_info')
    inserted = 0
    for _ in range(to_add):
        record = generate_mock_disaster()
        # %s placeholders: MySQL/pymysql parameter style.
        result = execute_db_query('''INSERT INTO disasters
            (title, description, location, country, severity, disaster_type,
            source, timestamp, url, affected_people, damage_estimate, additional_info)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''',
            tuple(record[f] for f in fields))
        if result is not None:
            inserted += 1

    print(f"Added {inserted} mock disasters to the database. Total: {get_event_count()}/{MAX_EVENTS}")
    return inserted
# Enhanced location extraction using NER
def extract_location_ner(text):
    """Extract place names from text using spaCy named-entity recognition.

    GPE entities that contain a known country name are listed after the
    other locations. Falls back to the regex extractor on any NER failure.
    Returns up to three comma-joined names, or 'Unknown'.
    """
    try:
        doc = nlp(text)
        places = []
        countries = []
        known_countries = get_country_list()
        for ent in doc.ents:
            # Only geopolitical entities and locations are of interest.
            if ent.label_ not in ("GPE", "LOC"):
                continue
            candidate = ent.text.strip()
            if len(candidate) <= 2 or candidate in places:
                continue
            is_country = ent.label_ == "GPE" and any(
                name.lower() in candidate.lower() for name in known_countries
            )
            (countries if is_country else places).append(candidate)
        combined = places + countries
        if not combined:
            return 'Unknown'
        return ', '.join(combined[:3])
    except Exception as e:
        print(f"NER extraction error: {e}")
        return extract_location_regex(text)
# Fallback regex-based location extraction
def extract_location_regex(text):
    """Fallback location extraction using regex patterns.

    Scans for "City, Country" pairs and capitalized words following
    "in"/"at"/"near", filters out common sentence-starting words, and
    returns up to three comma-joined candidates, or 'Unknown' when nothing
    plausible is found.
    """
    location_patterns = [
        r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*),\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b',
        r'\bin\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b',
        r'\bat\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b',
        r'\bnear\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b'
    ]
    locations = []
    for pattern in location_patterns:
        matches = re.findall(pattern, text)
        for match in matches:
            # The first pattern has two groups, so findall yields tuples.
            if isinstance(match, tuple):
                locations.extend([m for m in match if len(m) > 2])
            else:
                if len(match) > 2:
                    locations.append(match)
    # Common capitalized words the heuristic would mistake for places.
    non_locations = {'The', 'This', 'That', 'With', 'From', 'After', 'Before', 'During', 'Since'}
    locations = [loc for loc in locations if loc not in non_locations]
    # BUG FIX: list(set(...)) produced a nondeterministic ordering, so the
    # same text could yield differently-ordered results between runs. Use an
    # order-preserving dedupe (dict keys keep insertion order) instead.
    unique_locations = list(dict.fromkeys(locations))
    return ', '.join(unique_locations[:3]) if unique_locations else 'Unknown'
# Main location extraction function
def extract_location(text):
    """Extract a location string: spaCy NER when enabled, else regex fallback."""
    return extract_location_ner(text) if USE_NER else extract_location_regex(text)
# Enhanced country list
# Built once at import time instead of re-creating the literal on every call;
# get_country_list() hands out a fresh list so callers that mutate the result
# cannot corrupt the shared constant.
_COUNTRY_NAMES = (
    'United States', 'USA', 'America', 'China', 'India', 'Japan', 'Germany', 'France',
    'United Kingdom', 'UK', 'Brazil', 'Russia', 'Canada', 'Australia', 'Mexico',
    'Italy', 'Spain', 'Turkey', 'Indonesia', 'Philippines', 'Pakistan', 'Bangladesh',
    'Nigeria', 'Egypt', 'Iran', 'Thailand', 'South Korea', 'Vietnam', 'Argentina',
    'Poland', 'Ukraine', 'Malaysia', 'Nepal', 'Afghanistan', 'Iraq', 'Syria',
    'South Africa', 'Kenya', 'Ethiopia', 'Morocco', 'Algeria', 'Sudan', 'Ghana',
    'Tanzania', 'Uganda', 'Mozambique', 'Madagascar', 'Cameroon', 'Angola', 'Chile',
    'Peru', 'Colombia', 'Venezuela', 'Ecuador', 'Bolivia', 'Paraguay', 'Uruguay',
    'Norway', 'Sweden', 'Finland', 'Denmark', 'Netherlands', 'Belgium', 'Switzerland',
    'Austria', 'Portugal', 'Greece', 'Czech Republic', 'Hungary', 'Romania', 'Bulgaria'
)


def get_country_list():
    """Return the recognized country names (including aliases like 'UK')."""
    return list(_COUNTRY_NAMES)
# Extract country from location using NER
def extract_country(text):
    """Return the first known country mentioned in text, else 'Unknown'.

    With NER enabled, countries found inside GPE entities win; otherwise a
    case-insensitive substring scan of the raw text is used (this is also
    the fallback when NER raises).
    """
    if USE_NER:
        try:
            doc = nlp(text)
            known = get_country_list()
            # Prefer country names appearing inside geopolitical entities.
            for ent in doc.ents:
                if ent.label_ != "GPE":
                    continue
                for name in known:
                    if name.lower() in ent.text.lower():
                        return name
            # NER found no match: scan the whole text instead.
            upper = text.upper()
            for name in known:
                if name.upper() in upper:
                    return name
        except Exception as e:
            print(f"Country extraction error: {e}")
    # Plain substring search: the non-NER path and the error fallback.
    upper = text.upper()
    for name in get_country_list():
        if name.upper() in upper:
            return name
    return 'Unknown'
# Enhanced disaster type classification
def classify_disaster_type(text):
    """Pick the DISASTER_TYPES category whose keywords best match text.

    Each matched keyword scores 1 point, or 2 points per word for
    multi-word keywords (stronger evidence). Returns the title-cased best
    category, or 'Other' when nothing matches.
    """
    haystack = text.lower()
    scores = {}
    for kind, keywords in DISASTER_TYPES.items():
        total = 0
        for keyword in keywords:
            if keyword in haystack:
                word_count = len(keyword.split())
                total += word_count * 2 if word_count > 1 else 1
        scores[kind] = total
    if scores and max(scores.values()) > 0:
        return max(scores, key=scores.get).title()
    return 'Other'
# Enhanced additional information extraction
def extract_additional_info(text):
    """Pull structured facts out of free-text disaster coverage.

    Returns a dict that may contain 'affected_people', 'damage_estimate',
    'magnitude', and 'wind_speed'; keys are omitted when nothing matched.
    """
    info = {}
    # Casualty / affected-people counts; first matching pattern wins.
    people_patterns = [
        r'(\d+(?:,\d+)*)\s+(?:people|persons|individuals|victims|casualties)',
        r'(\d+(?:,\d+)*)\s+(?:killed|dead|deaths|fatalities)',
        r'(\d+(?:,\d+)*)\s+(?:injured|wounded|hurt)',
        r'(\d+(?:,\d+)*)\s+(?:missing|displaced|evacuated|affected)',
        r'(?:death toll|fatalities|casualties).*?(\d+(?:,\d+)*)',
        r'(\d+(?:,\d+)*)\s+(?:confirmed dead|reported dead)'
    ]
    for pattern in people_patterns:
        matches = re.findall(pattern, text.lower())
        if matches:
            info['affected_people'] = matches[0]
            break
    # Monetary damage. BUG FIX: the magnitude word was previously outside
    # the capture group, so "$2 billion" was reported as just "$2"; the
    # unit is now captured together with the amount.
    damage_patterns = [
        r'\$(\d+(?:,\d+)*(?:\.\d+)?(?:\s*(?:million|billion|thousand|M|B|K))?)',
        r'(\d+(?:,\d+)*(?:\.\d+)?\s*(?:million|billion|thousand))\s*(?:dollars|USD|\$)',
        r'(?:damage|loss|cost).*?\$(\d+(?:,\d+)*(?:\.\d+)?)',
        r'(?:economic loss|financial impact).*?(\d+(?:,\d+)*)'
    ]
    for pattern in damage_patterns:
        matches = re.findall(pattern, text)
        if matches:
            info['damage_estimate'] = f"${matches[0]}"
            break
    # Richter-style magnitude for earthquakes.
    magnitude_matches = re.findall(r'magnitude\s*(\d+(?:\.\d+)?)', text.lower())
    if magnitude_matches:
        info['magnitude'] = magnitude_matches[0]
    # Wind speed for storms. BUG FIX: every match was previously labeled
    # "mph" even when the article said km/h or knots; report the unit that
    # was actually found.
    wind_matches = re.findall(r'wind.*?(\d+)\s*(mph|km/h|knots)', text.lower())
    if wind_matches:
        speed, unit = wind_matches[0]
        info['wind_speed'] = f"{speed} {unit}"
    return info
# Enhanced severity classification
def classify_severity(text, disaster_type):
    """Classify text as 'High', 'Medium', or 'Low' severity.

    `disaster_type` is accepted for interface compatibility; the keyword
    scoring below does not consult it. Matching is by plain substring, so
    e.g. 'alert' also matches 'alerted'.
    """
    haystack = text.lower()
    high_keywords = [
        'major', 'severe', 'catastrophic', 'emergency', 'devastating', 'massive',
        'deadly', 'fatal', 'critical', 'extreme', 'unprecedented', 'worst',
        'magnitude 7', 'magnitude 8', 'magnitude 9', 'category 4', 'category 5',
        'state of emergency', 'disaster declaration', 'evacuation order',
        'hundreds killed', 'thousands affected', 'widespread destruction'
    ]
    medium_keywords = [
        'moderate', 'warning', 'alert', 'significant', 'notable', 'considerable',
        'magnitude 5', 'magnitude 6', 'category 2', 'category 3',
        'dozens injured', 'property damage', 'local emergency'
    ]
    # Every high keyword is worth 2 points, every medium keyword 1 point.
    high_score = 2 * sum(1 for kw in high_keywords if kw in haystack)
    medium_score = sum(1 for kw in medium_keywords if kw in haystack)
    # The strongest words force High regardless of the overall score.
    if high_score >= 2 or any(kw in haystack for kw in ('catastrophic', 'devastating', 'emergency')):
        return 'High'
    if high_score >= 1 or medium_score >= 2:
        return 'Medium'
    return 'Low'
# Enhanced data collection with event limit and mock data fallback
def collect_news_data():
# Check if we've reached the limit