# Source: Dynamic-Spectra/DownloadCallistoData at main · anshusolar/Dynamic-Spectra · GitHub
# Download e-CALLISTO data

import numpy as np
import matplotlib.pyplot as plt
from astropy.io import fits
from scipy.ndimage import gaussian_filter
from skimage.measure import label, regionprops
import matplotlib.patches as patches
from datetime import datetime, timedelta
import matplotlib.dates as mdates
from matplotlib.ticker import LogLocator, ScalarFormatter, NullFormatter
import requests
from bs4 import BeautifulSoup
import os
import re
import time
from urllib.parse import urljoin

# Function to download Udaipur FITS files for a given date
def download_udaipur_files(date_str, download_dir="udaipur_data", max_retries=3):
    """
    Download all INDIA-UDAIPUR FITS files for a given date from e-CALLISTO archive.

    The archive's directory listing for the day is fetched and scanned for
    links whose file name starts with
    ``INDIA-UDAIPUR_<date_str>_<six digits>_02.fit.gz`` (or ``_03``). Each
    matching file is saved into ``download_dir``; files already present there
    are not downloaded again but are still included in the result.

    Parameters:
    date_str (str): YYYYMMDD (e.g., '20250131')
    download_dir (str): Directory to save downloaded files
    max_retries (int): Maximum number of attempts for the directory listing
        and for each file. Values below 1 mean "do not try": nothing is
        requested and an empty list is returned.

    Returns:
    list: Paths of the files available locally after the call (freshly
        downloaded or already present). Empty if the listing could not be
        read or no file matched.
    """
    retry_delay = 5  # seconds to wait between two attempts

    # Without at least one attempt the listing can never be fetched; bail out
    # instead of falling through to code that needs the parsed listing.
    if max_retries < 1:
        print("max_retries must be at least 1. Nothing downloaded.")
        return []

    # Construct the e-CALLISTO archive URL
    year, month, day = date_str[:4], date_str[4:6], date_str[6:8]
    base_url = f"https://soleil.i4ds.ch/solarradio/data/2002-20yy_Callisto/{year}/{month}/{day}/"

    # Create download directory if it doesn't exist
    os.makedirs(download_dir, exist_ok=True)

    # Scrape the directory listing with retries.
    # requests.Timeout is a subclass of RequestException, so one clause suffices.
    soup = None
    for attempt in range(max_retries):
        try:
            print(f"Attempt {attempt + 1} to access {base_url}")
            response = requests.get(base_url, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            break
        except requests.RequestException as e:
            print(f"Error accessing {base_url}: {e}")
            if attempt == max_retries - 1:
                print("Max retries reached. Could not access directory listing.")
                return []
            time.sleep(retry_delay)  # Wait before retrying

    # Find all links to Udaipur FITS files. date_str is escaped so that it is
    # always treated as literal text inside the pattern.
    file_pattern = re.compile(
        rf"INDIA-UDAIPUR_{re.escape(date_str)}_\d{{6}}_0[23]\.fit\.gz"
    )
    file_links = []  # (href, file name) pairs, in listing order
    seen_names = set()

    for link in soup.find_all('a', href=True):
        href = link['href']
        # Match on the last path component so that listings using absolute or
        # directory-prefixed hrefs are recognised as well.
        name = href.rsplit('/', 1)[-1]
        if file_pattern.match(name) and name not in seen_names:
            seen_names.add(name)
            file_links.append((href, name))

    if not file_links:
        print(f"No matching files found for date {date_str}")
        return []

    # Download files with retries
    downloaded_files = []
    for file_link, name in file_links:
        file_url = urljoin(base_url, file_link)
        filename = os.path.join(download_dir, name)

        # Skip if file already exists
        if os.path.exists(filename):
            print(f"File {filename} already exists. Skipping download.")
            downloaded_files.append(filename)
            continue

        # Data is written to a temporary name first and renamed afterwards, so
        # an interrupted write never leaves a truncated file under the final
        # name (which the existence check above would later accept).
        partial_name = filename + ".part"

        for attempt in range(max_retries):
            try:
                print(f"Downloading {file_url}... (Attempt {attempt + 1})")
                response = requests.get(file_url, timeout=60)
                response.raise_for_status()
                payload = response.content
            except requests.RequestException as e:
                print(f"Error downloading {file_url}: {e}")
                if attempt == max_retries - 1:
                    print(f"Failed to download {file_url} after {max_retries} attempts")
                else:
                    time.sleep(retry_delay)  # Wait before retrying
                continue

            # Save file
            with open(partial_name, 'wb') as f:
                f.write(payload)
            os.replace(partial_name, filename)

            downloaded_files.append(filename)
            print(f"Successfully saved {filename}")
            break

    return downloaded_files

# Modified main processing function with better date handling
def process_day(date_str=None, download_dir=None):
    """
    Download the INDIA-UDAIPUR files for one day, asking for the date if needed.

    Prints the data credit, resolves and validates the date, then calls
    download_udaipur_files() for it.

    Parameters:
    date_str (str or None): Date as YYYYMMDD. If None, the user is prompted
        on stdin; an empty answer selects today's local date.
    download_dir (str or None): Directory to save the files in. If None,
        ``udaipur_<date_str>`` is used.

    Returns:
    None. The function returns early (after printing a message) when the date
    is invalid or when no files are available.
    """
    print("Data credit: University of Applied Sciences and Arts Northwestern Switzerland (FHNW), Institute for Data Science.")
    print("Contact: Christian Monstein, Istituto Ricerche Solari Locarno (IRSOL), monstein@irsol.ch")

    # Default to today's date if not provided
    if date_str is None:
        today = datetime.now().strftime("%Y%m%d")
        date_str = input(f"Enter date in YYYYMMDD format (default: {today}): ").strip() or today

    # Tolerate surrounding whitespace (and non-string values such as an int).
    date_str = str(date_str).strip()

    # Validate date format. strptime alone is too lenient: it accepts
    # non-zero-padded input such as "2025131", which would later be sliced
    # into a wrong year/month/day. Require exactly eight ASCII digits first,
    # then let strptime reject impossible calendar dates.
    is_valid = re.fullmatch(r"[0-9]{8}", date_str) is not None
    if is_valid:
        try:
            datetime.strptime(date_str, "%Y%m%d")
        except ValueError:
            is_valid = False
    if not is_valid:
        print("Invalid date format. Please use YYYYMMDD.")
        return

    # Set download directory
    if download_dir is None:
        download_dir = f"udaipur_{date_str}"

    # Download files
    print(f"\nDownloading files for {date_str}...")
    downloaded_files = download_udaipur_files(date_str, download_dir)

    if not downloaded_files:
        print("No files downloaded. Exiting.")
        return

    # [Rest of your process_day function remains the same]

# Script entry point: when this file is executed directly (rather than
# imported), run process_day() with no arguments, so the date is requested
# interactively and the default download directory is used.
if __name__ == "__main__":
    process_day()