Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,6 @@ jobs:
env:
FLOWS_CONFIG: ${{ secrets.FLOWS_CONFIG }}
FLOWS_API_TOKEN: ${{ secrets.FLOWS_API_TOKEN }}
CASSJOBS_WSID: ${{ secrets.CASSJOBS_WSID }}
CASSJOBS_PASSWORD: ${{ secrets.CASSJOBS_PASSWORD }}
run: |
python -m pip install --upgrade pip wheel
pip install -r requirements.txt
Expand Down
11 changes: 0 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,6 @@ Text coming soon...
# pipeline = False
token = None

# casjobs:
# wsid and password required for run_catalogs.py,
# user registration at
# https://galex.stsci.edu/casjobs/CreateAccount.aspx
# wsid can be found at
# https://galex.stsci.edu/casjobs/changedetails.aspx
# after login
[casjobs]
# wsid = $CASJOBS_WSID/None
# password = $CASJOBS_PASSWORD/None

# database:
# username and password required for run_catalogs.py,
# the user is a registered user in the flows database
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
master-v1.1.1
master-v1.2.0
274 changes: 73 additions & 201 deletions flows/catalogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
from bottleneck import anynan
from tendrils.utils import load_config, query_ztf_id

import mastcasjobs

from .aadc_db import AADC_DB

logger = logging.getLogger(__name__)
Expand All @@ -50,203 +48,6 @@ def floatval(value):
def intval(value):
return None if value == '' else int(value)


# --------------------------------------------------------------------------------------------------
def casjobs_configured(config: configparser.ConfigParser) -> bool:
"""
Check if casjobs credentials are available.
"""
wsid = config.get('casjobs', 'wsid', fallback=os.environ.get("CASJOBS_WSID", None))
passwd = config.get('casjobs', 'password', fallback=os.environ.get("CASJOBS_PASSWORD", None))
if wsid is None or passwd is None:
raise CasjobsError("CasJobs WSID and PASSWORD not in config.ini / 'CASJOBS_WSID' and 'CASJOBS_PASSWORD' not defined as environment variables")
return True

# --------------------------------------------------------------------------------------------------
def query_casjobs_refcat2(coo_centre, radius=24 * u.arcmin):
"""
Uses the CasJobs program to do a cone-search around the position.

Will first attempt to do single large cone-search, and if that
fails because of CasJobs memory limits, will sub-divide the cone
into smaller queries.

Parameters:
coo_centre (:class:`astropy.coordinates.SkyCoord`): Coordinates of centre of search cone.
radius (Angle, optional): Search radius. Default is 24 arcmin.

Returns:
list: List of dicts with the REFCAT2 information.

.. codeauthor:: Rasmus Handberg <rasmush@phys.au.dk>
"""
if isinstance(radius, (float, int)):
radius *= u.deg

try:
results = _query_casjobs_refcat2(coo_centre, radius=radius)
except CasjobsMemoryError:
logger.debug("CasJobs failed with memory error. Trying to use smaller radii.")
results = _query_casjobs_refcat2_divide_and_conquer(coo_centre, radius=radius)

# Remove duplicate entries:
results = unique(results, keys='starid')

# Trim away anything outside radius:
ra = [res['ra'] for res in results]
decl = [res['decl'] for res in results]
coords = SkyCoord(ra=ra, dec=decl, unit='deg', frame='icrs')
sep = coords.separation(coo_centre)
mask = sep <= radius

logger.debug("Found %d unique results", np.count_nonzero(mask))
return results[mask]


# --------------------------------------------------------------------------------------------------
def _query_casjobs_refcat2_divide_and_conquer(coo_centre, radius):

# Just put in a stop criterion to avoid infinite recursion:
if radius < 0.04 * u.deg:
raise Exception("Too many subdivides")

# Search central cone:
try:
results = _query_casjobs_refcat2(coo_centre, radius=0.5 * radius)
except CasjobsMemoryError:
logger.debug("CasJobs failed with memory error. Trying to use smaller radii.")
results = _query_casjobs_refcat2_divide_and_conquer(coo_centre, radius=0.5 * radius)

# Search six cones around central cone:
for n in range(6):
# FIXME: The 0.8 here is kind of a guess. There should be an analytic solution
new = SkyCoord(ra=coo_centre.ra.deg + 0.8 * Angle(radius).deg * np.cos(n * 60 * np.pi / 180),
dec=coo_centre.dec.deg + 0.8 * Angle(radius).deg * np.sin(n * 60 * np.pi / 180), unit='deg',
frame='icrs')

try:
results += _query_casjobs_refcat2(new, radius=0.5 * radius)
except CasjobsMemoryError:
logger.debug("CasJobs failed with memory error. Trying to use smaller radii.")
results += _query_casjobs_refcat2_divide_and_conquer(new, radius=0.5 * radius)

return results


# --------------------------------------------------------------------------------------------------
def _query_casjobs_refcat2(coo_centre, radius=24 * u.arcmin):
"""
Uses the CasJobs program to do a cone-search around the position.

Parameters:
coo_centre (:class:`astropy.coordinates.SkyCoord`): Coordinates of centre of search cone.
radius (Angle, optional): Search radius. Default is 24 arcmin.

Returns:
list: List of dicts with the REFCAT2 information.

.. codeauthor:: Rasmus Handberg <rasmush@phys.au.dk>
"""

if isinstance(radius, (float, int)):
radius *= u.deg

# prepare refcat2 query to find all objects near position
casjobs_context = "HLSP_ATLAS_REFCAT2"
casjobs_personal_table = "flows"
sql = ("""SELECT r.* INTO {table:s} FROM fGetNearbyObjEq({ra:f}, {dec:f}, {radius:f}) AS n
INNER JOIN {context:s}.refcat2 AS r ON n.objid=r.objid ORDER BY n.distance;"""
.format(table=casjobs_personal_table, context=casjobs_context,
ra=coo_centre.ra.deg, dec=coo_centre.dec.deg, radius=Angle(radius).deg))

config = load_config()
casjobs_configured(config)
jobs = mastcasjobs.MastCasJobs(userid=config.get('casjobs', 'wsid'),
password=config.get('casjobs', 'password'),
context=casjobs_context)

# limited storage space at remote casjobs, drop table, if it exists, before making a new one
for table in jobs.list_tables():
if table == casjobs_personal_table:
jobs.drop_table(casjobs_personal_table) # more graceful than drop_table_if_exists()

# submit job, wait for it to finish and retrieve the results
job_id = jobs.submit(sql, context=casjobs_context, task_name=casjobs_personal_table)
logger.debug("Submitted query '%s' to MAST CasJobs context '%s' with task name '%s'",
sql, casjobs_context, casjobs_personal_table)
code, status = jobs.monitor(job_id, timeout=5)
logger.debug("MAST CasJobs query exited with status %d: %s", code, status)
results = jobs.get_table(casjobs_personal_table)

# Contents of HLSP_ATLAS_REFCAT2.refcat2 data table - i.e. the columns of 'results' just
# above; from https://galex.stsci.edu/casjobs/MyDB.aspx -> HLSP_ATLAS_REFCAT2.refcat2
# -----------------------------------------------------------------------------
# Name Unit Data Type Size Default Value Description
# objid dimensionless bigint 8 Unique object identifier.
# RA degrees float 8 RA from Gaia DR2, J2000, epoch 2015.5
# Dec degrees float 8 Dec from Gaia DR2, J2000, epoch 2015.5
# plx mas real 4 Parallax from Gaia DR2
# dplx mas real 4 Parallax uncertainty
# pmra mas/yr real 4 Proper motion in RA from Gaia DR2
# dpmra mas/yr real 4 Proper motion uncertainty in RA
# pmdec mas/yr real 4 Proper motion in Dec from Gaia DR2
# dpmdec mas/yr real 4 Proper motion uncertainty in Dec
# Gaia mag real 4 Gaia DR2 G magnitude
# dGaia mag real 4 Gaia DR2 G magnitude uncertainty
# BP mag real 4 Gaia G_BP magnitude
# dBP mag real 4 Gaia G_BP magnitude uncertainty
# RP mag real 4 Gaia G_RP magnitude
# dRP mag real 4 Gaia G_RP magnitude uncertainty
# Teff [K] int 4 Gaia stellar effective temperature
# AGaia mag real 4 Gaia estimate of G-band extinction for this star
# dupvar dimensionless int 4 Gaia flags coded as CONSTANT (0), VARIABLE (1), or NOT_AVAILABLE (2) + 4*DUPLICATE
# Ag mag real 4 SFD estimate of total column g-band extinction
# rp1 arc seconds real 4 Radius where cumulative G flux exceeds 0.1x this star
# r1 arc seconds real 4 Radius where cumulative G flux exceeds 1x this star
# r10 arc seconds real 4 Radius where cumulative G flux exceeds 10x this star
# g mag real 4 Pan-STARRS g_P1 magnitude
# dg mag real 4 Pan-STARRS g_P1 magnitude uncertainty
# gchi dimensionless real 4 chi^2/DOF for contributors to g
# gcontrib dimensionless int 4 Bitmap of contributing catalogs to g
# r mag real 4 Pan-STARRS r_P1 magnitude
# dr mag real 4 Pan-STARRS r_P1 magnitude uncertainty
# rchi dimensionless real 4 chi^2/DOF for contributors to r
# rcontrib dimensionless int 4 Bitmap of contributing catalogs to r
# i mag real 4 Pan-STARRS i_P1 magnitude
# di mag real 4 Pan-STARRS i_P1 magnitude uncertainty
# ichi dimensionless real 4 chi^2/DOF for contributors to i
# icontrib dimensionless int 4 Bitmap of contributing catalogs to i
# z mag real 4 Pan-STARRS z_P1 magnitude
# dz mag real 4 Pan-STARRS z_P1 magnitude uncertainty
# zchi dimensionless real 4 chi^2/DOF for contributors to z
# zcontrib dimensionless int 4 Bitmap of contributing catalogs to z
# nstat dimensionless int 4 Count of griz deweighted outliers
# J mag real 4 2MASS J magnitude
# dJ mag real 4 2MASS J magnitude uncertainty
# H mag real 4 2MASS H magnitude
# dH mag real 4 2MASS H magnitude uncertainty
# K mag real 4 2MASS K magnitude
# dK mag real 4 2MASS K magnitude uncertainty

# remove unused columns from results
results.remove_columns(['plx', 'dplx', 'dpmra', 'dpmdec', 'dGaia', 'dBP', 'dRP', 'Teff', 'AGaia', 'Ag',
'rp1', 'r1', 'r10', 'dg', 'gchi', 'gcontrib', 'dr', 'rchi', 'rcontrib',
'di', 'ichi', 'icontrib', 'dz', 'zchi', 'zcontrib', 'nstat', 'dJ', 'dH', 'dK'])

# rename columns to match older java-based implementation which had different naming conventions
# TODO: refactor dependent functions to expect the default namings and get rid of these renamings
# to start: comment out the renamings below and see where the flows pipeline breaks, then fix it
results = Table(results,
names=('starid', 'ra', 'decl', 'pm_ra', 'pm_dec', 'gaia_mag', 'gaia_bp_mag', 'gaia_rp_mag',
'gaia_variability', 'g_mag', 'r_mag', 'i_mag', 'z_mag', 'J_mag', 'H_mag', 'K_mag'))

if not results:
raise CasjobsError("Could not find anything on CasJobs")

logger.debug("Found %d results", len(results))
return results


# --------------------------------------------------------------------------------------------------
def query_apass(coo_centre, radius=24 * u.arcmin):
"""
Expand Down Expand Up @@ -356,7 +157,6 @@ def query_simbad(coo_centre, radius=24 * u.arcmin):

s = Simbad()
s.ROW_LIMIT = 0
s.remove_votable_fields('coordinates')
s.add_votable_fields('ra(d;A;ICRS;J2000)', 'dec(d;D;ICRS;2000)', 'pmra', 'pmdec')
s.add_votable_fields('otype')
s.add_votable_fields('flux(B)', 'flux(V)', 'flux(R)', 'flux(I)', 'flux(J)', 'flux(H)', 'flux(K)')
Expand Down Expand Up @@ -456,6 +256,78 @@ def query_skymapper(coo_centre, radius=24 * u.arcmin):


# --------------------------------------------------------------------------------------------------

def query_local_refcat2(coo_centre, radius=24 * u.arcmin):
"""
Uses refcat.c from https://archive.stsci.edu/hlsp/atlas-refcat2 to do a cone-search around the position.
NOTE: In going from mast casjobs to local refcat.c, we have lost the unique object identifier, 'objid'
----> See https://archive.stsci.edu/hlsps/atlas-refcat2/hlsp_atlas-refcat2_atlas_ccd_all_multi_v1_readme.txt
We have to invent our own objids.
They get assigned incrementally from the value "300,000,000,000,000,000" onwards, which is comfortably
above any objids in our database on 2025-01-25.
TODO: Something less hacky! Assign every entry in the local catalog a unique objid somehow...

Parameters:
coo_centre (:class:`astropy.coordinates.SkyCoord`): Coordinates of centre of search cone.
radius (Angle, optional): Search radius. Default is 24 arcmin.

Returns:
list: List of dicts with the REFCAT2 information.

.. codeauthor:: Erik Jensen
"""

current_max_objid = 300_000_000_000_000_000
with AADC_DB() as db:
db.cursor.execute("SELECT MAX(starid) FROM flows.refcat2;")
for row in db.cursor.fetchall():
if row[0] > current_max_objid:
current_max_objid = row[0]

refcat_output = subprocess.run(["refcat",
str(coo_centre.ra.deg), str(coo_centre.dec.deg),
"-rad", str(Angle(radius).deg),
"-var", "ra,dec,pmra,pmdec,gaia,bp,rp,dupvar,g,r,i,z,j,h,k",
"-nohdr"],
encoding="utf-8", capture_output=True, check=True)

# TODO: feel free to make this less awful!
OBJID = RA = DEC = PMRA = PMDEC = GAIA = BP = RP = DUPVAR = G = R = I = Z = J = H = K = np.array([])
for line in refcat_output.stdout.splitlines():
OBJID = np.append(OBJID, current_max_objid)
current_max_objid += 1

ra, dec, pmra, pmdec, gaia, bp, rp, dupvar, g, r, i, z, j, h, k = line.split()

RA = np.append(RA, ra)
DEC = np.append(DEC, dec)
PMRA = np.append(PMRA, pmra)
PMDEC = np.append(PMDEC, pmdec)
GAIA = np.append(GAIA, gaia)
BP = np.append(BP, bp)
RP = np.append(RP, rp)
DUPVAR = np.append(DUPVAR, dupvar)
G = np.append(G, g)
R = np.append(R, r)
I = np.append(I, i)
Z = np.append(Z, z)
J = np.append(J, j)
H = np.append(H, h)
K = np.append(K, k)

# rename columns to match older java-based implementation which had different naming conventions
# TODO: refactor dependent functions to expect the default namings and get rid of these renamings
# to start: comment out the renamings below and see where the flows pipeline breaks, then fix it
results = Table({'starid': OBJID, 'ra': RA, 'decl': DEC, 'pm_ra': PMRA, 'pm_dec': PMDEC, 'gaia_mag': GAIA,
'gaia_bp_mag': BP, 'gaia_rp_mag': RP, 'gaia_variability': DUPVAR, 'g_mag': G, 'r_mag': R, 'i_mag': I,
'z_mag': Z, 'J_mag': J, 'H_mag': H, 'K_mag': K})

if not results:
raise CasjobsError("Something went wrong in local refcat.c binary")

logger.debug("Found %d results", len(results))
return results

def query_all(coo_centre, radius=24 * u.arcmin, dist_cutoff=2 * u.arcsec):
"""
Query all catalogs (REFCAT2, APASS, SDSS and SkyMapper) and return merged catalog.
Expand All @@ -479,7 +351,7 @@ def query_all(coo_centre, radius=24 * u.arcmin, dist_cutoff=2 * u.arcsec):
"""

# Query the REFCAT2 catalog using CasJobs around the target position:
results = query_casjobs_refcat2(coo_centre, radius=radius)
results = query_local_refcat2(coo_centre, radius=radius)
AT_results = Table(results)
refcat = SkyCoord(ra=AT_results['ra'], dec=AT_results['decl'], unit=u.deg, frame='icrs')

Expand Down
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ pytz >= 2024.2
sep-pjw >= 1.3.5
astroalign >= 2.5.1
networkx >= 3.3
astroquery >= 0.4.7
astroquery == 0.4.7 # TODO version >= 0.4.9 brings changes which messes with catalogs.py:query_simbad() - any expert want to have a crack at this?
tendrils >= 0.3.2
mastcasjobs >= 0.0.6
healpy >= 1.17.3
21 changes: 0 additions & 21 deletions requirements.txt.erik

This file was deleted.

Loading
Loading