diff --git a/HighEnergyObsCoreExt.tex b/HighEnergyObsCoreExt.tex index 36acfda..de5c374 100644 --- a/HighEnergyObsCoreExt.tex +++ b/HighEnergyObsCoreExt.tex @@ -9,18 +9,18 @@ \ivoagroup{High Energy Interest Group} \author{ - I. Evans (SAO/CXC, \href{mailto:ievans@cfa.harvard.edu}{ievans@cfa.harvard.edu},\\ - M. Servillat (LUX - Observatoire de Paris, \href{mailto:mathieu.servillat@obspm.fr}{mathieu.servillat@obspm.fr},\\ - B. Khélifi (APC - Université de Paris Cité/CNRS, \href{mailto:khelifi@in2p3.fr}{khelifi@in2p3.fr}),\\ - J. Evans (SAO/CXC, \href{mailto:janet@cfa.harvard.edu}{janet@cfa.harvard.edu}),\\ - M. Louys (CDS and ICube - Université de Strasbourg, \href{mailto:mireille.louys@unistra.fr}{mireille.louys@unistra.fr}),\\ + I. Evans (CXC -- Smithsonian Astrophysical Observatory, \href{mailto:ievans@cfa.harvard.edu}{ievans@cfa.harvard.edu}),\\ + M. Servillat (LUX -- Observatoire de Paris, \href{mailto:mathieu.servillat@obspm.fr}{mathieu.servillat@obspm.fr}),\\ + B. Kh\'elifi (APC -- Universit\'e de Paris Cit\'e/CNRS, \href{mailto:khelifi@in2p3.fr}{khelifi@in2p3.fr}),\\ + J. Evans (CXC -- Smithsonian Astrophysical Observatory, \href{mailto:janet@cfa.harvard.edu}{janet@cfa.harvard.edu}),\\ + M. Louys (CDS and ICube -- Universit\'e de Strasbourg, \href{mailto:mireille.louys@unistra.fr}{mireille.louys@unistra.fr}),\\ M. Kettenis (Joint Institute for VLBI ERIC, \href{mailto:kettenis@jive.eu}{kettenis@jive.eu}),\\ F. Bonnarel (IVOA, \href{mailto:francois.bonnarel@gmail.com}{francois.bonnarel@gmail.com}),\\ - L. Michel (SSC-XMM/SVOM - Strasbourg Observatory, \href{mailto:laurent.michel@astro.unistra.fr}{laurent.michel@astro.unistra.fr}),\\ - C. Boisson (LUX - Observatoire de Paris, \href{mailto:catherine.boisson@obspm.fr}{catherine.boisson@obspm.fr}),\\ - M. Cresitello-Dittmar (SAO/CXC, \href{mailto:mdittmar@cfa.harvard.edu}{mdittmar@cfa.harvard.edu}),\\ - O. Ates (LUX - ObsParis, \href{mailto:onur.ates@obspm.fr}{onur.ates@obspm.fr}),\\ - K. 
Kosack (IRFU - CEA/Université Paris-Saclay, \href{mailto:karl.kosack@cea.fr}{karl.kosack@cea.fr}),\\ + L. Michel (SSC-XMM/SVOM -- Strasbourg Observatory,\\ \href{mailto:laurent.michel@astro.unistra.fr}{\qquad laurent.michel@astro.unistra.fr}),\\ + C. Boisson (LUX -- Observatoire de Paris, \href{mailto:catherine.boisson@obspm.fr}{catherine.boisson@obspm.fr}),\\ + M. Cresitello-Dittmar (CXC -- Smithsonian Astrophysical \mbox{Observatory,} \href{mailto:mdittmar@cfa.harvard.edu}{\qquad mdittmar@cfa.harvard.edu}),\\ + O. Ates (LUX -- ObsParis, \href{mailto:onur.ates@obspm.fr}{onur.ates@obspm.fr}),\\ + K. Kosack (IRFU -- CEA/Universit\'e Paris-Saclay, \href{mailto:karl.kosack@cea.fr}{karl.kosack@cea.fr}),\\ J. Schnabel (ECAP, FAU Erlangen-N{\"u}rnberg, \href{mailto:jutta.schnabel@fau.de}{jutta.schnabel@fau.de}),\\ S. Hallmann (ECAP, FAU Erlangen-N{\"u}rnberg, \href{mailto:steffen.hallmann@fau.de}{steffen.hallmann@fau.de}) } @@ -140,9 +140,9 @@ \section{High Energy Astrophysics Data} \gls{HEA} data include observations obtained using photon detectors covering X-ray (from $\sim$0.1 keV to $\sim$120 keV) through gamma-ray (from 120 keV up to $\gtrsim$ PeV) energies, as well as cosmic-ray and astrophysical neutrino ($\gtrsim$ GeV) detectors, or other messengers related to \gls{HEA} phenomena. The domain is now sufficiently mature to provide open data that are science-ready and work with open analysis tools ({\em e.g.\/}, CIAO \citep{2006SPIE.6270E..1VF} or Gammapy \citep{gammapy:2023}). The science output of the \gls{HEA} domain already includes advanced products such as images, cubes, spectra, and time series such as light curves and time-resolved spectra. Additional data products include fitted sky models with spatial, spectral, and/or temporal component(s), along with their confidence intervals or confidence limits, and covariance matrices. 
Finally, multiple \gls{HEA} instruments produce source catalogs and surveys covering up to the full sky, which include maps of photon or particle flux, exposure, sensitivity, and aperture-photometry likelihood profiles. -Observations of the universe at the highest energies are based on techniques that are radically different compared to the UV through radio domains. \gls{HEA} observatories\footnote{For example, Chandra, XMM-Newton, Fermi, H.E.S.S., MAGIC, VERITAS, HAWC, LHAASO, IceCube, ANTARES, Auger and soon CTAO and KM3NeT, SWGO.} are generally designed to detect particles ({\em e.g.\/}, individual photons, cosmic-rays, or neutrinos) with the ability to estimate multiple observables for those particles. These detection techniques all rely on {\em event counting\/}\footnote{As opposed to signal integrating ({\em e.g.\/}, using a detector that accumulates the total photon signal during an exposure).}, where an event has some probability of being due to the interaction of a particle from an astrophysical source with the detectors, but also has some probability of being from instrumental or background effects. The data corresponding to an event are first an instrumental signal, which is then calibrated and processed to estimate physical quantities such as a time of arrival, point-of-origin on the sky, and an energy proxy associated with the event. Several other intermediate and qualifying characteristics may be associated with a detected event, depending on the detection technique. The ensemble of events detected over a given time interval and spatial field-of-view is referred to as an {\em event list\/}, which we designate an {\bf event-list} in this document. +Observations of the universe at the highest energies are based on techniques that are radically different compared to the UV through radio domains. 
\gls{HEA} observatories\footnote{For example, Chandra, XMM-Newton, Fermi, H.E.S.S., MAGIC, VERITAS, HAWC, LHAASO, IceCube, ANTARES, Auger, and soon CTAO, KM3NeT, and SWGO.} are generally designed to detect particles ({\em e.g.\/}, individual photons, cosmic-rays, or neutrinos) with the ability to estimate multiple observables for those particles. These detection techniques all rely on {\em event counting\/}\footnote{As opposed to signal integrating ({\em e.g.\/}, using a detector that accumulates the total photon signal during an exposure).}, where an event has some probability of being due to the interaction of a particle from an astrophysical source with the detectors, but also has some probability of being from instrumental or background effects. The data corresponding to an event are first an instrumental signal, which is then calibrated and processed to estimate physical quantities such as a time of arrival, point-of-origin on the sky, and an energy proxy associated with the event. Several other intermediate and qualifying characteristics may be associated with a detected event, depending on the detection technique. The ensemble of events detected over a given time interval and spatial field-of-view is referred to as an {\em event list\/}, which we designate an {\bf event-list} in this document. -Though {\bf event-list}s {\em may\/} include estimators for calibrated physical values, they typically still have to be corrected for the photometric, spectral, spatial, and/or temporal responses of the telescope and detector combination to yield scientifically interpretable information. The mappings between physical measurements of the source properties and the observables are called Instrument Response Functions (\glspl{IRF}\footnote{We try to avoid using the term \gls{IRF} in a normative sense since historical usage across the broad \gls{HEA} community (and from facility to facility) varies. 
In some cases, \gls{IRF} has been used to mean specifically the X-ray product of the \gls{ARF} and \gls{RMF}, whereas in other cases \gls{IRF} has been used more generally to mean any instrumental response function regardless of type.}). Some \glspl{IRF} are probabilistic in nature\footnote{For example, the energy matrix is a probability density function.}, and in addition may depend on the set of events selected for analysis by the end user. They are usually not invertible, so methods such as forward-folding fitting (using source models with any combination of spectral, spatial, temporal, and/or polarization components that are estimated) are needed to estimate physical properties, such as the true flux of particles from a source arriving at the instrument, given the measured observable quantities. The \glspl{IRF} generally evolve over time with the instrument and observation characteristics, and are usually defined for a specific time interval and may be decomposed into a standard set of independent components (see \S~3.1.5 of \citep{2024ivoa.note.heig}), such as the spatial point-spread function or the energy-migration matrix or different messenger particle types, where each component may be stored or computed separately. Since both \glspl{IRF} and {\bf event-list}s are required to analyze \gls{HEA} data, some \gls{IVOA} standards must be modified in order to expose both of them via the \gls{VO}. +Though {\bf event-list}s {\em may\/} include estimators for calibrated physical values, they typically still have to be corrected for the photometric, spectral, spatial, and/or temporal responses of the telescope and detector combination to yield scientifically interpretable information. 
The mappings between physical measurements of the source properties and the observables are called Instrument Response Functions (\glspl{IRF}\footnote{We try to avoid using the term \gls{IRF} in a normative sense since historical usage across the broad \gls{HEA} community (and from facility to facility) varies. In some cases, \gls{IRF} has been used to mean specifically the product of the \gls{ARF} and \gls{RMF}, whereas in other cases \gls{IRF} has been used more generally to mean any instrumental response function regardless of type.}). Some \glspl{IRF} are probabilistic in nature\footnote{For example, the energy matrix is a probability density function.}, and in addition may depend on the set of events selected for analysis by the end user. They are usually not invertible, so methods such as forward-folding fitting (using source models with any combination of spectral, spatial, temporal, and/or polarization components that are estimated) are needed to estimate physical properties, such as the true flux of particles from a source arriving at the instrument, given the measured observable quantities. The \glspl{IRF} generally evolve over time with the instrument and observation characteristics, and are usually defined for a specific time interval and may be decomposed into a standard set of independent components (see \S~3.1.5 of \citep{2024ivoa.note.heig}), such as the spatial point-spread function or the energy-migration matrix or different messenger particle types, where each component may be stored or computed separately. Since both \glspl{IRF} and {\bf event-list}s are required to analyze \gls{HEA} data, some \gls{IVOA} standards must be modified in order to expose both of them via the \gls{VO}. In the following, the current ObsCore standard will be discussed in \S~\ref{sec:obscore}, focusing on attributes that need to be modified. 
Then, we propose the creation of a \gls{HEA} extension of ObsCore in \S~\ref{sec:obscoreext}, as some attributes are very specific to our domain. In these two sections, the discussion focuses on the attribute definitions rather than on the attribute values. In \S~\ref{sec:voc}, enhancement of vocabulary is proposed for some ObsCore attributes, DataLink semantics, UCDs, and MIME-types. @@ -231,7 +231,7 @@ \subsection{{\em s\_calib\_status}} Under the (reasonable) assumption that an end-user searching for {\bf event-bundle} datasets is typically querying based on the properties of the primary {\bf event-list}, we suggest that those values also be used for the {\bf event-bundle}. However, the data provider should ultimately decide which value best describes their {\bf event-bundle} dataset. -For dataset types that do not encode sky coordinates or observations without dedicated spacial axes like non-pointing observatories, we suggest setting this value to ``NULL''. +For dataset types that do not encode sky coordinates or observations without dedicated spatial axes like non-pointing observatories, we suggest setting this value to ``NULL''. \subsection{{\em t\_calib\_status}} @@ -253,7 +253,7 @@ \subsection{{\em o\_ucd}} For an {\bf event-list}, we can consider that all measures stored in column values are observables. This is {\em the\/} fundamental difference between \gls{HEA} {\bf event-list}s and typical pixelated datasets. The current ObsCore Recommendation suggests that {\em o\_ucd\/} be set to ``NULL'' for event lists. However this significantly hampers data discovery for \gls{HEA} datasets. Since the data content of {\bf event-list}s may vary significantly from facility to facility, meaningful discovery of \gls{HEA} datasets {\em requires\/} the user be able to query the UCDs of the set of observables included in an {\bf event-list}. 
-A natural way of doing this that is consistent with current usage would be to extend {\em o\_ucd\/} to allow specification of {\em multiple\/} observables for {\bf event-list}s (and {\bf event-bundle}s), for example, {\em o\_ucd\/} = {\em 'pos.eq;time;instr.event.pulse\-Height'\/}. +A natural way of doing this that is consistent with current usage would be to extend {\em o\_ucd\/} to allow specification of {\em multiple\/} observables for {\bf event-list}s (and {\bf event-bundle}s), for example, {\em o\_ucd\/} = {\em `pos.eq\#time\#instr.event.pulse\-Height'\/}. We propose using the {\em hash symbol\/} (`\#') to separate UCDs for the multiple observables to distinguish from the case where multiple UCD words separated by semicolons may be needed to define the UCD for a single observable. This follows a suggestion from the EPN-TAP Recommendation \citep{2022ivoa.spec.0822E} to use the hash symbol as a separator. Doing so can simplify ADQL queries since ADQL includes a {\tt ivo\_hashlist\_has} IVOA-standardized user defined function that can be used to validate if a particular UCD is included. One can also perform an ADQL query similar to ``o\_ucd LIKE `\%string\%'\null'' if all that is desired is to verify the presence of a specific UCD `string'. We note that extending {\em o\_ucd\/} to allow specification of multiple observables would require similar adjustments to the other observable axis attributes {\em o\_unit\/}, {\em o\_calib\_status\/}, and {\em o\_stat\_err\/}. @@ -290,7 +290,7 @@ \subsection{{\em t\_intervals}} \subsection{{\em energy\_min\/}/{\em energy\_max\/}} -The existing attributes {\em em\_min\/} and {\em em\_max\/} that define the coverage of the spectral axis (defined as wavelength expressed in units of m) are not user friendly for \gls{HEA} where datasets are generally selected according to an energy range ({\em i.e.\/}, inverse wavelength) in units of eV (or scaled units of eV, for example keV, MeV, GeV, TeV, PeV). 
Unlike the radio domain where $\lambda = c/\nu$, where $c$ is an almost universally remembered physical constant, the conversion $\lambda = hc/E$ is not simple for the user to express. As the spectral range covered by \gls{HEA} data is many decades larger than for other wavebands, the accurate numerical representations of typical \gls{HEA} spectral ranges as {\em em\_min\/}/{\em em\_max\/} requires quantities with many digits of precision and exponents ranging from $\sim\!10^{-5}$--$10^{-22}$, and are misleading when used for energy ranges of massive particles. Since specification of the spectral range is largely fundamental to data discovery in the \gls{HEA} regime, we propose to add attributes {\em energy\_min\/} and {\em energy\_max\/} that specify the minimum and maximum spectral range values in units of eV\null. Note that the sense of these attributes is {\em opposite\/} that of {\em em\_min\/} and {\em em\_max\/} because of the inverse wavelength relationship between energy and wavelength, so numerical comparisons must be transposed ({\em e.g.\/}, $E>E_{\rm thresh}$ becomes $\lambdaE_{\rm thresh}$ becomes $\lambda 100} @@ -289,14 +291,14 @@ \subsubsection{Use Case --- Get all the \glspl{IRF} for a given CTAO observation \subsubsection{Use Case --- Search for all ANTARES neutrino events for a given dataset in the direction of a point source} -{\em Using the ANTARES 2007-2017 point source data set, retrieve all events, background estimate, and detector acceptance to calculate the expected neutrino flux from a given point source, e.g. HESSJ0632+057, as in \textit{G. Illuminati for the ANTARES Collaboration, PoS(ICRC2019)920} and recalculate the significance of the neutrino flux.} +{\em Using the ANTARES 2007--2017 point source data set, retrieve all events, background estimate, and detector acceptance to calculate the expected neutrino flux from a given point source, e.g. HESSJ0632+057, as in \textit{G. 
Illuminati for the ANTARES Collaboration, PoS(ICRC2019)920} and recalculate the significance of the neutrino flux.} \medskip \noindent Find all datasets satisfying \begin{enumerate}[(i)] \item Position inside 5 degrees from (98.24, 5.81), \item dataproduct\_type = ``event-bundle'' or ``event-list'' or ``response-function'', - \item obs\_collection = ANTARES-2017-PS'', + \item obs\_collection = ``ANTARES-2017-PS''. \end{enumerate} \begin{verbatim} @@ -317,9 +319,9 @@ \subsubsection{Use Case --- Retrieve the instrument response functions for a com \begin{enumerate}[(i)] \item Position inside 5 degrees from (0.8, -45.19), \item dataproduct\_type = ``response-function'', - \item instrument\_name = ``KM3NeT-ARCA'' - \item t\_min/t\_max from 2027-2030 - \item event\_type = ``track`` + \item instrument\_name = ``KM3NeT-ARCA'', + \item t\_min/t\_max from 2027--2030 ({\em i.e.\/}, MJD 61406--62870), + \item event\_type = ``track''. \end{enumerate} \begin{verbatim} @@ -327,11 +329,11 @@ \subsubsection{Use Case --- Retrieve the instrument response functions for a com NATURAL JOIN ivoa.obscore_hea WHERE (CONTAINS(POINT(s_ra, s_dec), CIRCLE(0.8, -45.19, 5.0)) = 1) -AND (dataproduct_type IN ('aeff', 'edisp', 'psf')) +AND (dataproduct_type = 'response-function') AND (instrument_name LIKE '%ARCA%') -AND (tmin <= 2027) -AND (t_max >= 2032) -AND (event_type = ``track``) +AND (t_min >= 61406) +AND (t_max <= 62870) +AND (event_type = 'track') \end{verbatim} \subsubsection{Use Case --- Study the combined neutrino flux for the Galactic plane} @@ -343,7 +345,7 @@ \subsubsection{Use Case --- Study the combined neutrino flux for the Galactic pl \begin{enumerate}[(i)] \item messenger = ``neutrino'', \item dataproduct\_type = ``event-bundle'', - \item analysis\_mode = ``diffuse'' + \item analysis\_mode = ``diffuse''. 
\end{enumerate} % diffuse @@ -358,15 +360,15 @@ \subsubsection{Use Case --- Study the combined neutrino flux for the Galactic pl \subsubsection{Use Case --- Calculate the probability for a source class to be emitters of tau neutrinos} -{\em Using a catalog of potential sources, calculate the probability of measuring a $\nu_{\tau}$ neutrino flux from a stacking of all sources of that type with 10 years of data taking with widely spaced, i.e. high energy detectors like ARCA.} +{\em Using a catalog of potential sources, calculate the probability of measuring a $\nu_{\tau}$ neutrino flux from a stacking of all sources of that type with 10 years of data taking with widely spaced, high energy detectors like ARCA.} \medskip \noindent Find all neutrino datasets satisfying: \begin{enumerate}[(i)] \item dataproduct\_type = ``response-function'', - \item messenger contains ``pdgid16'' or ``pdgid18'', - \item obs\_mode = ``wide-array'' - \item analysis\_mode = ``pointsource'' + \item messenger contains ``pdgid-16'' or ``pdgid-18'', + \item obs\_mode = ``wide-array'', + \item analysis\_mode = ``pointsource''. 
\end{enumerate} \begin{verbatim} @@ -374,7 +376,7 @@ \subsubsection{Use Case --- Calculate the probability for a source class to be e NATURAL JOIN ivoa.obscore_hea WHERE (dataproduct_type = 'response-function') -AND (messenger = '%pdgid16%' OR messenger = '%pdgid18%') +AND (messenger LIKE '%pdgid-16%' OR messenger LIKE '%pdgid-18%') AND (obs_mode LIKE '%wide-array%') AND (analysis_mode LIKE '%pointsource%') \end{verbatim} @@ -424,8 +426,8 @@ \subsubsection{Use Case --- Search for flux maps for CTAO-North observations bet SELECT * FROM ivoa.obscore NATURAL JOIN ivoa.obscore_hea WHERE -(dataproduct_type = 'image') -AND (dataproduct_subtype = 'fluxmap') +((dataproduct_type = 'image') +OR (dataproduct_subtype = 'fluxmap')) AND (obs_collection = 'CTAO-DR1') AND (instrument_name LIKE 'CTAO-N') AND (CAST(obs_id AS INTEGER) > 4374) @@ -478,20 +480,26 @@ \subsubsection{Use Case --- Search for the CTAO flux light curves of PKS 2155-30 SELECT * FROM ivoa.obscore NATURAL JOIN ivoa.obscore_hea WHERE +(dataproduct_type = 'timeseries') +AND (dataproduct_subtype = 'flux') +AND (obs_collection = 'CTAO-DR1') +AND (t_min >= 62502) +AND (t_max <= 62866) +AND (target_name = 'PKS 2155-304') \end{verbatim} -\subsubsection{Use Case --- Search for the \glspl{IRF} for a given direction and observation duration regardless of a neutrino event observation} - -{\em With the detector taking data, a given search might result in a non-observation of neutrinos although the detector could have observed events. 
The sensitivity of the detector towards a given neutrino flux is then calculated from IRFs to set an upper limit on a neutrino flux model.\/} - -\medskip -\noindent Find all datasets satisfying: -\begin{enumerate}[(i)] - \item dataproduct\_type = , -\end{enumerate} - -\begin{verbatim} -SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_hea -WHERE -\end{verbatim} +%\subsubsection{Use Case --- Search for the \glspl{IRF} for a given direction and observation duration regardless of a neutrino event observation} +% +%{\em With the detector taking data, a given search might result in a non-observation of neutrinos although the detector could have observed events. The sensitivity of the detector towards a given neutrino flux is then calculated from IRFs to set an upper limit on a neutrino flux model.\/} +% +%\medskip +%\noindent Find all datasets satisfying: +%\begin{enumerate}[(i)] +% \item dataproduct\_type = , +%\end{enumerate} +% +%\begin{verbatim} +%SELECT * FROM ivoa.obscore +%NATURAL JOIN ivoa.obscore_hea +%WHERE +%\end{verbatim}