diff --git a/HighEnergyObsCoreExt.tex b/HighEnergyObsCoreExt.tex index 9325ec2..36acfda 100644 --- a/HighEnergyObsCoreExt.tex +++ b/HighEnergyObsCoreExt.tex @@ -21,8 +21,8 @@ M. Cresitello-Dittmar (SAO/CXC, \href{mailto:mdittmar@cfa.harvard.edu}{mdittmar@cfa.harvard.edu}),\\ O. Ates (LUX - ObsParis, \href{mailto:onur.ates@obspm.fr}{onur.ates@obspm.fr}),\\ K. Kosack (IRFU - CEA/Université Paris-Saclay, \href{mailto:karl.kosack@cea.fr}{karl.kosack@cea.fr}),\\ - J. Schnabel (ECAP, \href{mailto:jutta.schnabel@fau.de}{jutta.schnabel@fau.de}),\\ - S. Hallmann (DESY Zeuthen, \href{mailto:steffen.hallmann@desy.de}{steffen.hallmann@desy.de}), + J. Schnabel (ECAP, FAU Erlangen-N{\"u}rnberg, \href{mailto:jutta.schnabel@fau.de}{jutta.schnabel@fau.de}),\\ + S. Hallmann (ECAP, FAU Erlangen-N{\"u}rnberg, \href{mailto:steffen.hallmann@fau.de}{steffen.hallmann@fau.de}) } \editor{Ian Evans, Mathieu Servillat, Bruno Kh\'elifi, Janet Evans} @@ -45,7 +45,6 @@ \usepackage[nopostdot,style=super,nonumberlist,toc]{glossaries} \usepackage{hyperref} - % mireille : in order to flag changes to fill \newcommand{\TODO}[1]{% \noindent% @@ -139,11 +138,11 @@ \section{Introduction} \section{High Energy Astrophysics Data} -\gls{HEA} data include observations obtained using photon detectors covering X-ray (from $\sim$0.1 keV to $\sim$120 keV) through gamma-ray (from 120 keV up to $\gtrsim$ PeV) energies, as well as cosmic-ray and astrophysical neutrino ($\gtrsim$ TeV) detectors, or other messengers related to \gls{HEA} phenomena. The domain is now sufficiently mature to provide open data that are science-ready and work with open analysis tools ({\em e.g.\/}, CIAO \citep{2006SPIE.6270E..1VF} or Gammapy \citep{gammapy:2023}). The science output of the \gls{HEA} domain already includes advanced products such as images, cubes, spectra, and time series such as light curves and time-resolved spectra. 
Additional data products include fitted sky models with spatial, spectral, and/or temporal component(s), along with their confidence intervals or confidence limits, and covariance matrices. Finally, multiple \gls{HEA} instruments produce source catalogs and surveys covering up to the full the sky, which include maps of photon or particle flux, exposure, sensitivity, and aperture-photometry likelihood profiles. +\gls{HEA} data include observations obtained using photon detectors covering X-ray (from $\sim$0.1 keV to $\sim$120 keV) through gamma-ray (from 120 keV up to $\gtrsim$ PeV) energies, as well as cosmic-ray and astrophysical neutrino ($\gtrsim$ GeV) detectors, or other messengers related to \gls{HEA} phenomena. The domain is now sufficiently mature to provide open data that are science-ready and work with open analysis tools ({\em e.g.\/}, CIAO \citep{2006SPIE.6270E..1VF} or Gammapy \citep{gammapy:2023}). The science output of the \gls{HEA} domain already includes advanced products such as images, cubes, spectra, and time series such as light curves and time-resolved spectra. Additional data products include fitted sky models with spatial, spectral, and/or temporal component(s), along with their confidence intervals or confidence limits, and covariance matrices. Finally, multiple \gls{HEA} instruments produce source catalogs and surveys covering up to the full sky, which include maps of photon or particle flux, exposure, sensitivity, and aperture-photometry likelihood profiles. -Observations of the universe at the highest energies are based on techniques that are radically different compared to the UV through radio domains. \gls{HEA} observatories\footnote{For example, Chandra, XMM-Newton, Fermi, H.E.S.S., MAGIC, VERITAS, HAWC, LHAASO, IceCube, Auger and soon CTAO and KM3NeT, SWGO.} are generally designed to detect particles ({\em e.g.\/}, individual photons, cosmic-rays, or neutrinos) with the ability to estimate multiple observables for those particles.
These detection techniques all rely on {\em event counting\/}\footnote{As opposed to signal integrating ({\em e.g.\/}, using a detector that accumulates the total photon signal during an exposure).}, where an event has some probability of being due to the interaction of a particle from an astrophysical source with the detectors, but also has some probability of being from instrumental or background effects. The data corresponding to an event are first an instrumental signal, which is then calibrated and processed to estimate physical quantities such as a time of arrival, point-of-origin on the sky, and an energy proxy associated with the event. Several other intermediate and qualifying characteristics may be associated with a detected event, depending on the detection technique. The ensemble of events detected over a given time interval and spatial field-of-view is referred to as an {\em event list\/}, which we designate an {\bf event-list} in this document. +Observations of the universe at the highest energies are based on techniques that are radically different compared to the UV through radio domains. \gls{HEA} observatories\footnote{For example, Chandra, XMM-Newton, Fermi, H.E.S.S., MAGIC, VERITAS, HAWC, LHAASO, IceCube, ANTARES, Auger, and soon CTAO, KM3NeT, and SWGO.} are generally designed to detect particles ({\em e.g.\/}, individual photons, cosmic rays, or neutrinos) with the ability to estimate multiple observables for those particles. These detection techniques all rely on {\em event counting\/}\footnote{As opposed to signal integrating ({\em e.g.\/}, using a detector that accumulates the total photon signal during an exposure).}, where an event has some probability of being due to the interaction of a particle from an astrophysical source with the detectors, but also has some probability of being from instrumental or background effects.
The data corresponding to an event are first an instrumental signal, which is then calibrated and processed to estimate physical quantities such as a time of arrival, point-of-origin on the sky, and an energy proxy associated with the event. Several other intermediate and qualifying characteristics may be associated with a detected event, depending on the detection technique. The ensemble of events detected over a given time interval and spatial field-of-view is referred to as an {\em event list\/}, which we designate an {\bf event-list} in this document. -Though {\bf event-list}s {\em may\/} include estimators for calibrated physical values, they typically still have to be corrected for the photometric, spectral, spatial, and/or temporal responses of the telescope and detector combination to yield scientifically interpretable information. The mappings between physical measurements of the source properties and the observables are called Instrument Response Functions (\glspl{IRF}\footnote{We try to avoid using the term \gls{IRF} in a normative sense since historical usage across the broad \gls{HEA} community (and from facility to facility) varies. In some cases, \gls{IRF} has been used to mean specifically the X-ray product of the \gls{ARF} and \gls{RMF}, whereas in other cases \gls{IRF} has been used more generally to mean any instrumental response function regardless of type.}). Some \glspl{IRF} are probabilistic in nature\footnote{For example, the energy matrix is a probability density function.}, and in addition may depend on the set of events selected for analysis by the end user. They are usually not invertible, so methods such as forward-folding fitting (using source models with any combination of spectral, spatial, temporal, and/or polarization components that are estimated) are needed to estimate physical properties, such as the true flux of particles from a source arriving at the instrument, given the measured observable quantities. 
The \glspl{IRF} generally evolve over time with the instrument and observation characteristics, and are usually defined for a specific time interval and may be decomposed into a standard set of independent components (see \S~3.1.5 of \citep{2024ivoa.note.heig}), such as the spatial point-spread function or the energy-migration matrix, where each component may be stored or computed separately. Since both \glspl{IRF} and {\bf event-list}s are required to analyze \gls{HEA} data, some \gls{IVOA} standards must be modified in order to expose both of them via the \gls{VO}. +Though {\bf event-list}s {\em may\/} include estimators for calibrated physical values, they typically still have to be corrected for the photometric, spectral, spatial, and/or temporal responses of the telescope and detector combination to yield scientifically interpretable information. The mappings between physical measurements of the source properties and the observables are called Instrument Response Functions (\glspl{IRF}\footnote{We try to avoid using the term \gls{IRF} in a normative sense since historical usage across the broad \gls{HEA} community (and from facility to facility) varies. In some cases, \gls{IRF} has been used to mean specifically the X-ray product of the \gls{ARF} and \gls{RMF}, whereas in other cases \gls{IRF} has been used more generally to mean any instrumental response function regardless of type.}). Some \glspl{IRF} are probabilistic in nature\footnote{For example, the energy matrix is a probability density function.}, and in addition may depend on the set of events selected for analysis by the end user. They are usually not invertible, so methods such as forward-folding fitting (using source models with any combination of spectral, spatial, temporal, and/or polarization components that are estimated) are needed to estimate physical properties, such as the true flux of particles from a source arriving at the instrument, given the measured observable quantities. 
The \glspl{IRF} generally evolve over time with the instrument and observation characteristics, and are usually defined for a specific time interval and may be decomposed into a standard set of independent components (see \S~3.1.5 of \citep{2024ivoa.note.heig}), such as the spatial point-spread function, the energy-migration matrix, or responses for different messenger particle types, where each component may be stored or computed separately. Since both \glspl{IRF} and {\bf event-list}s are required to analyze \gls{HEA} data, some \gls{IVOA} standards must be modified in order to expose both of them via the \gls{VO}. In the following, the current ObsCore standard will be discussed in \S~\ref{sec:obscore}, focusing on attributes that need to be modified. Then, we propose the creation of a \gls{HEA} extension of ObsCore in \S~\ref{sec:obscoreext}, as some attributes are very specific to our domain. In these two sections, the discussion focuses on the attribute definitions rather than on the attribute values. In \S~\ref{sec:voc}, enhancement of vocabulary is proposed for some ObsCore attributes, DataLink semantics, UCDs, and MIME-types. @@ -224,7 +223,7 @@ \subsection{{\em s\_ra\/}/{\em s\_dec}} We propose that the attributes {\em s\_ra\/}/{\em s\_dec\/} be redefined to be the ICRS right ascension and ICRS declination of ``a reference position (typically the center)'' of an observation on the sky, rather than the ICRS right ascension and ICRS declination of ``the center'' of the observation. For some facilities, the center (RA, Dec) may have a specific meaning (such as the location of the optical axis of the telescope), which often is not useful for advanced data products that may be extracted from a cut-out from the progenitor observation.
Some facilities also allow an instrument to be displaced from the center of the focal plane, which means that the definition of ``the center'' of an observation may be unclear (especially when not tracking at sidereal rate or for facilities for which the PSF varies strongly across the telescope field of view). Since these cases effectively displace the observation field-of-view, ObsCore attributes such as {\em s\_fov\/} that are implicitly referenced to ({\em s\_ra\/}, {\em s\_dec\/}) will continue to behave as expected using the revised definition. -For non-pointing instruments (which may include all-sky instruments such as KM3NeT or HAWC), these fields are poorly defined (as is the case, generally for observations that are drift scans). For the time duration of the observation, one can compute an effective center position of the exposure skymap and the maximum radius of the covered area ({\em i.e.\/}, for an all-sky instrument this would be $2\pi\,\rm Sr$ solid angle in Alt/Az, which can be converted into a rotated area in RA/Dec). However, the utility of such a characterization depends on both the duration of the observation and the use case. +For non-pointing instruments (which may include all-sky instruments such as KM3NeT or HAWC), these fields are poorly defined (as is generally the case for observations that are drift scans). For the time duration of the observation, one can compute an effective center position of the exposure skymap and the maximum radius of the covered area ({\em i.e.\/}, for an all-sky instrument this would be $2\pi\,\rm sr$ solid angle in Alt/Az, which can be converted into a rotated area in RA/Dec). However, the utility of such a characterization depends on both the duration of the observation and the use case; for typical aggregated datasets with observation durations spanning weeks to years, the position can be assumed to describe the center of the exposure skymap.
\subsection{{\em s\_calib\_status}} @@ -232,7 +231,7 @@ \subsection{{\em s\_calib\_status}} Under the (reasonable) assumption that an end-user searching for {\bf event-bundle} datasets is typically querying based on the properties of the primary {\bf event-list}, we suggest that those values also be used for the {\bf event-bundle}. However, the data provider should ultimately decide which value best describes their {\bf event-bundle} dataset. -For dataset types that do not encode sky coordinates, we suggest setting this value to ``NULL''. +For dataset types that do not encode sky coordinates, or for observations without dedicated spatial axes (such as those from non-pointing observatories), we suggest setting this value to ``NULL''. \subsection{{\em t\_calib\_status}} @@ -291,9 +290,9 @@ \subsection{{\em t\_intervals}} \subsection{{\em energy\_min\/}/{\em energy\_max\/}} -The existing attributes {\em em\_min\/} and {\em em\_max\/} that define the coverage of the spectral axis (defined as wavelength expressed in units of m) are not user friendly for \gls{HEA} where datasets are generally selected according to an energy range ({\em i.e.\/}, inverse wavelength) in units of eV (or scaled units of eV, for example keV, MeV, GeV, TeV, PeV). Unlike the radio domain where $\lambda = c/\nu$, where $c$ is an almost universally remembered physical constant, the conversion $\lambda = hc/E$ is not simple for the user to express. As the spectral range covered by \gls{HEA} data is many decades larger than for other wavebands, the accurate numerical representations of typical \gls{HEA} spectral ranges as {\em em\_min\/}/{\em em\_max\/} requires quantities with many digits of precision and exponents ranging from $\sim\!10^{-5}$--$10^{-22}$. Since specification of the spectral range is largely fundamental to data discovery in the \gls{HEA} regime, we propose to add attributes {\em energy\_min\/} and {\em energy\_max\/} that specify the minimum and maximum spectral range values in units of eV\null.
Note that the sense of these attributes is {\em opposite\/} that of {\em em\_min\/} and {\em em\_max\/} because of the inverse wavelength relationship between energy and wavelength, so numerical comparisons must be transposed ({\em e.g.\/}, $E>E_{\rm thresh}$ becomes $\lambda<\lambda_{\rm thresh}$). [...] $E>E_{\rm thresh}$ becomes $\lambda<\lambda_{\rm thresh}$). [...] >= 2032) +AND (event_type = 'track') +\end{verbatim} + +\subsubsection{Use Case --- Study the combined neutrino flux for the Galactic plane} + +{\em Identify all neutrino datasets in the region of the Galactic plane to perform a combined neutrino flux study, extending a study like the IceCube \textbf{Observation of high-energy neutrinos from the Galactic plane. DOI: 10.1126/science.adc9818}, using event lists and IRFs of datasets optimized for the analysis of diffuse neutrino emission.\/} + +\medskip +\noindent Find all neutrino datasets satisfying: +\begin{enumerate}[(i)] + \item messenger = ``neutrino'', + \item dataproduct\_type = ``event-bundle'', + \item analysis\_mode = ``diffuse'' +\end{enumerate} +% diffuse + +\begin{verbatim} +SELECT * FROM ivoa.obscore +NATURAL JOIN ivoa.obscore_hea +WHERE +(dataproduct_type = 'event-bundle') +AND (messenger LIKE '%neutrino%') +AND (analysis_mode = 'diffuse') +\end{verbatim} + +\subsubsection{Use Case --- Calculate the probability for a source class to be emitters of tau neutrinos} + +{\em Using a catalog of potential sources, calculate the probability of measuring a $\nu_{\tau}$ neutrino flux from a stacking of all sources of that type with 10 years of data taking with widely spaced, i.e.,
high-energy detectors like ARCA.} + +\medskip +\noindent Find all neutrino datasets satisfying: +\begin{enumerate}[(i)] + \item dataproduct\_type = ``response-function'', + \item messenger contains ``pdgid16'' or ``pdgid18'', + \item obs\_mode = ``wide-array'', + \item analysis\_mode = ``pointsource'' +\end{enumerate} + +\begin{verbatim} +SELECT * FROM ivoa.obscore +NATURAL JOIN ivoa.obscore_hea +WHERE +(dataproduct_type = 'response-function') +AND (messenger LIKE '%pdgid16%' OR messenger LIKE '%pdgid18%') +AND (obs_mode LIKE '%wide-array%') +AND (analysis_mode LIKE '%pointsource%') +\end{verbatim} \subsection{Advanced Data Products} @@ -387,10 +478,20 @@ \subsubsection{Use Case --- Search for the CTAO flux light curves of PKS 2155-30 SELECT * FROM ivoa.obscore NATURAL JOIN ivoa.obscore_hea WHERE -(dataproduct_type = 'timeseries') -AND (dataproduct_subtype = 'flux') -AND (obs_collection = 'CTAO-DR1') -AND (target_name = 'PKS 2155-304') -AND (tmin >= 62502) -AND (tmax <= 62866) +\end{verbatim} + +\subsubsection{Use Case --- Search for the IRFs for a given direction and observation duration regardless of a neutrino event observation} + +{\em With the detector taking data, a given search might result in a non-observation of neutrinos although the detector could have observed events. The sensitivity of the detector towards a given neutrino flux is then calculated from IRFs to set an upper limit on a neutrino flux model.\/} + +\medskip +\noindent Find all datasets satisfying: +\begin{enumerate}[(i)] + \item dataproduct\_type = , +\end{enumerate} + +\begin{verbatim} +SELECT * FROM ivoa.obscore +NATURAL JOIN ivoa.obscore_hea +WHERE \end{verbatim}