From 08571f4b15408771e8fd98b3109eac618fc510a3 Mon Sep 17 00:00:00 2001 From: loumir Date: Mon, 1 Jul 2024 19:24:47 +0200 Subject: [PATCH] updates use case appendix --- Makefile | 4 +- ObscoreTimeExtension.tex | 45 ++++++++------- Time_domain_discovery_Use-cases.tex | 89 +++++++++++++++++++++++++++++ myref.bib | 28 +++++++-- 4 files changed, 137 insertions(+), 29 deletions(-) create mode 100644 Time_domain_discovery_Use-cases.tex diff --git a/Makefile b/Makefile index 48542f8..dadd79f 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DOCNAME = ObscoreTimeExtension DOCVERSION = 1.0 # Publication date, ISO format; update manually for "releases" -DOCDATE = 2024-06-24 +DOCDATE = 2024-07-01 # What is it you're writing: NOTE, WD, PR, REC, PEN, or EN DOCTYPE = WD @@ -27,7 +27,7 @@ FIGURES = role_diagram.pdf # List of PDF figures (figures that must be converted to pixel images to # work in web browsers). -VECTORFIGURES = +VECTORFIGURES = role_diagram.svg # Additional files to distribute (e.g., CSS, schema files, examples...) AUX_FILES = diff --git a/ObscoreTimeExtension.tex b/ObscoreTimeExtension.tex index 6b74961..ae62218 100644 --- a/ObscoreTimeExtension.tex +++ b/ObscoreTimeExtension.tex @@ -93,7 +93,7 @@ \begin{abstract} This IVOA specification details a list of metadata dealing with time-related features needed for discovery of time series data sets, in the context of ObsTAP services. -It is based on science cases explained in a previous IVOA note prepared in 2018 and recently revised \citep{note:TSSerialisationNote}. +It is based on science cases explained in a previous IVOA note prepared in 2018 and recently revised \citep{TSNoteSerialisation}. Here we discuss various use-cases. 
We highlight first which existing time related metadata in the ObsCore standard version 1.1 can be used, and second propose new features needed for an ObsCore time extension in order to allow more search criteria @@ -132,7 +132,7 @@ \section{Introduction} In this specification we examine how to enhance data discovery and data selection of time sampled data sets in the context of the ObsCore data model and its TAP implementations. The ObsCore Specification \cite{2017ivoa.spec.0509L} proposes a set of features to describe the data present in a data set as well as metadata about its acquisition, creation and publication (curation). -The physical in terms of spatial, spectral, temporal, polarimetry, and observable measure are also described by a group of features dedicated to each axis, and considered independant from each other. The idea is to provide a physical feature profile for each axis with coverage, sampling, resolution, etc. +The physical properties in terms of spatial, spectral, temporal, polarimetry, and observable measure are also described by a group of features dedicated to each axis, considered independent from others. The idea is to provide a physical feature profile for each axis with coverage, sampling, resolution, etc. Search criteria in ObsTAP are based on these features. We examine in section \ref{sec:alreadythere} how the set of time parameters already present in ObsCore v1.1 can be used for time series discovery. @@ -168,8 +168,8 @@ \section{Time Series} \subsection{Definition} Time Series can be defined in a very large sense as a collection of any kind of data over time for a particular source (e.~g. star, binary, QSO) or part of a source (e.~g. sun spots), independent on the type of data (images, light-curves, radial velocity, polarisation states or degrees, positions, number of sunspots, densities,...), the duration of the signal integration or the cadence. 
-To clarify the vocabulary here we consider a time series as a sequence of signal integrations, or snap-shots observing an object or phenomenon over time, so diffrent observations over time. -Considering how observations in general can be spanned along the time axis, we can sketch Time Series data as shown in Fig.~\ref{fig:time-series}. Time Series data is composed of a set of observations (n\_observations = 3 in this example), each with a different exposure or integration time (t\_exp). Although in some cases the cadence or time span between each signal intergration (delta\_t) is fixed, in the general case it can be different and we can therefore define a minimum and a maximum value (delta\_t\_min, delta\_t\_max). Each observation has it's own time stamp (\emph{t\_i)} with a given precision or resolution (t\_resolution). As can be seen from this figure the duration of the observation can be defined in different ways: a) as the total integration or exposure time, i.~e. the sum of all the exposure times: \emph{t\_exp\_total }= $\sum$ \emph{t\_exp} ; or b) as the time span between the beginning and the end of the observations: \emph{t\_exp\_total} = \emph{t\_max} - \emph{time\_min}). Note that in the case that the exposure time is constant for all the observations then \emph{t\_exp\_total }= n\_observations $\times$ \emph{t\_exp}. The situation can be more complicated, for instance during the observation there could be clouds and we therefore pause the exposure for a while and resume once the cloud has passed or we might want to remove parts of the observation due to artefacts in the data. In any case these values can be taken as approximative of the minimum and the maximum value this specific field can have. +To clarify the vocabulary here we consider a time series as a sequence of signal integrations, or snap-shots observing an object or phenomenon over time, so different observations over time. 
+Considering how observations in general can be spanned along the time axis, we can sketch Time Series data as shown in Fig.~\ref{fig:time-series}. Time Series data is composed of a set of observations (n\_observations = 3 in this example), each with a different exposure or integration time (t\_exp). Although in some cases the cadence or time span between each signal integration (delta\_t) is fixed, in the general case it can be different and we can therefore define a minimum and a maximum value (delta\_t\_min, delta\_t\_max). Each observation has its own time stamp (\emph{t\_i}) with a given precision or resolution (t\_resolution). As can be seen from this figure the duration of the observation can be defined in different ways: a) as the total integration or exposure time, i.~e. the sum of all the exposure times, \emph{t\_exp\_total} = $\sum$ \emph{t\_exp}, which represents the support along the time axis; or b) as the elapsed time, \emph{t\_elapsed} = \emph{t\_max} - \emph{t\_min}, which is definitely different. Note that in the case that the exposure time is constant for all the observations then \emph{t\_exp\_total} = n\_observations $\times$ \emph{t\_exp}. The situation can be more complicated, for instance during the observation there could be clouds and we therefore pause the exposure for a while and resume once the cloud has passed or we might want to remove parts of the observation due to artefacts in the data. In any case these values can be taken as approximative of the minimum and the maximum value this specific field can have. The most relevant fields of Time Series metadata are summarized in Table~\ref{tab:fields}.
@@ -184,7 +184,7 @@ \subsection{Definition} \begin{table}[hb] \begin{center} - \caption{Time Series metadata fields.} + \caption{Time Series metadata fields needed for discovery.} \label{tab:fields} \begin{tabular}{p{0.35\textwidth}p{0.64\textwidth}} \sptablerule @@ -213,7 +213,7 @@ \subsection{Definition} For this data to be fully exploitable and reusable (interoperable) it has to be properly documented. In this specific case the minimum information that needs to be provided is: the object coordinates (or name), the filter in which the observations have been carried out, and the time frame and offset (if applicable). However, the dimensionality of what is observed at the time stamps' sequence may correspond to 1D or 2D observations, like spectra or images as well. -That's why the dataproduct type defined in ObsCore 1.1 should be more precise and eventually rely on the IVOA product-type vocabulary. +That's why the data product type defined in ObsCore 1.1 should be more precise and eventually rely on the IVOA product-type vocabulary. In addition, a mechanism should be defined to clarify what part of the data is varying with time, as described further in section \ref{sec:timevariant}. @@ -249,7 +249,7 @@ \subsection{Science use cases} \item \emph{Is it possible to discover long/short term variability within the data?} \end{enumerate} -To answer the first question a user needs to be sure that dates are comparable, that is time has to be brought into a common time frame. +To answer the first question a user needs to be sure that dates are comparable, which means time has to be brought into a common time frame. To answer the second question we need to keep track of the minimum and maximum time span. 
\subsection{Using a common time frame} @@ -276,7 +276,7 @@ \subsection{Using a common time frame} \end{center} \end{table} -We recommend to be specific on the time frame and we suggest to use: +Common practice is to be specific about the time frame, and we suggest using: \begin{center} JD(TT;BARYCENTER) \end{center} @@ -286,7 +286,7 @@ \section{Extension of ObsCore} ObsCore has a normalized description of the data content along the various physical axes where the data are projected. The spatial properties are described in the \emph{s\_*} group, the spectral ones in \emph{em\_*} group, the temporal ones in \emph{t\_*}, etc. For each data set there is a minimal set of metadata to describe its sky position, spectral band, time interval, etc. which are independent from each other. -This allows to enhance time sampling description by adding new parameters to the time group without putting the ObsCore existing model at risk. +This allows the time sampling description to be enhanced by adding new parameters to the time group while guaranteeing backward compatibility with ObsCore 1.1. \subsection{Extension of ObsCore based on EPNCore} Astronomy and space science both consider time series data and have proposed metadata data description for it. Some metadata have already been defined and used in the context of data discovery using ObsCore \cite{2017ivoa.spec.0509L}, and the remaining ones have been defined in the context of planetary data in the EPNcore specification \cite{2022ivoa.spec.0822E}. In Table~\ref{tab:obs_epn} we show the equivalence between the fields we require here and those existing in ObsCore and EPNcore specifications.
@@ -313,7 +313,7 @@ \subsection{Extension of ObsCore based on EPNCore} \hline t\_exp\_max & - & time\_exp\_max \\ \hline - t\_exp\_total & t\_exp & - \\ + t\_exp\_total & t\_exptime & - \\ \hline delta\_min & - & time\_sampling\_step\_min \\ \hline @@ -352,8 +352,8 @@ \subsection{Mentioning what part of the dataset varies with time } light curve & phot.flux & scalar value \\ \hline velocity curve & doppler.veloc & scalar value \\ \hline trajectory & pos.eq & sky position (vector) \\ \hline -dynamic spectrum & phot.flux & spectrum \\ \hline -movie & phot.flux & image \\ \hline +spatial profile& phot.flux & sky position \\ \hline +movie & phot.flux & image \\ \hline time cube & phot.flux & cube \\ \hline \end{tabular} \end{small} @@ -404,21 +404,21 @@ \subsection{Mentioning what part of the dataset varies with time } \end{flushleft} \end{table} - \subsection{Time series uses cases already covered by ObsCore1.1} + \subsection{Time series use cases already covered by ObsCore1.1} Several uses-cases for time series discoveries were considered in the ObsCore 1.1 specification, built on its short list of time related features. They are available in appendix A in section A.4. Discovering time series. -Here the \emph{dataproduct\_type} value is "timeseries", very general, but the same uses cases can be applied for more specific time sampled datasets like "time-cube" or or "light-curve" available now in the \textbf{product-type} vocabulary . -ObsCore uses cases are also provided in a web page available at : \url{http://saada.unistra.fr/voexamples/show/ObsCore/}. +Here the \emph{dataproduct\_type} value is "timeseries", very general, but the same use cases can be applied for more specific time sampled datasets like "time-cube" or "light-curve" available now in the \textbf{product-type} vocabulary. +ObsCore use cases are also provided in a web page available at: \url{http://saada.unistra.fr/voexamples/show/ObsCore/}.
\section{Time parameters proposed for ObsCore Extension } \label{sec:timeext} \subsection{Time Frame description} - As mentioned in section \ref{sec:comtimeframe} the Time Frame description is essential for comparing various time series data sets. + As mentioned in section \ref{sec:comtimeframe} the Time Frame description used for the data is essential for comparing various time series data sets. This metadata was described first in the STC data model \citep{2007ivoa.spec.1030R}, then in the Coords DM \citep{2022ivoa.specQ1004R}, and serialized in the VOTABLE format in the TimeSYS element. Up to now, this metadata was not defined in ObsCore1.1. It is coded into the VOTable metadata of the dataset. Having it as part of the query response coming back for a search for time series would help the user application to interpret time stamps precisely. -MJD is the time format used for an ObsTAP query related to time. +%MJD is the time format used for an ObsTAP query related to time. We propose to add the time frame parameters in the Time ObsCore extension. These various definitions are harmonized in the proposal given in table \ref{tab:timereff}. We list the corresponding terms used in the Coords Data model and in the UCD vocabulary, as well as the attribute of the TIMESYS param defined for VOTable serialization. All terms are proposed as mandatory, but can be set to UNKNOWN if not available. @@ -547,9 +547,9 @@ \subsubsection{ t\_fold\_period, t\_fold\_phaseReference} Therefore the Time extension for ObsCore should rely on mandatory parameters. If they cannot be retrieved nor calculated from the data they may be set to UNKNOWN. 
- In order to warn users that extra time parameters have been included in ObsTAP, we propose to gather them in another table named \emph{ivoa.t-obs} + In order to warn users that extra time parameters have been included in ObsTAP, we propose to gather them in another table named \emph{ivoa.time-obscore} for services that distribute time sampled data sets. - The utype column in \emph{ivoa.t\_obs} should be the standard identifier of this specification, so here \texttt{ivo://ivoa.net/std/obscore\#t-obs-1.0}. + The utype column in \emph{ivoa.time-obscore} should be the standard identifier of this specification, so here \texttt{ivo://ivoa.net/std/obscore\#time-obs-1.0}. If this table contains an identifier for the corresponding dataset described in main \emph{ivoa.obscore} table, then it is easy to join general ObsCore properties to the time specific ones in an ADQL query. Here is a query example : ( to be checked) @@ -567,7 +567,7 @@ \subsubsection{ t\_fold\_period, t\_fold\_phaseReference} Other examples of queries using these extra parameters are proposed in Appendix \ref{sec:query_examples}. More generally, other extensions can be considered in ObsTAP, like the radio extension or high energy extension specific to these spectral domains and instrumentations. -In an extended ObsTAP service the main ObsCore table and the other extension tables must be gathered in a TAP\_SCHEMA with utype \\ \texttt{ivo://ivoa.net/std/obscore1.1}, for version 1.1 and containing the different tables : ivoa.obscore, ivoa.t-obs, ivoa.radio, ivoa.heig etc.... when needed. +In an extended ObsTAP service the main ObsCore table and the other extension tables must be gathered in a TAP\_SCHEMA with utype \\ \texttt{ivo://ivoa.net/std/obscore1.1}, for version 1.1 and containing the different tables : ivoa.obscore, ivoa.time-obscore, ivoa.radio-obscore, ivoa.heig-obscore etc.... when needed.
This would help to identify ObsCore services with their version and discover all ObsCore table extensions in the TAP service description in order to write up queries with JOIN. % exemples of joins @@ -580,7 +580,7 @@ \subsubsection{ t\_fold\_period, t\_fold\_phaseReference} % NOTE: IVOA recommendations must be cited from docrepo rather than ivoabib % (REC entries there are for legacy documents only) %\section{References} -\bibliography{ivoatex/ivoabib, ivoatex/docrepo, myref} + % note:TSSerialisationNote @@ -589,8 +589,9 @@ \subsubsection{ t\_fold\_period, t\_fold\_phaseReference} \section{Query examples for join tables}\label{sec:query_examples} -\todo{Other examples of join and uses cases} +\input{Time_domain_discovery_Use-cases} +\bibliography{ivoatex/ivoabib, ivoatex/docrepo, myref} \section{Previous work on the Time series characterization and description}. \begin{itemize} diff --git a/Time_domain_discovery_Use-cases.tex b/Time_domain_discovery_Use-cases.tex new file mode 100644 index 0000000..bb241f0 --- /dev/null +++ b/Time_domain_discovery_Use-cases.tex @@ -0,0 +1,89 @@ + +%% Discovery of data products for Time domain use cases +\lstset{captionpos=t} +\begin{itemize} +\item Finding a light curve in a time interval for a sky position + \begin{lstlisting} [language=SQL, captionpos=t, caption=Show me a list of all data matching a particular event (gamma ray burst) in time interval and space ] + I. DataType=light-curve + II. RA includes 16.00 hours + III. DEC includes +41.00 + IV. Time start > MJD 55220 and Time stop < MJD 55221 + V. Number of time slots > 1000 + \end{lstlisting} + +\item Time series for a sky position, with date, length and exposure constraints +\begin{lstlisting} [language=SQL, caption=Show me a list of all data which satisfies] + I. DataType=time-series + II. RA includes 16.00 hours + III. DEC includes +41.00 + IV. Time resolution better than 1 minute + V. Time interval (start of series to end of series) > 1 week + VI.
Observation data before June 10, 2008 + VII. Observation data after June 10, 2007 + \end{lstlisting} + +\item Finding a light curve in folded mode for pulsar analysis +\begin{lstlisting} [language=SQL, caption=Show me a list of all data matching a light curve for a pulsar candidate] + I. DataType=light-curve + II. time resolution < 0.001 s + III. time axis is folded + IV. exposure time > 5s + \end{lstlisting} + +\item Finding MUSE cube time series +\begin{lstlisting} [language=SQL, caption=Show me a list of all data products from MUSE data collection with more than 30 items] + I. DataType=time-cube + II. Data collection like 'MUSE' + III. Number of time slots > 30 + \end{lstlisting} + + % trouver des MASER sources radio variables avec un SNR suffisant --> convertit en t_exp_min > seuil +\begin{lstlisting} [language=SQL, caption= Show me a list of all data matching a light curve for a radio source ] + I. DataType=light-curve + II. Band corresponds to Radio %em_min > radio_min and em_max < radiomax xxx + III. Minimum time sample > 3s + IV. Number of time slots > 10 + \end{lstlisting} + % trouver des light_curve comparables à celles de ma liste de source qui sont en TDB Barycenter + \begin{lstlisting} [language=SQL, caption=Show me a list of all data products using a specified Time system ] + Show me a list of all data products using a specified Time system + I. DataType=light-curve or time-series + II. time scale=TDB + III. time reference position=BARYCENTER + \end{lstlisting} + + % identifier des transits de planetes + TESS ?? + + % identifier des systemes d'étoiles binaires + ADA ?? 
+ + % nature article https://doi.org/10.1038/s41586-023-06787-x + \item Here is an example of the data discovery steps one would launch in the VO for looking at specific binary systems + in the supernova SN 2022jli \citep{2024Natur.625..253C}. + + % A 12.4-day periodicity in a close binary system after a supernova + % target position in ICRS 00 34 45.690 -08 23 12.16 % + % object name = SN 2022jli + \begin{lstlisting} [language=SQL, caption=Show me a list of light curves around object \emph{SN 2022jli}] + I. DataType=light-curve + II. target position close to SN 2022jli + III. em_min > 10 or em_max < 1.0E-8 % radio or Xray, gammaray + IV. Observation data before Sept 30, 2023 + V. Observation data after Sept 01, 2022 + \end{lstlisting} + + Check what the Fermi-Lat telescope may have seen in the meantime + \begin{lstlisting} [language=SQL, caption=Show me a list of light curves around object \emph{SN 2022jli}] + I. DataType=light-curve + II. Data collection like Fermi-Lat + III. t_min > 59823 % Observation data after Sept 01, 2022 + IV. t_max < 60218 % Observation data before Oct 01, 2023 + \end{lstlisting} + + \begin{lstlisting} [language=SQL, caption=Show me a list of dynamic spectra around object \emph{SN 2022jli}] + I. DataType=dynamic-spectrum + II. target position close to SN 2022jli + \end{lstlisting} + + \end{itemize} \ No newline at end of file diff --git a/myref.bib b/myref.bib index 1b5d450..6af9a19 100644 --- a/myref.bib +++ b/myref.bib @@ -19,21 +19,39 @@ @ARTICLE{2015A&A...574A..36R @Misc{SPC_UC, author = {{Solano, Enrique}}, title = {Science priorities for the IVOA }, - howpublished = {{Ivoa Wiki }}, + howpublished = {{IVOA Wiki }}, year = 2017, month = jun, url = {http://wiki.ivoa.net/twiki/bin/view/IVOA/CSPTimeSeries} } @Misc{TSNoteSerialisation, - author = {{Nebot, A. , et al.
}}, + title = {IVOA TimeSeries data modeling and representation Version 1.0}, howpublished = {{IVOA Wiki internal draft }}, - year = 2017, + year = 2018, month = jun, url = {https://wiki.ivoa.net/internal/IVOA/TimeSeries/TSSerializationNote.pdf} } +% example for a search situation +@ARTICLE{2024Natur.625..253C, + author = {{Chen}, Ping and {Gal-Yam}, Avishay and {Sollerman}, Jesper and {Schulze}, Steve and {Post}, Richard S. and {Liu}, Chang and {Ofek}, Eran O. and {Das}, Kaustav K. and {Fremling}, Christoffer and {Horesh}, Assaf and {Katz}, Boaz and {Kushnir}, Doron and {Kasliwal}, Mansi M. and {Kulkarni}, Shri R. and {Liu}, Dezi and {Liu}, Xiangkun and {Miller}, Adam A. and {Rose}, Kovi and {Waxman}, Eli and {Yang}, Sheng and {Yao}, Yuhan and {Zackay}, Barak and {Bellm}, Eric C. and {Dekany}, Richard and {Drake}, Andrew J. and {Fang}, Yuan and {Fynbo}, Johan P.~U. and {Groom}, Steven L. and {Helou}, George and {Irani}, Ido and {Jegou du Laz}, Theophile and {Liu}, Xiaowei and {Mazzali}, Paolo A. and {Neill}, James D. and {Qin}, Yu-Jing and {Riddle}, Reed L. and {Sharon}, Amir and {Strotjohann}, Nora L. and {Wold}, Avery and {Yan}, Lin}, + title = "{A 12.4-day periodicity in a close binary system after a supernova}", + journal = {\nat}, + keywords = {Astrophysics - High Energy Astrophysical Phenomena, Astrophysics - Solar and Stellar Astrophysics}, + year = 2024, + month = jan, + volume = {625}, + number = {7994}, + pages = {253-258}, + doi = {10.1038/s41586-023-06787-x}, +archivePrefix = {arXiv}, + eprint = {2310.07784}, + primaryClass = {astro-ph.HE}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2024Natur.625..253C}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} +