forked from ivoa-std/VOTable
-
Notifications
You must be signed in to change notification settings - Fork 0
/
VOTable.tex
2625 lines (2270 loc) · 114 KB
/
VOTable.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[11pt,a4paper]{ivoa}
\input tthdefs
\usepackage{verbatim}
% ---------------------------------------------------------------------
% Document-local shorthands, page layout and colour/markup macros.
% ---------------------------------------------------------------------
% \A{url}{text}: plain alias for \href.
\let\A=\href
% Cross-reference helpers: each prints a fixed word, a non-breaking
% space and the number of the given label.
% \Aref and \Arefs have identical definitions (both print "section").
\def\Aref#1{section~\ref{#1}}
\def\Arefs#1{section~\ref{#1}}
% \Arefx{label}: same pattern, printing "appendix".
\def\Arefx#1{appendix~\ref{#1}}
% \Tref{label} / \Fref{label}: "Table~N" / "Figure~N".
\def\Tref#1{Table~\ref{#1}}
\def\Fref#1{Figure~\ref{#1}}
% \fg{colour}: alias for \color, used inside groups as a foreground switch.
\let\fg=\color
% Page geometry set by direct length assignment; these assignments
% replace whatever values were in effect before this point.
\topmargin=-1cm
\raggedbottom
\oddsidemargin=-0.8cm
\evensidemargin=-0.8cm
\textwidth=17.5cm
\textheight=23.5cm
% No paragraph indentation anywhere in the document.
\parindent=0pt
% Thicker table rules and 20% taller table rows.
\arrayrulewidth=0.75pt\renewcommand{\arraystretch}{1.2}
% Named colours used by the markup macros below.
\definecolor{DarkRed}{rgb}{0.5,0,0}
\definecolor{DarkBlue}{rgb}{0,0,0.5}
\definecolor{DarkPurple}{rgb}{0.3,0.1,0.5}
\definecolor{DarkGoldenrod}{rgb}{0.72,0.5,0.05}
% \slash: a blue "/" character (used before the closing bracket of
% empty-element examples built with \elemdef).
% NOTE(review): \def does not check for an existing definition, so this
% presumably overrides the LaTeX kernel's own \slash -- confirm intended.
\def\slash {{\fg{blue}/}}
% \attr{name}: attribute name in fixed-width DarkRed.
\def\attr#1{{\tt{\fg{DarkRed}#1}}}
% \requiredattr{name}: attribute name in DarkBlue with \tt\bf applied.
% NOTE(review): with the standard old-style font commands \bf resets the
% family, which would cancel the preceding \tt -- confirm against the
% ivoa class whether bold fixed-width is actually produced.
\def\requiredattr#1{{\tt\bf{\fg{DarkBlue}#1}}}
% \elem{NAME}: element name; same rendering as \attr.
\def\elem#1{{\tt{\fg{DarkRed}#1}}}
% \attrval{name}{value}: prints name="value" with the name in DarkRed
% and the value in DarkPurple, all fixed-width.
\def\attrval#1#2{{\tt{\fg{DarkRed}#1}="{\fg{DarkPurple}#2}"}}
% \elemdef{NAME}{rest}: prints a tag <NAME rest> with blue angle brackets,
% the element name in DarkRed and #2 (typically \attrval items, optionally
% ending in \slash) in fixed-width.
\def\elemdef#1#2{{\tt\fg{blue}<}{\tt{\fg{DarkRed}#1}#2}{\tt\fg{blue}>}}
% \literalvalue{text}: prints "text" with fixed-width quote marks and the
% text itself in DarkPurple (the text is not switched to fixed-width).
\def\literalvalue#1{{\tt"}{{\fg{DarkPurple}#1}}{\tt"}}
% Marker symbols; each expands to a math symbol followed by a space.
% Their meaning is assigned where they are used later in the document.
\def\order{$\oplus$ }
\def\unorder{{\large $\circ$ }}
\def\deprecated {$\dagger$ }
\def\choice{{$\mapsto$ }}
% plain: thin wrapper around the quote environment.
\newenvironment{plain}{\begin{quote}}{\end{quote}}
\title{VOTable Format Definition}
% see ivoatexDoc for what group names to use here
\ivoagroup{Applications}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/FrancoisOchsenbein]
{Fran\c{c}ois Ochsenbein}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/RoyWilliams]{Roy Williams}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/CliveDavenhall]{Clive Davenhall}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/MarkusDemleitner]
{Markus Demleitner}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/TomDonaldson]{Tom Donaldson}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/DanielDurand]{Daniel Durand}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/PierreFernique]{Pierre Fernique}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/DavidGiaretta]{David Giaretta}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/BobHanisch]{Robert Hanisch}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/TomMcGlynn]{Tom McGlynn}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/AlexSzalay]{Alex Szalay}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/MarkTaylor]{Mark Taylor}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/AndreasWicenec]{Andreas Wicenec}
\editor[http://www.ivoa.net/twiki/bin/view/IVOA/FrancoisOchsenbein]
{Fran\c{c}ois Ochsenbein}
\editor[http://www.ivoa.net/twiki/bin/view/IVOA/MarkTaylor]{Mark Taylor}
\editor[http://www.ivoa.net/twiki/bin/view/IVOA/TomDonaldson]{Tom Donaldson}
\previousversion[http://www.ivoa.net/documents/VOTable/20130920/]
{http://www.ivoa.net/documents/VOTable/20130920/
(V1.3 2013-09-20)}
\previousversion[http://www.ivoa.net/documents/VOTable/20091130/]
{http://www.ivoa.net/documents/VOTable/20091130/
(V1.2 2009-11-30)}
\previousversion[http://www.ivoa.net/documents/cover/VOT-20040811.html]
{http://www.ivoa.net/documents/cover/VOT-20040811.html
(V1.1 2004-08-11)}
\previousversion[http://www.ivoa.net/documents/PR/VOTable/VOTable-20031017.html]
{http://www.ivoa.net/documents/PR/VOTable/VOTable-20031017.html
(V1.0 2003-10-17)}
\begin{document}
\begin{abstract}
This document describes the structures making up
the VOTable standard.
The main part of this document describes the adopted part of the
VOTable standard; it is followed by appendices presenting extensions
which have been proposed and/or discussed, but which are not part of
the standard.
\end{abstract}
% This section not included in REC-VOTable-1.4
% Possibly should be introduced into later versions for conformity
% with other IVOA docs; but the content would have to be reviewed
% to determine whether these terms actually are used like this.
%
% \section*{Conformance-related definitions}
%
% The words ``MUST'', ``SHALL'', ``SHOULD'', ``MAY'', ``RECOMMENDED'', and
% ``OPTIONAL'' (in upper or lower case) used in this document are to be
% interpreted as described in IETF standard RFC2119 \citep{std:RFC2119}.
%
% The \emph{Virtual Observatory (VO)} is a
% general term for a collection of federated resources that can be used
% to conduct astronomical research, education, and outreach.
% The \href{http://www.ivoa.net}{International
% Virtual Observatory Alliance (IVOA)} is a global
% collaboration of separately funded projects to develop standards and
% infrastructure that enable VO applications.
\section{Introduction}
The VOTable format is an XML standard for the interchange of data
represented as a set of tables.
In this context, a table is an unordered set of rows, each of
a uniform structure, as specified in the table description
(the table {\em metadata}).
Each row in a table is a sequence of table cells, and each of these contains
either a primitive data type, or an array of such primitives.
VOTable is derived from the
Astrores format \citep{astrores}, itself modeled on the FITS Table format
\citep{std:FITS};
VOTable was designed to be close to the FITS Binary Table format.
\subsection{Why VOTable?}
Astronomers have always been at the forefront of developments in
information technology, and funding agencies across the world have
recognized this by supporting the Virtual Observatory movement, in
the hopes that other sciences and business can follow their lead in
making online data both {\it interoperable} and {\it scalable}.
VOTable is designed as a flexible storage and exchange format for
tabular data, with particular emphasis on astronomical tables.
Interoperability is encouraged through the use of standards (XML).
The XML fabric
allows applications to easily validate an input document, as well as
facilitating transformations through XSLT (eXtensible Stylesheet Language
Transformations) engines.
\subsubsection*{Grid Computing}
VOTable has built-in features for big-data and Grid computing. It
allows metadata and data to be stored separately, with the remote
data linked. % according to the Xlink model.
Processes can then use
metadata to `get ready' for their input data, or to organize
third-party or parallel transfers of the data. Remote data allow the
metadata to be sent in email and referenced in documents without
pulling the whole dataset with it: just as we are used to the idea of
sending a pointer to a document (URL) in place of the document, so we
can now send metadata-rich pointers to data tables in place of the
tables themselves. The remote data is referenced with the URL syntax
\textsf{protocol://location},
meaning that arbitrarily complex protocols are allowed.
When we are working with very large tables in a
distributed-computing environment (``the Grid''), the data
stream between processors, with flows being filtered, joined, and
cached in different geographic locations. It would be very difficult
if the number of rows of the table were required in the header --
we would need to stream in the whole table into a cache, compute the
number of rows, then stream it again for the computation. In the
Grid-data environment, the component in short supply is not the
computers, but rather these very large caches. Furthermore, these
remote data streams may be created dynamically by another process or
cached in temporary storage: for this reason VOTable can express that
remote data may not be available after a certain time (\attr{expires}).
Data on the net may require authentication for access, so VOTable
allows expression of password or other identity information (the
`{\attr{rights}}'
attribute).
\subsubsection*{Data Storage: Flexible and Efficient}
The data part in a VOTable may be represented using one of four
different formats: TABLEDATA, FITS, BINARY and BINARY2. TABLEDATA is a
pure XML format so that small tables can be easily handled in their
entirety by XML tools. The FITS binary table format is well-known to
astronomers, and VOTable can be used either to encapsulate such a
file, or to re-encode the metadata; unfortunately it is difficult to
stream FITS, since the dataset size is required in the header
(NAXIS2 keyword), and FITS requires a specification up front of the maximum
size of its variable-length arrays. The BINARY and BINARY2 formats
are supported for efficiency and ease of programming: no FITS
library is required, and the streaming paradigm is supported.
VOTable can be used in different ways, as a data
storage and transport format, and also as a way to store metadata
alone (table structure only). In the latter case, a
VOTable structure can be sent to a server, which can then open a
high-bandwidth connection to receive the actual data, using the
previously-digested structure as a way to interpret the stream of
bytes from the data socket.
VOTable can be used for small numbers of small records (pure XML
tables), or for large numbers of simple records (streaming data), or
it can be used for small numbers of larger objects. In the latter
case, there will be software to spread large data blocks among
multiple processors on the Grid. Currently the most complex structure
that can be in a VOTable Cell is a multidimensional array.
\subsection{XML Conventions}
VOTable is constructed with \A{http://www.w3.org/XML/}{XML} (eXtensible Markup Language), a
powerful standard for structured data throughout the Internet
industries. It derives %through simplification
from SGML, %which has been
a standard used in the publishing industry and for
technical documentation for many years. XML
consists of {\it elements} and payload, where an element consists of
a {\it start tag} (the part in angle brackets), the payload, and an
{\it end tag} (with angle brackets and a slash). Elements can
contain other elements. Elements can also bear
{\attr{attributes}}
(keyword-value combinations).
The payload may be in two forms: parsed or unparsed character
data. Examples are:
\begin{verbatim}
<text>Fran&#231;ois</text>
<text><![CDATA[ a & (b <= c) ]]></text>
\end{verbatim}
In the first example, the sequence {\tt \&\#231;} is interpreted as
part of the ISO/IEC 10646 character set (Unicode), and translates to an
accented character, so that the text is ``Fran\c{c}ois''.
The second example uses the special {\tt CDATA} sequence so that the
characters {\tt <}, {\tt >}, and {\tt\&} can be used without interpretation;
in this case, any ASCII characters are allowed except the terminating
sequence {\tt]]>}. For more information, see any book on
XML.
\subsection{Syntax Policy}
Following the general XML rule, element and attribute names are
case-sensitive and have to be used with the specified
capitalisation. For VOTable, we have adopted the convention that
element names are spelled in uppercase
and attribute names in lowercase (with an
exception for the {\attr{ID}}
attribute).
Element and attribute names are further distinguished in
this paper by being typed with a {\attr{red fixed-width}} font,
and the values of the attributes by being \literalvalue{coloured}.
\subsection{VOTable in the VO Architecture}
\label{sec:voarch}
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{role_diagram.pdf}
\caption{Architecture diagram for this document}
\label{fig:archdiag}
\end{figure}
VOTable is a core IVOA standard.
Wherever tabular data is transferred between Virtual Observatory components,
VOTable provides the preferred serialization format.
Since tables are used to list available resources as well as to
represent science data which is itself tabular,
this means that VOTable is used pervasively in the definitions
of the Data Access protocols (e.g.\ SCS, SIA, SSA, TAP),
and hence for exchange of data and metadata
between user layer applications and data-providing services.
VOTable is also used as a serialization format for
some of the IVOA Data Models.
In order to represent semantically rich metadata, VOTable relies on
the other IVOA standards UCD, Utype, VOUnits, and DALI.
This document explains how information structured according to those
standards is managed within the VOTable framework.
\section{Data Model}
In this section we define the data model of a VOTable, and in the
next sections its syntax when expressed as XML. The data model of
VOTable can be expressed as:
\medskip
\begin{tabular}{rrcp{0.7\textwidth}}
\hspace{3em}&{\bf VOTable} &=& hierarchy of {\bf Metadata} + associated
{\bf TableData}, arranged as a set of {\bf Tables}\\
&{\bf Metadata} &=& {\bf Parameters} + {\bf Infos} + {\bf Descriptions}
+ {\bf {\fg{black}Links + Fields + Groups}}\\
&{\bf Table} &=& list of {\bf Fields + TableData}\\
&{\bf TableData}{ } &=& stream of {\bf Rows}\\
&{\bf Row} &=& list of {\bf Cells}\\
&{\bf Cell} &=&
$\left\{
\begin{tabular}{l}
{\bf Primitive} \\
or variable-length list of {\bf Primitives} \\
or multidimensional array of {\bf Primitives}\\
\end{tabular}
\right.$
\\
&{\bf Primitive} &=& integer, character, float, floatComplex, etc
(see \Tref{primitives} below).
\end{tabular}
\medskip
\par\noindent
Metadata is divided into that which concerns the table itself
(parameters), and the definitions of the fields (or column
attributes) of the table.
Each \elem{FIELD} represents the metadata
that can be found at the
top of the column in a paper version of the table:
in the example introduced in \Aref{example1}
below, the first \elem{FIELD} has its \attr{name} attribute
set to \literalvalue{RA}. The Field can be thought of as a class definition,
and the table cells below it are the instances of that class.
A parameter ({\elem{PARAM}})
is similar to a {\elem{FIELD}},
except that it has a \attr{value} attribute.
Parameters can be seen as ``constant columns'', containing for instance
FITS keywords or any other
information pertaining to the table itself or its environment, such as the
{\tt Telescope} parameter in the example of \Arefs{example1}.
An informative parameter ({\elem{INFO}}) (see \Arefs{elem:INFO})
is a restricted form of the {\elem{PARAM}} --- it is always understood
as a {\em string} (i.e.\ \attrval{datatype}{char}
and \attrval{arraysize}{*} are {\em implied}).
% No idea why, this seems to be required to prevent blue text
% at the top of a page in PDF output.
\mbox{}{\color{black}}%
%
The ordered list of Fields at the top of the table thus provides a
template for a Row object (also called a {\it record}). The
template allows interpretation of the data in the Row.
The
record is a set of Cells, with the number and order of Cells the same for each
Row, and the same as the number of Fields defined in the Metadata.
In VOTable,
there is generally no advance specification of the number of rows in the table:
this is to allow streaming of large tables, as discussed above.
However, if the number of rows is known, it may be specified in a
dedicated \attr{nrows} attribute.
From Version 1.1, columns may be logically grouped, so that it is
possible to define table substructures made of column associations.
Such an association is declared as a \elem{GROUP}, which typically
contains column references (\elem{FIELDref})
and associated parameters (\elem{PARAM}).
\subsection{Primitives}
\begin{table}[hbt]
\begin{center}\begin{tabular}{|r|l|c|r|}
\hline
{\attr{datatype}} & Meaning & \attr{FITS} &
{ Bytes} \\
\hline
\literalvalue{boolean} & Logical &\literalvalue{L}& 1 \\
\literalvalue{bit} & Bit &\literalvalue{X}& * \\
\literalvalue{unsignedByte} & Byte (0 to 255) &\literalvalue{B}& 1 \\
\literalvalue{short} & Short Integer &\literalvalue{I}& 2 \\
\literalvalue{int} & Integer &\literalvalue{J}& 4 \\
\literalvalue{long} & Long integer &\literalvalue{K}& 8 \\
\literalvalue{char} & ASCII Character &\literalvalue{A}& 1 \\
\literalvalue{unicodeChar} & Unicode Character& & 2 \\
\literalvalue{float} & Floating point &\literalvalue{E}& 4 \\
\literalvalue{double} & Double &\literalvalue{D}& 8 \\
\literalvalue{floatComplex} & Float Complex &\literalvalue{C}& 8 \\
\literalvalue{doubleComplex}& Double Complex &\literalvalue{M}& 16 \\
%logical & 1 \\
%bit & * \\
%byte & 1\\
%short & 2 \\
%int & 4 \\
%long & 8 \\
%char & 1 \\
%unicodeChar & 2 \\
%float & 4 \\
%double & 8 \\
%floatComplex & 8 \\
%doubleComplex & 16 \\
\hline\end{tabular}\end{center}
\caption{\label{primitives}List of the Primitives
{\em(details in \Aref{sec:datatypes})}}\end{table}
Each Cell is composed from Primitives, each of which is a datatype
of fixed-length binary representation, as listed in
\Tref{primitives}.
Cells may consist of a single Primitive (this is
the default), or of an {\em array} (which may be multidimensional)
of Primitives (see \Aref{array}).
Except for the Bit type, each primitive has the fixed length in
bytes given in \Tref{primitives}.
Bit scalars and arrays are stored in
the minimum number of bytes feasible (so that $b$ bits take the integer
part of $(b+7)/8$ bytes). These primitives
are described in more detail in \Aref{sec:datatypes}.
VOTables support two kinds of characters: ASCII 1-byte characters
and Unicode (UCS-2) 2-byte characters. Unicode is a way to represent
characters that is an alternative to ASCII. It uses two bytes per
character instead of one, it is strongly supported by XML tools, and
it can handle a large variety of international alphabets. Therefore
VOTable supports not only ASCII strings ({\attrval{datatype}{char}}),
but also Unicode ({\attrval{datatype}{unicodeChar}}).
Note that strings are not a primitive type: strings are
represented in VOTable as an array of characters. %in an characters are.
\subsection{Columns as Arrays}\label{array}
\label{sec:dim}
A table cell can contain an {\em array} of a given primitive type,
with a fixed or variable number of elements; the array may even
be multidimensional. For instance, the position of a point in a
3D space can be defined by the following:
\elemdef{FIELD}{ \attrval{ID}{point\_3D} \attrval{datatype}{double}
\attrval{arraysize}{3}\slash}
\noindent and each cell corresponding to that definition must contain exactly
3 numbers. An asterisk ({\bf\tt*}) may be appended to indicate
a {\em variable} number of elements in the array, as in:
\elemdef{FIELD}{ \attrval{ID}{values} \attrval{datatype}{int}
\attrval{arraysize}{100*}\slash}
\noindent where it is specified that each cell corresponding to that
definition contains 0 to 100 integer numbers. The number may be
omitted to specify an unbounded array
(in practice up to $\simeq 2\times10^9$ elements).
A table cell can also contain a {\em multidimensional array}
of a given primitive type. This is specified by a sequence of dimensions
separated by the {\tt x} character,
with the first dimension changing fastest; as in the case
of a simple array, the last dimension may be variable in length.
As an example, the following definition
declares a table cell which may contain a set of up to 10 images,
each of 64x64 bytes:
\elemdef{FIELD}{ \attrval{ID}{thumbs} \attrval{datatype}{unsignedByte}
\attrval{arraysize}{64x64x10*}\slash}
Strings, which are defined as a set of characters,
can therefore be represented in VOTable as a fixed- or variable-length
array of characters:
\elemdef{FIELD}{ \attrval{name}{unboundedString} \attrval{datatype}{char}
\attrval{arraysize}{*}\slash}
A 1D array of strings can be represented as a 2D array of characters, but
given the logic above, it is possible to define a variable-length array
of fixed-length strings,
but not a fixed-length array of variable-length strings.
A convention to express an array of variable-length strings
exists (see \Aref{sec:arraystring}) but is not
part of this standard.
\textbf{Note:} \attr{arraysize} should be present if, and only if, each table
cell for the \elem{FIELD} is intended to be treated as an array.
\attrval{arraysize}{1}
should not be used, as it is interpreted differently by different
clients at this point. If a future VOTable specification re-encourages
its use, \attrval{arraysize}{1} will mean ``array of length 1''.
\subsection{Compatibility with FITS Binary Tables}
VOTable is closely compatible with the FITS Binary Table format.
Henceforth, we shall abbreviate ``FITS Binary Table and its
Conventions'' simply by the word ``FITS''. Given a FITS
file that represents a binary table, the header may be converted to
VOTable, with a pointer to the original file, or with the original
file included directly in VOTable. Since the original file is still
present, it is clear that no data has been lost. A {\elem{PARAM}}
element can be used to hold any FITS keyword with its value
and comment string.
We might ask two more significant questions, about how much of
the FITS header and data can be represented in VOTable. The answer is
that there is considerable overlap.
For instance, the recommended formatting of the data for
display is expressed in FITS by the non-mandatory TDISP keyword:
for example F12.4 means 12 characters are to be used, and 4 decimal
places. In VOTable this is expressed by the attributes {\attr{width}}
and {\attr{precision}}
which, connected with {\bf {\attr{datatype}}},
are semantically identical to the TDISP keyword.
\subsubsection*{What can FITS do but not VOTable?}
FITS has complex semantics, with many conventions
(see {\em e.g.} the Registry of FITS
Conventions\footnote{\url{http://fits.gsfc.nasa.gov/fits_registry.html}})
which have been developed
mainly to be able to cope with the increasing complexity
of astronomical instrumentation. In the frame of the
{\em Virtual Observatory} the complexity is described by
means of {\em data models}, and from its version 1.1,
{\em VOTable} can refer to these data models by means
of the \attr{utype} attribute described in
\Aref{sec:utype}.
\subsubsection*{What can VOTable do but not FITS?}
VOTable supports the separation of data from metadata and the
streaming of tables, and other ideas from modern distributed
computing. It bridges two ways to express structured data: XML and
FITS. It uses UCDs (see \Aref{sec:ucd})
to formally express the semantic
content of a parameter or field. It has the hierarchy and flexibility
of XML: using \elem{GROUP} elements introduced in version 1.1,
columns in a VOTable can be grouped in arbitrarily complex hierarchies;
and the ID attribute can be used in XML
to enable what are essentially pointers.
FITS does not handle Unicode (extended alphabet) characters.
\medskip
\noindent{\fg{black}It should be noted that the transformation
of FITS to VOTable is reversible:
any FITS table can be converted to a VOTable without loss of
information and the resulting VOTable can be converted back to a
FITS table also without loss of information.
However, it is
possible to create new VOTables which cannot be converted to FITS
tables without loss of information.
}
\section{The VOTable Document Structure}
\label{elem:VOTABLE}
The overall VOTable document structure is described and controlled
by its XML Schema \citep{std:XSD}. The schema for VOTable version 1.4 is
given in \Arefx{XML-schema} of this document. It can also
be retrieved from \url{http://www.ivoa.net/xml/VOTable/votable-1.4.xsd}.
A VOTable document consists of a single all-containing element
called {\elem{VOTABLE}}, which contains descriptive elements and global definitions
({\elem{DESCRIPTION}}, \elem{GROUP}, \elem{PARAM}, \elem{INFO}),
followed by one or more {\elem{RESOURCE}} elements.
Each Resource element contains zero or more \elem{TABLE} elements,
and possibly other \elem{RESOURCE} elements.
The \elem{TABLE} element, the actual heart of VOTable, contains
a description of the columns and parameters
(described in \Aref{sec:field})
followed by the data values
(described in \Aref{sec:data}).
As the root element, \elem{VOTABLE} has attributes which specify the VOTable version
number and XML namespaces used in the document. For VOTable 1.4, the \elem{VOTABLE}
element MUST define \attrval{version}{1.4}. All VOTable 1.4 elements come from the
namespace \nolinkurl{http://www.ivoa.net/xml/VOTable/v1.3}. It is recommended to bind
the empty namespace prefix to this URI, as in
\attrval{xmlns}{http://www.ivoa.net/xml/VOTable/v1.3}, but instance
documents are free to use whatever namespace prefix is convenient for them.
Note that starting with VOTable 1.3, the namespace URI for VOTable will
remain fixed at \nolinkurl{http://www.ivoa.net/xml/VOTable/v1.3}
until the next major version as discussed in \citet{2018ivoa.spec.0529H}.
As per IVOA
recommendations, this namespace URI will always redirect to the latest
recommended schema for VOTable version 1.x.
VOTable consumers doing schema validation
are free to use either this latest recommended schema or the version-specific
schema relevant to the VOTable version. So, while instance documents
may include the \attr{schemaLocation} attribute, consumers are not required
to honor it.
Documents claiming to represent VOTables must validate against the
relevant version of the VOTable schema without error. Notice that the
validation is a necessary, {\em but not sufficient}, condition for correctness.
\subsection{Example}
This simple example of a VOTable document lists 3 galaxies with their
position, velocity and error, and their estimated distance.
\label{example1}
\begingroup\small
\verbatiminput{stc_example1.vot}
\endgroup
This simple \elem{VOTABLE} document shows a single \elem{RESOURCE} made of a single \elem{TABLE};
the table is made of 6 columns, each described by a \elem{FIELD}, and has
one additional \elem{PARAM} parameter (the Telescope). The actual rows are
listed in the \elem{DATA} part of the table, here in XML format
(introduced by \elem{TABLEDATA}); each cell is marked by the \elem{TD} element,
and the cells follow the same order as their \elem{FIELD} descriptions:
{\sl RA, Dec, Name, RVel, e\_RVel, R}.
\subsection{{\attr{name}, \attr{ID} and \attr{ref} attributes}}
\label{sec:name}
Most of the elements defined by VOTable may have or have to have {\em names},
like a \elem{RESOURCE}, a \elem{TABLE}, a \elem{PARAM} or a \elem{FIELD}.
The content of the \attr{name} attribute is defined as a {\em token}
XML type,
that is a string of characters where the blanks and spaces are not
meaningful (no leading or trailing spaces, no multiple spaces):
\attrval{name}{NVSS flux(1.4GHz)} represents therefore
a valid name.
The \attr{ID} and \attr{ref} attributes are defined as XML types {\em ID}
and {\em IDREF} respectively. This means that the content of \attr{ID}
is an {\em identifier} which must be {unique} throughout a VOTable document,
and that the content of the \attr{ref} attribute represents a reference to
an identifier which must exist in the VOTable document.
In other terms, if \attrval{ref}{myStar} is found in one element,
there must exist an element in the same document with the
\attrval{ID}{myStar} attribute. The XML standard moreover specifies
that an {\em ID} type is a string beginning with a letter or
underscore ({\tt{\_}}),
followed by a sequence of Unicode letters, digits, or any of the
punctuation characters {\tt.} (dot), {\tt-} (dash) or {\tt\_} (underscore).
The {\tt:} (colon) is reserved for namespace use and should be avoided.
Therefore \attrval{ID}{1} is {\em not} valid,
but \attrval{ID}{\_1} or \attrval{ID}{ref.1} are both valid.
The {\attr{ID}} attribute %(as defined by Xpointer standard)
is therefore required in the elements which {\em have to be referenced},
but the elements having an {\attr{ID}} attribute do not need to be
referenced.
From VOTable 1.2, it is further
recommended to place the \attr{ID} attribute {\em prior to} referencing
it whenever possible.
While the {\attr{ID}} attribute has to be unique in a VOTable document,
the {\attr{name}} attribute need not. It is however recommended,
as a good practice, to assign unique names within a \elem{TABLE} element.
This recommendation means that,
between a \elem{TABLE} and its corresponding closing \elem{\slash TABLE} tag,
{\attr{name}} attributes of \elem{FIELD}, \elem{PARAM} and
optional \elem{GROUP} elements should be all different.
\subsection{\elem{VOTABLE} Element}
\label{sec:definitions}
The \elem{VOTABLE} element may contain definitions consisting of
a \elem{DESCRIPTION}, followed by any mixture of parameters and
informative notes possibly structured in {\em groups}.
These elements represent values which are meaningful over all tables
included in a \elem{VOTABLE} document --- definitions specific to
a \elem{RESOURCE} (\Aref{elem:RESOURCE})
or a \elem{TABLE} (\Aref{elem:TABLE}) are better placed
within their most appropriate element.
Note that version 1.0 of VOTable required the usage of a \elem{DEFINITIONS}
element holding the VOTable global definitions --- this
usage is deprecated since version 1.1.
\subsection{\elem{COOSYS} Element}
\label{elem:COOSYS}
The \elem{COOSYS} element defines a celestial coordinate system, to
which the components of a position on the celestial sphere refer. It has
an \attr{ID} attribute -- required if the \elem{COOSYS} element has to
be referred to via the \attr{ref} attribute of the position components,
which is generally the case --, a \attr{system} attribute which
specifies the coordinate system among \verb|"ICRS"|, \verb|"eq_FK5"|,
\verb|"eq_FK4"|, \verb|"ecl_FK4"|, \verb|"ecl_FK5"|, \verb|"galactic"|,
\verb|"supergalactic"|. The \attr{equinox} attribute fixes the
equatorial or ecliptic systems (as e.g., \verb|"J2000"| as the default
for \verb|"eq_FK5"| or \verb|"B1950"| as the default for
\verb|"eq_FK4"|), and \attr{epoch} specifies the epoch of the positions
if necessary. Note that the \elem{COOSYS} may be deprecated in the
future in favor of a more generic way of describing the conventions used
to define the positions of the objects studied in the enclosed tables.
A \elem{COOSYS} element referenced via a \attr{ref} attribute
SHOULD appear before the element that references it.
\subsection{\elem{TIMESYS} Element}
\label{elem:TIMESYS}
The \elem{TIMESYS} element (introduced in VOTable 1.4) defines metadata
for temporal coordinates. To reference the time system defined by a
\elem{TIMESYS} element, \elem{FIELD}s (and possibly \elem{PARAM}s)
MUST reference the \elem{TIMESYS} using the VOTable \attr{ref} attribute.
If a \elem{FIELD} or \elem{PARAM} represents a time-like quantity but does not
reference a \elem{TIMESYS} element, then no assertion is made about its time
system. A \elem{TIMESYS} element referenced via a \attr{ref} attribute MUST appear
before the element that references it.
\elem{TIMESYS} has the following attributes:
\begin{description}
\item[\attr{ID}] This attribute is used to reference \elem{TIMESYS}
elements from the elements using the time system.
\item[\attr{timeorigin}] This is the time origin of the time coordinate,
given as a Julian Date for the time scale and reference point
defined. It is usually given as a floating point
literal; for convenience, the magic strings \verb|MJD-origin| (standing
for 2400000.5) and \verb|JD-origin| (standing for 0) are also allowed.
The \attr{timeorigin} attribute MUST be given unless the time's representation
contains a year of a calendar era, in which case it MUST NOT be present.
In VOTables, these
representations currently are Gregorian calendar years with
\attrval{xtype}{timestamp}, or years in the Julian or Besselian calendar when a column
has \verb|yr|, \verb|a| or \verb|Ba| as its unit and no time origin is
given.
\item[\attr{timescale}] This is the time scale used. Values SHOULD be
taken from the IVOA \emph{timescale}
vocabulary
(\url{http://www.ivoa.net/rdf/timescale}).
This attribute is mandatory.
\item[\attr{refposition}] The reference position again is a simple string,
the values of which SHOULD be taken from the IVOA \emph{refposition}
vocabulary
(\url{http://www.ivoa.net/rdf/refposition}).
This attribute is mandatory.
\end{description}
The example below shows a VOTable in which each row would have an observation
time, a flux, and a magnitude. The observation time values are given in days since
Julian Date 2455197.5 (the time origin for the Gaia observatory) in the Barycentric
Coordinate Time (TCB) time scale, with the reference position being the barycenter
of the solar system.
In the example, the \elem{TIMESYS} element describes that time system. The
\elem{TIMESYS} ID value needs to be unique within the document so that it can be
referenced by \elem{FIELD}s or \elem{PARAM}s. Then the \attr{obs\_time}
\elem{FIELD} indicates that its values should be interpreted in that time system by
referring back to the \elem{TIMESYS} element using \attrval{ref}{time\_frame}.
Similarly, the \elem{COOSYS} element defines the coordinate system, and is
referred to by the \attr{ra} and \attr{dec} \elem{PARAM} elements. Note that
since the sky position is defined by \elem{PARAM}s instead of \elem{FIELD}s,
the same sky position applies to each row of the \elem{TABLE} without the values
appearing in \elem{TD} elements.
Further (non-normative) information on best practices and usage patterns
for \elem{TIMESYS} can be found in \citet{timesys}.
%\label{timesys_example}
\begingroup\small
\verbatiminput{timesys_example.vot}
\endgroup
\subsection{\elem{RESOURCE} Element}
\label{sec:resource}
\label{elem:RESOURCE}
A VOTable document contains one or more {\elem{RESOURCE}}
elements, each of these providing a description and the
data values of some logically independent data structure.
Each \elem{RESOURCE} may include the descriptive element {\elem{DESCRIPTION}},
followed by a mixture of
{\elem{INFO}}, {\elem{GROUP}} and {\elem{PARAM}} elements;
it may also contain {\elem{LINK}}
elements to provide URL-type pointers that give further information.
The main component of a \elem{RESOURCE} is typically one or more \elem{TABLE}
elements -- in other words a \elem{RESOURCE} is basically a set
of related tables. The \elem{RESOURCE} is recursive (it can contain other
\elem{RESOURCE} elements), which means that the set of tables making up
a \elem{RESOURCE} may become a tree structure.
A \elem{RESOURCE} may have one or both of the \attr{name} or \attr{ID}
attributes (see \Aref{sec:name}); it may also be qualified by
\attrval{type}{meta}, meaning that the resource is {\em descriptive}
only, i.e.\ does not contain any actual data: no \elem{DATA} element
should exist in any of its sub-elements. A \elem{RESOURCE} without
this attribute {\em may} however have no \elem{DATA} sub-element.
Finally, the \elem{RESOURCE} element may have a \attr{utype} attribute
to link the element to some external data model
(introduced in version 1.1, see \Aref{sec:utype}).
\subsection{\elem{LINK} Element}
\label{sec:link}
\label{elem:LINK}
The role of the {\elem{LINK}} element is to provide pointers
to external resources
through a URI. In VOTable, the {\elem{LINK}}
element may be part of a {\elem{RESOURCE}},
{\elem{TABLE}}, \elem{GROUP}, {\elem{FIELD}} or \elem{PARAM} element.
The linked URI is given by the \attr{href} attribute,
and the nature of the link is indicated by the \attr{content-role} attribute.
The URI should ideally be dereferenceable,
but this is not an absolute requirement,
and appropriate use of the URI depends on the content-role.
This document defines two values for the \attr{content-role} attribute:
\begin{itemize}
\item \attrval{content-role}{doc} indicates documentation.
Dereferencing the URI should yield a document suitable for
presentation to the user which describes the LINK's parent element.
If the URI can produce more than one type, a human-readable response
must be the default.
Appropriate behaviour for a client might be to pass the link to a browser
for presentation.
\item \attrval{content-role}{type} indicates a type-like relationship
between the URI and the LINK's parent.
The type is named by the URI string itself,
while the content retrieved by dereferencing it, if any, is secondary.
This content-role value would for instance be appropriate
to mark the LINK's href value as a SKOS concept, e.g.:
\begin{verbatim}
<LINK content-role="type"
href="http://purl.org/astronomy/vocab/PhysicalQuantities/Distance"/>
\end{verbatim}
\end{itemize}
A \attr{content-role} should be provided for all \elem{LINK} elements,
but if it is absent, a doc-like role may be assumed.
Other values of the \attr{content-role} attribute may be defined
as appropriate outside of this VOTable specification,
for instance by the Semantics Working Group or as part of other
standards that make use of VOTable.
In addition the \elem{LINK} element
may announce the MIME type of the data it references
with a \attr{content-type} attribute (e.g.\ \attrval{content-type}{image/fits}).
Although this might be overridden by metadata received during the
retrieval operation (e.g.\ the HTTP Content-Type header)
it can serve as a hint to the application about what to expect.
In the Astrores format, from which VOTable is derived,
there are additional semantics for the {\elem{LINK}}
element; the \attr{href} attribute is used as a template for creating
URLs. This behavior is explained in \Arefx{LINK},
and it represents
a possible extension of VOTable.
\subsection{\elem{TABLE} Element}
\label{elem:TABLE}
The \elem{TABLE} element represents the basic data structure in VOTable;
it comprises a description of the table structure (the {\em metadata})
essentially in the form of \elem{PARAM} and \elem{FIELD} elements
(detailed in \Aref{sec:field}),
followed by the {\em values} of the described fields in a \elem{DATA}
element (detailed in \Aref{sec:data}).
The \elem{TABLE} element is always contained in a \elem{RESOURCE} element:
in other words
any \elem{TABLE} element has a single parent made of the
\elem{RESOURCE} element
in which the table is embedded.
The \elem{TABLE} element contains
a {\elem{DESCRIPTION}} element for descriptive remarks, followed
by a mixed collection of \elem{PARAM}, \elem{FIELD} or \elem{GROUP} elements
which describe a parameter (constant column), a field (column) or a group of
columns respectively. \elem{PARAM} and \elem{FIELD} elements are detailed in
\Aref{sec:field}, and the \elem{GROUP} element
is presented in \Aref{sec:group}.
Furthermore the \elem{TABLE} element may contain {\elem{LINK}} elements
that provide URL-type pointers, exactly like the {\elem{LINK}} elements
existing within a \elem{RESOURCE} element (see \Aref{sec:link}).
The last element included in a \elem{TABLE} is the optional \elem{DATA}
element (see \Aref{sec:data}): a table without any
actual data is quite valid, and is typically used to supply a complete
description of an existing resource e.g.\ for query purposes.
The \elem{TABLE} element may have the naming attributes \attr{name} and/or
\attr{ID} (see \Aref{sec:name}). A \elem{TABLE}
may also have a \attr{ref} attribute referencing the ID of another
table previously described, which is interpreted as
{\em defining a table having a structure identical to the one referenced}:
this facility avoids a repetition of the definition of tables which
may be present many times in a VOTable document.
It is recommended that the \attr{ref} attribute
references an {\em empty table} (i.e.\ a table without a
\elem{DATA} part), which avoids any ambiguity
about the referencing.
Finally, the \elem{TABLE} element may have a \attr{utype} and \attr{ucd}
attribute to specify the table semantics, similarly to the \elem{FIELD} and
\elem{PARAM} elements (see \Aref{elem:FIELD}).
\section{\elem{FIELD}s and \elem{PARAM}eters}
\label{sec:field}
The atoms of the table structure are represented by \elem{FIELD} and
\elem{PARAM} elements, where \elem{FIELD} represents the description
of an actual table column, while \elem{PARAM} supplies a value
attached to the table, like the \attr{Telescope}
in the example of \Arefs{example1}. A \elem{PARAM} may be
viewed as a \elem{FIELD} which keeps a {\em constant value} over all
the rows of a table, and the only difference in the set of attributes
of the two elements
is the existence of a \attr{value} attribute in a \elem{PARAM}
which does not exist in a \elem{FIELD}.
The \elem{FIELD} elements describe the actual columns of the table;
the order in which the \elem{FIELD}s are declared is important,
as this order {\em must} be the same one as the order of the
columns in \Aref{sec:data}.
A {\elem{FIELD}} or \elem{PARAM} element may have several sub-elements,
including the informational {\elem{DESCRIPTION}}
and {\elem{LINK}} elements (several descriptions and titles
are possible, see \Arefx{sec:addesc});
it may also include a {\elem{VALUES}} element
that can express limits and ranges of the values that the
corresponding cell can contain, such as minimum (\elem{MIN}),
maximum (\elem{MAX}), or
enumeration of possible values (\elem{OPTION}).
\subsection{Summary of Attributes}
\label{elem:FIELD}
\label{elem:PARAM}
The valid attributes of a \elem{FIELD} or \elem{PARAM} are:
\begin{itemize}
\item The \attr{name} and/or \attr{ID}. The \attr{ID} attribute is required
if the field has to be referenced (see
\Aref{sec:name}).
It may help to include the ordinal number of
the column in the table in the value of the \attr{ID} attribute
as e.g.\ \attrval{ID}{col3} when a single table is involved:
the connection to the
corresponding column would become
more obvious, especially in the FITS data serialization
which uses the ordinal column number in the keywords containing
the metadata related to that column.
\item The \attr{datatype}, which expresses the nature of the data
that is described as one of the permitted primitives
(see \Tref{primitives} and their exact meaning
in \Aref{sec:datatypes}).
This attribute determines
how data are read and stored internally;
it is {\em required}.
\item The \attr{arraysize} attribute exists when
the corresponding table cell contains more than one of the specified
datatype, as explained in \Aref{sec:dim}.
Note that strings are not a primitive type,
and have to be described as an array of characters. The
arraysize attribute should be omitted unless the corresponding
table cell contents is intended to be understood as an array
(see also \Aref{sec:dim}).
\item {\fg{black}}The \attr{width} and \attr{precision} attributes define the
numerical accuracy associated with the data
(see \Aref{sec:form}).
\item The \attr{xtype} attribute, added in VOTable 1.2, specifies an
{\em extended} (or {\em external}) datatype. It is meant
to give details about the column contents beyond the
primitive \attr{datatype}, like timestamps.
\item The \attr{unit} attribute specifies the units in which
the values of the corresponding column are expressed
(see \Aref{sec:unit}).
\item The \attr{ucd} attribute supplies a standardized classification
of the physical quantity expressed in the column
(see \Aref{sec:ucd}).
\item The \attr{utype} attribute, introduced in VOTable 1.1, is meant
to express the role of the column in the context of an external
data model (see \Aref{sec:utype}).
\item The \attr{ref} attribute is used to quote another element of
the document in the definition of a \elem{FIELD} or \elem{PARAM}.
It is used in the example of \Aref{example1}
to indicate the coordinate system in which the coordinates
are expressed
(reference to the \elem{COOSYS} element which specifies the
coordinate frame).
\item The \attr{type} attribute is {\em not} part of this standard,
but is reserved for future extensions (see
\Arefx{LINK},
\Arefx{query} and
\Arefx{location}).
\end{itemize}
In addition, in the \elem{PARAM} element only:
\begin{itemize}
\item the \attr{value} attribute which explicits the \elem{PARAM}eter's