-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathold_main_new.tex
1701 lines (1520 loc) · 89.1 KB
/
old_main_new.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[aoas,preprint]{imsart}
\setattribute{journal}{name}{}
\input{commands.tex}
\usepackage{amsmath}
\usepackage{pstricks,pst-grad}
\usepackage{graphicx}
\usepackage{floatrow}
\usepackage[linesnumbered,ruled,vlined]{algorithm2e}
\floatsetup[table]{capposition=top}
\usepackage{subfigure}
\usepackage[utf8]{inputenc}
\usepackage{booktabs} % horizontal lines in tables
\usepackage{comment}
% == Enable text
\usepackage{textcomp}
\usepackage{amsthm,amsmath,natbib}
\RequirePackage[colorlinks,citecolor=blue,urlcolor=blue]{hyperref}
% == Trygve Test
\usepackage{color, colortbl}
\definecolor{Gray}{gray}{0.9}
% put your definitions there:
\startlocaldefs
\newcommand{\edcomment}[1]{{\color{green}{\{Editor: #1\}}}}
\newcommand{\frevcomment}[1]{{\color{blue}{\{Rev 1: #1\}}}}
\newcommand{\srevcomment}[1]{{\color{red}{\{Rev 2: #1\}}}}
\newcommand{\trevcomment}[1]{{\color{violet}{\{Rev 3: #1\}}}}
\endlocaldefs
% New macros.
\usepackage{amssymb}
\usepackage{hyperref}
\usepackage{cleveref}
\usepackage{xcolor}
\input{macros}
\begin{document}
\begin{frontmatter}
% "New Title of the paper"
\title{%Autonomous Oceanographic Data Collection. Learning Excursion
% Sets of Vector-valued Gaussian Random Fields \\or\\
Learning Excursion Sets of Vector-valued Gaussian Random Fields for
Autonomous Ocean Sampling
}
\runtitle{Learning Excursion Sets for Autonomous Data Collection}
\begin{aug}
\author{\fnms{Trygve Olav} \snm{Fossum}\thanksref{t1,t2}, \corref{} \ead[label=e1]{[email protected]}}
\author{\fnms{Cédric} \snm{Travelletti}\thanksref{t3}, \corref{} \ead[label=e2]{[email protected]}}
\author{\fnms{Jo} \snm{Eidsvik}\thanksref{t4}, \ead[label=e3]{[email protected]}}
\author{\fnms{David} \snm{Ginsbourger}\thanksref{t3}, \ead[label=e4]{[email protected]}}
\and
\author{\fnms{Kanna} \snm{Rajan}\thanksref{t5}. \ead[label=e5]{[email protected]}}
\affiliation[t1]{Department of Marine Technology, The Norwegian University of Science and Technology (NTNU), Trondheim, Norway.}
\affiliation[t2]{Centre for Autonomous Marine Operations and Systems, NTNU.}
\affiliation[t3]{Institute of Mathematical Statistics and Actuarial Science, University of Bern, Switzerland.}
\affiliation[t4]{Department of Mathematical Sciences, NTNU.}
\affiliation[t5]{Underwater Systems and Technology Laboratory, Faculty of Engineering, University of Porto, Portugal.}
\address{\\Trygve Olav Fossum \\Department of Marine Technology\\ Otto Nielsens veg. 10, 7491 Trondheim\\ Norway\\
\printead{e1}}
\address{Cédric Travelletti\\ Institute of Mathematical Statistics and Actuarial Science \\ University of Bern \\
Switzerland.
\printead{e2}}
\address{Jo Eidsvik\\Department of Mathematical Sciences\\ Hogskoleringen 1, 7491 Trondheim\\ Norway\\ \printead{e3}}
\address{David Ginsbourger\\ Institute of Mathematical Statistics and Actuarial Science \\ University of Bern \\
Switzerland.
\printead{e4}}
\address{Kanna Rajan\\Underwater Systems and Technology Laboratory,
Faculty of Engineering,\\ Rua Dr. Roberto Frias\\ University of Porto, Portugal\\
\printead{e5}}
\runauthor{T.~O. Fossum et al.}
\end{aug}
\begin{abstract}
Improving and optimizing oceanographic sampling is a crucial task
for marine science and maritime resource management. Faced with
limited resources in understanding processes in the water-column,
the combination of statistics and autonomous systems provide new
opportunities for experimental design.
%
%In this work we develop methods for efficient spatial sampling applied to the mapping of coastal ocean processes by providing informative descriptions of spatial characteristics of ocean phenomena.
%
% REPLACED BY
%
In this work we develop efficient spatial sampling methods for
characterizing regions defined by simultaneous exceedances above
prescribed thresholds of several responses, with an application
focus on mapping coastal ocean phenomena based on temperature and
salinity measurements.
% EN REPLACEMENT
Specifically, we define a design criterion based on uncertainty in
the excursions of vector-valued Gaussian random fields, and derive
tractable expressions for the expected integrated Bernoulli variance
reduction in such a framework. We demonstrate how this criterion can
be used to prioritize sampling efforts at locations that are
ambiguous, making exploration more effective. We use simulations to
study and compare properties of the considered approaches, followed
by results from field deployments with an autonomous underwater
vehicle as part of a study mapping the boundary of a river
plume. The results demonstrate the potential of combining
statistical methods and robotic platforms to effectively inform and
execute data-driven environmental sampling.
%Motivated by the challenges related to efficient allocation of sampling resources in environmental sensing, the combination of Excursion Probabilities and Gaussian process modeling is explored for autonomous robotic sampling of ocean features; enabling information driven measures sampling efforts to high-interest regions. These regions are usually characterized by gradients of measurable environmental variables, e.g., temperature or salinity gradients, on which EPs subsequently can be used...Correlation among samples and multivariate requirements are typical in environmental studies.
\end{abstract}
\begin{keyword}
\kwd{Excursion Sets}
\kwd{Gaussian Processes}
\kwd{Experimental Design}
\kwd{Autonomous robots}
\kwd{Ocean Sampling}
\kwd{Adaptive Information Gathering}
\end{keyword}
\end{frontmatter}
\section{Introduction}
%NB: The subsections are used as a temporary instrument to highlight the proposed structure and should probably be abolished upon convergence to a stable version.
Motivated by the challenges related to efficient data collection
strategies for our vast oceans, we combine spatial statistics, design
of experiments and marine robotics in this work. The
multidisciplinary efforts enable information-driven data collection in
regions of high-interest.
\subsection{Oceanic data collection and spatial design of experiments}
Monitoring the world's oceans has gained increased importance in light
of the changing climate and increasing anthropogenic impact. Central
to understanding the changes taking place in the upper water-column is
knowledge of the bio-geophysical interaction driven by an
agglomeration of physical forcings (e.g. wind, topography, bathymetry,
tidal influences, etc.) and incipient micro-biology driven by
planktonic and coastal anthropogenic input, such as pollution and
agricultural runoff transported into the ocean by rivers. These often
result in a range of ecosystem-related phenomena such as blooms and
plumes, with direct and indirect effects on society \citep{ryan2017}. One of the
bottlenecks in the study of such phenomena lies however in the lack of
observational data with sufficient resolution. Most of this
\emph{undersampling} can be attributed to the large spatio-temporal
variations in which ocean processes transpire, prompting the need for
effective means of data collection. By \emph{sampling}, we refer here
primarily to the design of observational strategies in the spatial
domain with the aim to pursue measurements with high scientific
relevance. Models and methods from spatial statistics and experimental design can clearly contribute to this challenge.
%applications and beyond.
%By \emph{sampling}, we refer to the design of observational strategies in the spatial domain, where the use autonomous robotic platforms can be combined with statistical methods to pursue measurements with high scientific relevance. This combination, of statistical tools and robotic platforms, constitute a methodological basis for addressing the problem of efficient sampling of the ocean, which is at the core of this work.
Data collection at sea has typically been based on static buoys,
floats, or ship-based methods, with significant logistical limitations
that directly impact coverage and sampling resolution. Modern methods
using satellite remote-sensing provide large-scale coverage but have
limited resolution, are limited to sensing the surface, and are
impacted by cloud cover. Numerical ocean models similarly find it
challenging to provide detail at fine scale \citep{Lermusiaux:2006},
and also come with computational costs that can be limiting. The
advent of robust mobile robotic platforms \citep{Bellingham07} has
resulted in significant contributions to environmental monitoring and
sampling in the ocean (Fig.~\ref{fig:envir1}). In particular,
autonomous underwater vehicles (AUVs) have advanced the state of data
collection and consequently have made robotics an integral part of
ocean observation \citep{das11b,Das2015,fossuminformation,fossum18b}.
\begin{figure}[!h]
\centering
\subfigure[Illustration of a range of ocean sensing opportunities.]{\includegraphics[width =
0.49\textwidth]{Figures/envir.pdf}\label{fig:envir1}}
\hfill
\subfigure[Frontal patterns off of the Nidelva river, Trondheim, Norway.]{\includegraphics[width =
0.49\textwidth]{Figures/river_proccess.pdf}\label{fig:nidelven}}
\caption{\ref{fig:envir1} Traditional ocean observation based on
ship-based sampling has been augmented by autonomous
robotic vehicles such as AUVs. % and their interactions.
%AUV platforms are an integral part of this network being able to reason and make decisions for efficient onboard adaptive sampling.
% using the sense-plan-act control approach to autonomous control.
\ref{fig:nidelven} The interaction of river and ocean creates
processes that are challenging to map, where the combination of
statistics and robotics can play a vital role in enabling more
effective oceanographic observation.}
\label{fig:envir} \end{figure}
Surveys with AUVs are usually limited to observations along fixed
transects that are pre-scripted in mission plans created manually by a
human operator. Missions can be specified operating on a scale of
hundreds of meters to tens of kilometers depending on the scientific
context. Faced with limited coverage capacity, a more effective
approach is to instead use onboard algorithms to continuously
evaluate, update, and refine future sampling locations, making the
data collection \emph{adaptive}. In doing so, the space of sampling
opportunities is still limited by a waypoint graph, which forms a
discretization of the search domain where the AUV can navigate;
however the AUV can now modify its path at each waypoint based on
in-situ measurements and calculations onboard using onboard
deliberation \citep{py10,Rajan12,Rajan12b}. Full numerical ocean
models based on complex differential equations cannot be run onboard
the AUV with limited computational capacity, and statistical models
relying on random field assumptions are relevant as a means to
effectively update the onboard model from in-situ data, and to guide
AUV data collection trajectories.
The work presented here is primarily inspired by a case study
pertaining to using an AUV for spatial characterization of a frontal
system generated by a river plume. Fig.~\ref{fig:nidelven} shows the
survey area in Trondheim, Norway, where cold freshwater enters the fjord from a river, creating a strong gradient in both temperature and
salinity. Because of the local topography and the Coriolis force the
cold fresh water tends to flow east. Depending on the variations in
river discharge, tidal effects, coastal current and wind, this
boundary often gets distorted, and knowledge about its location is
highly uncertain, making deterministic planning challenging. The goal
is therefore to use AUV measurements for improved description of the
interface between fresh and oceanic waters. It is often not possible
to sample the biological variables of fundamental interest in such AUV
operations, but off-the-shelf instruments provide temperature and
salinity measurements which serve as proxies for the underlying
biological phenomenon. With the help of a vector-valued random field
model for temperature and salinity, one can then aim to describe the
plume. The goal of plume characterization, in this way, relates to
that of estimating some regions of the domain, typically excursion
sets (ESs), when implicitly defined by the vector-valued random field.
In our context of environmental sampling, the joint salinity and
temperature excursions of a river plume help characterize the
underlying bio-geochemical processes
\citep{hopkins2013detection,Pinto2018}. Motivating examples for ESs of
multivariate processes are also abundant in other contexts, for
instance in medicine, where physicians do not rely solely on a single
symptom but must see several combined effects before making a
diagnosis.
The questions tackled here hence pertain to the broader area of
spatial data collection for vector-valued random fields.
%Because of limited resources to sample the large oceanographic domains, one must plan for active learning during the operation, where a relevant criterion is used to extract the most valuable designs.
Given the operational constraints on AUV movements and the fact that
surveys rely on successive measurements along a trajectory, addressing
corresponding design problems calls for sequential strategies. Our
main research angle in the present work is to extend sequential design
strategies from the world of spatial statistics and computer
experiments to the setting of both vector-valued observational data
and experimental designs for feasible robotic trajectories. We
leverage and extend recent progress in expected uncertainty reduction
for ESs of Gaussian random fields (GRFs) in order to address this
research
problem. %approach will take substantial advantage of existing work in the field of excursion set estimation mainly dedicated to computer experiments in the scalar-valued case, and where space exploration can be performed without constraints regarding the distance between successive design points.
We briefly review recent advances in targeted sequential design of
experiments based on GRFs before detailing other literature related to
AUV sampling and our contributions prior to outlining the rest of the
paper.
\subsection{Random field modeling and targeted sequential design of experiments}
While random field modeling has been one of the main topics throughout the history of spatial statistics \citep{Krige1951a,Stein1999}, even
for vector-valued random field models with associated prediction
approaches such as co-Kriging \citep[See, e.g.,][]{Wackernagel2003},
there has lately been a renewed interest for random field models in
the context of static or sequential experimental design, be it in the context of spatial data collection \citep{Mueller2007} or in
simulation experiments \citep{Santner.etal2003}. As detailed in
\cite{Ginsbourger2018}, GRF models have been used in particular as a
basis to sequential design of simulations dedicated to various goals
such as global optimization and set estimation. Of particular relevance to our context,
\cite{Bect.etal2012} focuses on strategies to reduce uncertainties related to volumes of excursion exceeding a prescribed threshold, while \cite{chevalier2014fast} concentrates on making the latter strategies computationally efficient and batch-sequential. Rather than focusing on excursion volumes, approaches were investigated in
\cite{French.Sain2013,Chevalier.etal2013b,Bolin.Lindgren2015,Azzimonti.etal2016}
with ambitions of estimating sets themselves. Recently, sequential
designs of experiments for the conservative estimation of ESs based on
GRF models were presented in \citep{Azzimonti.etal}.
Surprisingly less attention has been dedicated to sequential
strategies in the case of vector-valued observations. It has been long
acknowledged that co-Kriging could be updated efficiently in the context of sequential data assimilation \citep{Vargas-Guzman1999}, but sequential
strategies for estimating features of vector-valued random fields are
still in their infancy. \cite{LeGratiet.etal2015} applied co-Kriging-based
sequential designs to multi-fidelity computer codes and
\cite{Poloczek2017} used related ideas for multi-information source
optimization, but not for ES's like we do here. More relevant to our
setting, \citet[p.82]{stroh} mentions general
possibilities of step-wise uncertainty reduction strategies for ES's in
the context of designing fire simulations, yet outputs are mainly
assumed independent.
% From earlier version; to be relocated?
%Hence statistical proxy models of the environment must be used. For our purpose, we rely on Gaussian Random Field (GRF) representations of the ocean variables of interest because they are computationally convenient and yet provide enough flexibility to realistically model the spatial variability and dependence, and the correlation between multiple processes.
\subsection{Previous work in AUV sampling}
Other statistical work in the oceanographic domain include
\cite{wikle2013modern} focusing on hierarchical statistical models,
\cite{sahu2008space} studying spatio-temporal models for sea surface
temperature and salinity data and \cite{mellucci2018oceanic} looking
at the statistical prediction of features using an underwater glider.
In this work the main focus is not on statistical modeling per se, but
rather on statistical principles and computation underlying efficient
data collection. We combine novel possibilities in marine robotics
with spatial statistics and experimental design to provide useful AUV
sampling designs.
Adaptive in-situ AUV sampling of an evolving frontal feature has been
explored in \cite{fronts11,Smith2016,Pinto2018,costa19}. These
approaches typically use a reactive-adaptive scheme, whereby
exploration does not rely on a statistical model of the environment,
but rather adaptation is based on closing the sensing and actuation
loop. Myopic sampling, i.e. stage-wise selection of the path (on the
waypoint graph), has been used for surveys
\citep{singh2009efficient,Binney2013} that focus largely on reducing
predictive variance or entropy. These criteria are widely adopted in
the statistics literature on spatio-temporal design as well
\citep{bueso1998state,zidek2019monitoring}. However, since response variance and entropy in GRF models depend only on measurement locations and not on response values, criteria based on them tend to have limited flexibility for
active adaptation of trajectories based on measurement values. The use
of data-driven adaptive criteria was introduced to include more
targeted sampling of regions of scientific interest in \cite{Low2009}
and \cite{fossuminformation}.
The primary contributions of this work are:
\begin{itemize}
\item Extending uncertainty reduction criteria to
vector-valued cases.
\item Closed-form expressions for the expected integrated Bernoulli
variance (IBV) of the excursions in GRFs.
\item Algorithms for myopic and multiple-step ahead sequential
strategies for optimizing AUV sampling with respect to the mentioned
criteria.
\item Replicable experiments on synthetic cases with accompanying
code.
\item Results from full-scale field trials running myopic strategies onboard an AUV for the characterization of a river plume.
\end{itemize}
The remainder of this paper is organized as follows:
Section \ref{sec:ESEP} defines ESs, excursion probabilities (EPs), and
the design criteria connected to the IBV for excursions of
vector-valued GRFs. Section \ref{sec:heuristics} builds on these
assumptions when deriving the sequential design criteria for adaptive
sampling. In both sections properties of the methods are studied using
simulations. Section \ref{sec:case_study} demonstrates the methodology
used in field work characterizing a river plume. Section
\ref{sec:concl_disc} contains a summary and a discussion of future
work.
\section{Quantifying uncertainty on Excursion Sets implicitly defined by GRFs}
\label{sec:ESEP}
Section \ref{sec:bg_and_notation} introduces notation and co-Kriging
equations of multivariate GRFs. Section \ref{sec:set_uq} presents
uncertainty quantification (UQ) techniques on ESs of GRFs, in
particular the IBV and the excursion measure variance (EMV). Section
\ref{sec:eibv} turns to the effect of new observations on EMV and IBV,
and semi-analytical expected EMV and IBV over these observations are
derived.
%The resulting formulae form the backbone of the expected sequential uncertainty reduction strategies presented in Section \ref{sec:heuristics}.
Section \ref{Sec:UnivarEx} illustrates the concepts on a bivariate
example relevant for temperature and salinity in our case.
\subsection{Background, Notation and co-Kriging}
\label{sec:bg_and_notation}
We denote by $\gp$ a vector-valued random field indexed by some
arbitrary domain $\domain$, and assume values of the field at any
fixed location $\x \in \domain$, denoted $\gp[\x]$, to be a
$\no$-variate random vector ($\no\geq 2$). In the river plume
characterization case, $\domain$ is a prescribed domain in
Trondheimsfjord, Norway (for the purpose of our AUV application, a
discretization of a $2$-dimensional domain at fixed depth is
considered), and $\no=2$ with responses of temperature and salinity. A
bivariate GRF model is assumed for $\gp$. To motivate concepts,
Figs.~\ref{fig:real_temp} and \ref{fig:real_sal} show a realization of such a
vector-valued GRF on $\domain=[0,1]^2$. Fig.~\ref{fig:jointex_roi} represents a by-product of interest derived from these
realizations, namely regions: i) in red, where both temperature and salinity are high (i.e., exceeding respective thresholds), indicative of ocean water, ii) in white, where both temperature and salinity are low, indicative of riverine water, and iii) in light-red, where one variable is above and the other below their respective thresholds, indicative of mixed waters.
\begin{figure}[!b]
\centering
\subfigure[Temperature.]{\includegraphics[
height=0.27\textwidth,keepaspectratio]{Figures/intro_temp.png}\label{fig:real_temp}}
\subfigure[Salinity.]{\includegraphics[
height=0.27\textwidth,keepaspectratio]{Figures/intro_sal.png}\label{fig:real_sal}}
\subfigure[Regions of interest.]{\includegraphics[
height=0.27\textwidth,keepaspectratio]{Figures/intro_excu.png}\label{fig:jointex_roi}}
\caption{Realization of a bivariate GRF (display (a) and (b)) and excursion set above some threshold (c). Joint excursion in red and excursion of a single variable in light-red.}
\label{example_excu}
\end{figure}
%The methods we develop are fairly general, and not limited to the special case of two-dimensional random fields. Hence, the rest of this section will consider generic excursion sets of gaussian random fields with an arbitrary number of output dimensions.
% Say we have an underlying phenomenon that is modeled as a $\no$-variate gaussian random field $\gp$ on some domain $\domain$,
For the general setting of a $\no$-variate random field, we are
interested in recovering the set of locations $\es$ in the domain for
which the components of $\gp$ lie in some set of specified values
$\T\subset \mathbb{R}^{\no}$; in other words \textit{the pre-image of
$T$ by $\gp$}:
\[
\es:=\gp^{-1}(\T)=\{\x \in \domain: \gp[\x] \in \T\}.
\]
If we assume that $\gp$ has continuous trajectories
and $T$ is closed, then $\es$ becomes a Random Closed Set
\citep{Molchanov2005} and concepts from the theory of random sets will
prove useful to study $\es$.
%under some measure defined on the domain.
%\textcolor{red}{Is this really needed?}
Note that while some aspects of the developed approaches do not call
for a specific form of $\T$, we will often, for purposes of
simplicity, stay with the case of orthants
($\T=(-\infty, t_1] \times \dots \times (-\infty, t_{\no}]$ where
$t_1,\dots, t_{\no} \in \R$) as this will allow efficient calculation
of several key quantities. Note that changing some $\leq$ inequalities
to $\geq$ ones would lead to immediate adaptations.
Letting $\gp[\spatloc,\ell]$ denote the $\ell\text{-th}$ component of
$\gp[\spatloc]$ ($1\leq \ell\leq \no$), we use the term
\textit{generalized location} for the couple $x=(\spatloc,\ell)$.
%Given such a generalized location notation $x$,
The notation $\gp[x]$ will be used to denote $\gp[\spatloc,\ell]$
and
%
%This slight change of notation
will allow us to think of $\gp$ as a scalar-valued random field indexed by $\domain \times \{1,\dots,\no\}$, which will give the co-Kriging equations a particularly simple form that parallels the one of univariate Kriging. %Due to the naturality of the concept of \textit{generalized location}, we will generally use the word \textit{location}, while \textit{spatial location} will be used to stress that we are talking about a point $\spatloc \in \domain$. In the following, the letter $x$ will be reserved for generalized locations, while
The letters $\spatloc$ and $\ell$ will be used for spatial locations and response indices respectively.
%
% Replacement by overlooked revised chunks from June 19th
%\begin{comment}
%Given a dataset consisting of $q$ observations (a batch) at spatial locations $\spatloc_i \in \domain$ and response indices $l_i \in \lbrace 1, ..., \no\rbrace$, $i=1, ..., q$, we use boldface letters to denote concatenated quantities;
%\begin{align*}
%\bm{x}:=(\bm{\spatloc}, \bm{\ell}):= (x_1,\dots, x_q),~\text{with }x_i=(\spatloc_i,\ell_i).
%\end{align*}
%In general, corresponding to batches of observations. %The field values of batch observations are denoted by
%\begin{align*}
%\gp[\bm{x}]:=
%\left(\gp[\spatloc_1,\ell_1], ...,
%\gp[\spatloc_q,\ell_{q}]\right) \in \mathbb{R}^{q}.
%\end{align*}
%In the same fashion, $\mu(\bm{x})$ denotes the $q$-dimensional vector corresponding to the mean at $\bm{x}$ and $k(\bm{x}, \bm{x})$ the $q \times q$ covariance matrix.
%Given a random field $\gp$ and (noisy) observations of some of its components at selected points in the domain, one can predict the value of the field at location $\spatloc\in \domain$ by using the conditional mean of $\gp[\spatloc]$ and the associated covariance . This process is called co-Kriging.% , and kriging equations precisely tell us how to compute conditional means and covariances conditional on an arbitrary dataset.
%We present a general form of cokriging, where observations at locations $s \in \domain$ may only include a subset of the components of $\gp[\spatloc]\in\mathbb{R}^{\no}$ (heterotopic).
%Assuming that one wishes
%to predict $\gp[\bm{x}]$ at $q\geq 1$ generalized locations $\bm{x}=(\bm{\spatloc}, \bm{\ell})$,
%given $n$ batched of observations $\mathbf{z}_{[n]}$ at $\sum_{i=1}^n q_i$ generalized locations $\bm{x}_{[n]}=(\bm{x}_1,\dots, \bm{x}_n)$.
%The co-Kriging mean then amounts to simple kriging with respect to a scalar-valued GRF indexed by $\domain \times \{1\dots,p\}$: % and with covariance kernel $k(\bm{x}, \bm{x}')=K(\bm{\spatloc}, \bm{\spatloc}')_{\ell, \ell'}$, that is:
%
%\begin{equation}\label{eq:cokrig_mean}
%\mu_{[n]}(\bm{x})=\mu(\bm{x})+\lambda_{[n]}(\bm{x})^T (\mathbf{z}_{[n]}-\mu(\bm{x})),
%\end{equation}
%where $\mathbf{z}_{[n]}$ stands for the ($\sum_{i=1}^n q_i$)-dimensional vector of observed responses of $Z$ at all considered generalized locations, and
%where $\lambda_{[n]}(\bm{x})=\left[ k(\bm{x}_{[n]}, \bm{x}_{[n]}) + R(\bm{x}_{[n]}, \bm{x}_{[n]}) \right]^{-1} k(\bm{x}_{[n]}, \bm{x})$ is a vector of weights and matrix $R(\bm{x}_{[n]}, \bm{x}_{[n]})$ holds the observation noise covariance at the generalized locations.
%$k(\bm{x}_{[n]}, \bm{x}_{[n]})$ being assumed non-singular throughout the presentation.
%The associated co-Kriging %(conditional)
%residual cross-covariance function can be expressed in the same vein via
%
%\begin{equation}\label{eq:cokrig_cov}
%k_{[n]}(\bm{x},\bm{x}')=k(\bm{x},\bm{x}')-\lambda_{[n]}(\bm{x})^T \left[ k(\bm{x}_{[n]}, \bm{x}_{[n]}) + R(\bm{x}_{[n]}, \bm{x}_{[n]}) \right] \lambda_{{[n]}}(\bm{x}').
%\end{equation}
%\subsubsection{co-Kriging update formulae}
%In our case it is important to update the GRF model efficiently onboard the vehicle after every batch of obervations. Our setting is like that of a spatial Kalman filter with static model parameters, and we provide the co-Kriging equations for this case. Because of our representation of co-Kriging in terms of simple kriging with respect to generalized locations, the formulae is a straightforward adaptation of the batch-sequential Kriging update formulae from \cite{Chevalier.etal2013a}. The formulae are instrumental in deriving semi-analytical results for stepwise expected uncertainty reduction criteria for vector-valued random fields in the subsequent sections.
%Given the observations from $n$ batches, one wishes to update the prediction by incorporating a new vector of observations $\mathbf{z}_{n+1}$ measured at a batch of $q_{n+1} \geq 1$ generalized locations $\bm{x}_{n+1}$. The mean update formulae is
%It turns out that the concept of \textit{generalized location} makes the kriging formulae form-invariant across all dimensions. This allows us to directly adapt the
% delivers that
%
%\begin{equation}\label{eq:meanCoK}
%\mu_{[n+1]}(\bm{x})=\mu_{[n]}(\bm{x})+\lambda_{[n+1,n+1]}(\bm{x})^T (\mathbf{z}_{n+1}-\mu(\bm{x}_{n+1})),
%\end{equation}
%where $\lambda_{[n+1,n+1]}(\bm{x})$ denotes the $q_{n+1}$-dimensional sub-vector extracted from
%$\lambda_{[n+1]}(\bm{x})$ that corresponds to the Kriging weigths associated with the last $q_{n+1}$ responses when
%predicting at $\bm{x}$ relying on all measurements until batch $(n+1)$.
%\text{th}$ batch.
%, i.e. those from the $(n+1)\text{th}$ batch of measurements conducted at $\bm{x}_{n+1}$.
%The updated co-Kriging residual cross-covariance function is
%\begin{eqnarray}\label{eq:varCoK}
%k_{[n+1]}(\bm{x},\bm{x}') &=& k_{[n]}(\bm{x},\bm{x}') -c(\bm{x},\bm{x}',\bm{x}_{[n]}), \\
%c(\bm{x},\bm{x}',\bm{x}_{[n]})&=&\lambda_{[n+1,n+1]}(\bm{x})^T \left[k_{[n]}(\bm{x}_{[n]}, \bm{x}_{[n]})+R(\bm{x}_{[n]}, \bm{x}_{[n]}) \right] \lambda_{{[n+1,n+1]}}(\bm{x}'). \nonumber
%\end{eqnarray}
%\medskip
%
%We remark that, as noted in \cite{Chevalier2015} in the case of scalar-valued fields, these update formulae naturally
%extend to Universal Kriging in second-order settings and apply without Gaussian assumption.
%\end{comment}
%
%It turns out that the inclusion of several observations at a time (a batch) may be handled by the same equations as the
%one for a single observation, provided some notation adjustments are made. This motivates the following.
Furthermore, boldface letters will be used to denote concatenated
quantities corresponding to batches of observations. Given a dataset
consisting of
$q$ observations at spatial locations
$\bm{\spatloc}=(\spatloc_1,\dots,\spatloc_q) \in
\domain^q$ and response indices $\bm{\ell}=(\ell_1,\dots, \ell_q)\in
\lbrace 1, ..., \no\rbrace^q$, %$i=1, ..., q$,
we use the concatenated notation
\begin{align*}
\bm{x}:=%(\bm{\spatloc}, \bm{\ell}):=
(x_1,\dots, x_q),~\text{with }x_i=(\spatloc_i,\ell_i).
\end{align*}
We also compactly denote the field values at those different locations by
\begin{align*}
\gp[\bm{x}]:=
\left(\gp[\spatloc_1,\ell_1], ...,
\gp[\spatloc_q,\ell_{q}]\right) \in \mathbb{R}^{q}.
\end{align*}
For a second order random field $(Z_{\spatloc})_{\spatloc \in
\domain}$ with mean $\mu$ and matrix covariance function $K$,
$\mu$ is naturally extended to $\domain \times \lbrace 1, ...,
\no\rbrace$ into a function of $x=(\spatloc,
\ell)$ and is further straightforwardly vectorized into a function of
$\bm{x}$. As for $K$, it induces a covariance kernel
$k$ on the set of extended locations via $k((\spatloc,
\ell),(\spatloc', \ell'))=K(\spatloc, \spatloc')_{\ell,
\ell'}$. In vectorized/batch form, $k(\bm{x},
\bm{x}')$ then amounts to a matrix with numbers of lines and columns equal to
the numbers of generalized locations in
$\bm{x}$ and
$\bm{x}'$, respectively. Such
vectorized quantities turn out to be useful in order to arrive at
simple expressions for the co-Kriging equations below.
%\textcolor{red}{(ADDENDUM, C.T.) In the same fashion, we will use $\mu(\bm{x})$ to denote the $q$-dimensional vector corresponding to the mean at $\bm{x}$ and $k(\bm{x}, \bm{x})$ for the corresponding $q \times q$ covariance matrix.}
Given a GRF $\gp$ and observations of some of its components at
locations in the domain, one can predict the value of the field at
some unobserved location $\spatloc\in \domain$ by using the
conditional mean of $\gp[\spatloc]$, conditional on the data. This
coincides with co-Kriging equations, which tell us precisely how to
compute conditional means and covariances. We will present a general
form of co-Kriging, in the sense that it allows inclusion of several
(batch) observations at a time; observations at a given location
$u \in \domain$ may only include a subset of the components of
$\gp[u]\in\mathbb{R}^{\no}$ (heterotopic).
Assuming that $n$ batches of observations are available with sizes
$q_1,\dots, q_n$, and that one wishes to predict $\gp[\bm{x}]$ for
some batch of $q\geq 1$ generalized locations
$\bm{x}$, %\equiv(\bm{\spatloc}, \bm{\ell})$,
the simple co-Kriging mean then amounts to Kriging with respect to a
scalar-valued GRF indexed by $\domain\times \{1,\dots,\no\}$:
%
\begin{equation}\label{eq:cokrig_mean}
\mu_{[n]}(\bm{x})=\mu(\bm{x})+\lambda_{[n]}(\bm{x})^T (\mathbf{z}_{[n]}-\mu(\bm{x})).
\end{equation}
Here, $\mathbf{z}_{[n]}$ stands for the ($\sum_{i=1}^n
q_i$)-dimensional vector of observed (noisy) responses of
$Z$ at all considered generalized locations, and
$\lambda_{[n]}(\bm{x})$ is a vector of weights equal to
$$\left(k(\bm{x}_{[n]}, \bm{x}_{[n]})+\Delta_{[n]} \right)^{-1} k(\bm{x}_{[n]}, \bm{x})
$$
with $\bm{x}_{[n]}=(\bm{x}_1,\dots,
\bm{x}_n)$ and where
$\Delta_{[n]}$ is the covariance matrix of Gaussian-distributed noise
assumed to have affected measurements up to batch
$n$. For our applications with salinity and temperature observations,
this matrix is diagonal because we assume conditionally independent
sensor readings, but it might not be diagonal with other types of
combined measurements. The matrix in parenthesis will be assumed to
be non-singular throughout the presentation. The associated
co-Kriging %(conditional)
residual (cross-)covariance function can also be expressed in the same
vein via
%
\begin{equation}\label{eq:cokrig_cov}
k_{[n]}(\bm{x},\bm{x}')=k(\bm{x},\bm{x}')-\lambda_{[n]}(\bm{x})^T
\left(k(\bm{x}_{[n]}, \bm{x}_{[n]})+\Delta_{[n]} \right)
\lambda_{{[n]}}(\bm{x}').
\end{equation}
%\subsubsection{co-Kriging update formulae}
Let us now consider the case where a co-Kriging prediction of $Z$ was
made with respect to $n$ batches of generalized locations, concatenated again within $\bm{x}_{[n]}=(\bm{x}_1,\dots, \bm{x}_n)$,
and one wishes to update the prediction by incorporating a new vector
of observations $\mathbf{z}_{n+1}$ measured at a batch of
$q_{n+1} \geq 1$ generalized locations $\bm{x}_{n+1}$.
%It turns out that the concept of \textit{generalized location} makes the kriging formulae form-invariant across all dimensions. This allows us to directly adapt the
Thanks to our representation of co-Kriging in terms of simple Kriging
with respect to generalized locations, a strightforward adaptation of
the batch-sequential Kriging update formulae from
\citep{Chevalier.etal2013a} suggests that
%
\begin{equation}\label{eq:meanCoK}
\mu_{[n+1]}(\bm{x})=\mu_{[n]}(\bm{x})+\lambda_{[n+1,n+1]}(\bm{x})^T (\mathbf{z}_{n+1}-\mu(\bm{x}_{n+1})),
\end{equation}
where $\lambda_{[n+1,n+1]}(\bm{x})$ denotes the $q_{n+1}$-dimensional
sub-vector extracted from $\lambda_{[n+1]}(\bm{x})$ that corresponds
to the Kriging weights for the last $q_{n+1}$ responses
when predicting at $\bm{x}$ relying on all measurements until batch
$(n+1)$.
%\text{th}$ batch.
%, i.e. those from the $(n+1)\text{th}$ batch of measurements conducted at $\bm{x}_{n+1}$.
The associated co-Kriging residual (cross-)covariance function is
\begin{align}\label{eq:varCoK}
k_{[n+1]}(\bm{x},\bm{x}') & = k_{[n]}(\bm{x},\bm{x}')\\
\nonumber - & \lambda_{[n+1,n+1]}(\bm{x})^T
\left(k_{[n]}(\bm{x}_{n+1}, \bm{x}_{n+1})+\Delta_{n+1}\right)
%k_{[n]}(\bm{x}_{[n]}, \bm{x}_{[n]})
\lambda_{[n+1,n+1]}(\bm{x}').
\end{align}
%\medskip
%
As noted by \citet{Chevalier2015} in the case of scalar-valued fields,
these update formulae naturally extend to universal Kriging in
second-order settings and apply without Gaussian assumptions. We will
now see how the latter formulae are instrumental in deriving
semi-analytical formulae for step-wise uncertainty reduction criteria
for vector-valued random fields.
\subsection{Uncertainty Quantification on ESs of multivariate GRFs}
\label{sec:set_uq}
We now introduce quantities that allow UQ on the volume of the ES
$\es$. Let $\mes$ be a (locally finite, Borel) measure on
$\domain$. We want to investigate the probability distribution of
$\mes(\es)$ through its moments. Centered moments may be computed
using Proposition~\ref{propo1} developed in the appendix. In
particular, expressed as an integral over (joint) EPs, the
$\EMV = \operatorname{Var}[\mes(\es)]$ is:
\begin{equation*}
\begin{split}
\EMV
&=\int_{\domain^2} \mathbb{P}\left(
\gp[u]\in T, \gp[v]\in T \right)
d\mes^{\otimes}(u, v)\\
&-\left( \int_{\domain} \mathbb{P}\left(\gp[u]\in T\right) d\mes(u) \right)^2,
\end{split}
\end{equation*}
which in the excursion/sojourn case where $\T=(-\infty, t_1] \times
\dots \times (-\infty, t_{\no}]$ is
\begin{equation*}
\begin{split}
\EMV
%\operatorname{Var}[\mes(\es)]
&=\int_{\domain^2}
\varPhi_{2\no}
\left(
(\bt, \bt); \mu((u,v)),
K((u,v),(u,v))
\right)
\
\mathrm{d}\mes^{\otimes} %\mes
%\productMeasure
(u,v)\\
&-\left( \int_{\domain} \varPhi_{\no}\left(\bt;\mu(u), K(u)\right) d\mes(u) \right)^2,
\end{split}
\end{equation*}
where $\varPhi_{\no}$ denotes the $\no$-variate Gaussian cumulative
distribution function (CDF), which can be evaluated numerically \citep{genz2009computation}.
Note that this quantity requires the solution of an integral over
$\domain^2$. In contrast, the IBV of
\cite{bect2019} involves solely an integral on $\domain$ and can be
expanded as
\begin{equation*}
\begin{split}
\operatorname{IBV} %(\es) %;\mes)
&=\int_{\domain}
\mathbb{P}\left(\gp[\uu]\in T\right)(1-\mathbb{P}\left(\gp[\uu]\in T\right))
d\mes(u) \\
&=\int_{\domain}
\varPhi_{\no}\left(\bt;\mu(\uu), K(\uu)\right)
-\left(\varPhi_{\no}\left(\bt;\mu(\uu), K(\uu)\right) \right)^2
\mathrm{d}\mes(u).
\end{split}
\end{equation*}
%
\subsection{Expected IBV and EMV}
\label{sec:eibv}
We compute the expected effect of the inclusion of new observations on
the $\EMV$ and $\IBV$ of the ES $\es$. Let us consider the
same setting as in Eq. \eqref{eq:meanCoK} and \eqref{eq:varCoK}, and
let $\currentExp{.}$ and $\currentProba{.}$ denote conditional
expectation and probability conditional on the first $n$ batches of
observations, respectively. %We want to compute the effect of including a new set of observations at $\bm{x}_{n+1}$ on the $\emv$ and $\ibv$.
We use $\IBV_{\stage}$ to denote $\IBV$ with respect to the conditional law $\mathbb{P}_{\stage}$.
\medskip
In order to study the effect of the inclusion of a new data point, we
let $ \currentIBV(\bm{x}; \bm{y}) $ denote the expected IBV under the
current law of the field, conditioned on observing $\bm{y}$ at $\bm{x}$
(generalized, possibly batch observation). The expected effect of a
new observation on the IBV is then
\begin{equation}\label{def:eibv}
\currentEIBV(\bm{x}):=\currentExp{\mbox{IBV}(\bm{x}; \bm{Y})},
\end{equation}
where $\bm{Y}$ is distributed according to the current law of
$Z_{\bm{x}}$ and with independent noise having covariance matrix
$\Delta_n$.
We next present a result that allows efficient computation of $\EIBV$
as an integral of CDFs of the multivariate Gaussian distribution. This will prove
useful when designing sequential expected uncertainty reduction strategies.
\begin{propo}
\label{propo_eibv}
\begin{equation}
\begin{split}
\currentEIBV(\bm{x})
&=\int_{\domain} \varPhi_{\no}\left(\bt;~\currentMean{\uu}, \currentCov{u, u}\right) d\mes(u)\\
&-\int_{\domain} \varPhi_{2\no}
\left(
\left(
\begin{matrix}
\bt-\currentMean{u}\\
\bt-\currentMean{u}
\end{matrix}
\right);
\mathbf{\Sigma}_{[n]}(\uu)
\right)
d\mes(u),
\end{split}
\end{equation}
where the matrix $\mathbf{\Sigma}_{[n]}(\uu)$ is defined as
\begin{equation*}
\begin{split}
\mathbf{\Sigma}_{[n]}(\uu)&=
\left(
\begin{matrix}
\currentCov{u, u} & \currentCov{u, u}-\futureCov{u, u}\\
\currentCov{u, u}-\futureCov{u, u} & \currentCov{u, u}
\end{matrix}
%\begin{matrix}
%\currentCov{u, u} & \Delta_{[n]}(\uu)\\
%\Delta_{[n]}(\uu) & \currentCov{u, u}
%\end{matrix}
\right).\\
\end{split}
\end{equation*}
\end{propo}
As for the expected EMV, a similar result may be derived.
\begin{propo}
\label{propo_emv}
\begin{equation*}
\begin{split}
\currentEEMV(\bm{x})
&=\int_{\domain^2}
\varPhi_{2\no}
\left(
(\bt, \bt); ~ \mu((u,v)),
K((u,v),(u,v))
\right)
\
\mathrm{d}\mes^{\otimes} %\mes
%\productMeasure
(u,v)\\
&-\int_{\domain^2} \varPhi_{2\no}
\left(
\left(
\begin{matrix}
\bt-\currentMean{\uu}\\
\bt-\currentMean{\vv}
\end{matrix}
\right);~
\mathbf{\tilde{\Sigma}}_{[\stage]}(\uu, \vv)
\right)
\mathrm{d}\mes^{\otimes} %\mes
%\productMeasure
(u,v)
\end{split}
\end{equation*}
where the matrix $\mathbf{\tilde{\Sigma}}_{[n]}(\uu, \vv)$ is defined blockwise as
\begin{equation*}
\begin{split}
\mathbf{\tilde{\Sigma}}_{[n]}(\uu, \vv)&=
\left(
\begin{matrix}
\tilde{\Sigma}_{1,1}(\uu, \uu) & \tilde{\Sigma}_{1,2}(\uu, \vv)\\
\tilde{\Sigma}_{2,1}(\vv, \uu) & \tilde{\Sigma}_{2,2}(\vv, \vv)
\end{matrix}\right)
\end{split}
\end{equation*}
with blocks given, for $i,j\in \{1,2\}$ and $u,v\in \domain$, by
\begin{equation*}
\begin{split}
\tilde{\Sigma}_{i,j}(u, v) &= \lambda_{[n+1,n+1]}(\uu)^T k_{[n]}(\bm{x},\bm{x}) \lambda_{[n+1,n+1]}(\vv) + \delta_{i,j}\futureCov{\uu, \vv}.
\end{split}
\end{equation*}
%and the other blocks are defined by following the same logic, exchanging the role of $\uu$ and $\vv$.
\end{propo}
We remark that Propositions \ref{propo_eibv} and \ref{propo_emv} are
twofold generalizations of results from \cite{chevalier2014fast}: they
extend previous results to the multivariate setting and also allow for
the inclusion of batch or heterotopic observations through the concept
of generalized locations. A key element for understanding these
propositions is that the conditional co-Kriging means entering the
EPs depend linearly on (batch) observations. The conditional equality
expressions thus become linear combinations of Gaussian variables
whose mean and covariance are easily calculated. Related closed-form
solutions have been noted in similar contexts
\citep{bhattacharjya2013value,stroh}, but not generalized to our
situation with random sets for vector-valued GRFs.
\subsection{Expected Bernoulli variance for a two-dimensional example}
\label{Sec:UnivarEx}
We illustrate the expected Bernoulli variance (EBV) associated with
different designs on a bivariate example. This mimics our river plume
application and hence the first and second component of the random
field will be called \textit{temperature} and \textit{salinity}. We
begin with a \textit{pointwise} example, considering a single
bivariate Gaussian distribution (i.e. no spatial elements).
\subsubsection{A pointwise study}
Say we want to study the excursion probability of a bivariate Gaussian
above some threshold, where the thresholds are set equal to the mean:
$\mu_1=t_1=5^{\circ}\mathrm{C}$ for temperature and $\mu_2=t_2=30$ g/kg for
salinity, and we play with the temperature and salinity correlation
and variances to study the effect on the EP and EBV.
\begin{figure}[!b]
\centering
\includegraphics[width=0.99\textwidth]{Figures/illus_bivar.eps}
\caption{Density contour plots with increasing correlations between
temperature and salinity. The densities have unit variance and
thresholds identical to the mean values $5^{\circ}\mathrm{C}$ and
$30$ g/kg.}
\label{illus_bivarDens}
\end{figure}
Fig.~\ref{illus_bivarDens} shows contour plots of three different
densities with increasing correlation $\gamma$ between temperature and
salinity. The displayed densities have unit standard deviations for
both temperature and salinity, but we also study the effect of
doubling the standard deviations.
Table \ref{tab:sim_rhoab} shows the initial EPs and the associated
Bernoulli variance (second row) for the examples indicated in
Fig.~\ref{illus_bivarDens}. The EPs increase with the correlation as
there is a strong tendency to have jointly low or high temperature and
salinity. The Bernoulli variance is similarly larger for high
correlations. EPs and Bernoulli variances are the same for both settings
of the temperature and salinity standard deviations $\sigma_1$ and $\sigma_2$, which
implies that high variability in temperature and salinity is not
captured in the $p(1-p)$ expression.
\begin{table}[!t] \centering \caption{EP and Bernoulli variance for
different correlations and variances (top rows), and EBVs for both
temperature and salinity data, and only temperature data (bottom
rows).}
\begin{tabular}{c|ccc|ccc}
&\multicolumn{3}{c}{$\sigma_1=\sigma_2=1$} & \multicolumn{3}{c}{$\sigma_1=\sigma_2=2$} \\
\hline
Correlation $\gamma$ & 0.2 & 0.6 & 0.8 & 0.2 & 0.6 & 0.8 \\
\hline
$p$ & 0.28 & 0.35 & 0.40 & 0.28 & 0.35 & 0.40 \\
$p(1-p)$ & 0.20 & 0.23 & 0.24 & 0.20 & 0.23 & 0.24 \\
EBV, Temperature and Salinity & 0.092 & 0.089 & 0.085 & 0.052 & 0.051 & 0.049 \\
EBV, Temperature only & 0.151 & 0.138 & 0.123 & 0.137 & 0.114 & 0.093 \\
\hline
\end{tabular}
\label{tab:sim_rhoab}
\end{table}
The bottom two rows of Table \ref{tab:sim_rhoab} show EBV
results. This is presented for a design gathering both data types, and
for a design with temperature measurements alone. When both data are
gathered, the measurement model is
$(Y_1,Y_2)^t=(Z_1,Z_2)^t+\bepsilon$, with
$\bepsilon \sim N(0,0.5^2I_2)$, while $Y_1=Z_1+\epsilon$,
$\epsilon \sim N(0,0.5^2)$ when only temperature is measured. For
this illustration, Table \ref{tab:sim_rhoab} shows that the expected
Bernoulli variance gets smaller with larger standard deviations. The
expected reduction of Bernoulli variance is further largest for the
cases with high correlation $\gamma$. Albeit smaller, there is also
uncertainty reduction when only temperature is measured (bottom row),
especially when temperature and salinity are highly correlated. When
correlation is low ($\gamma=0.2$), there is little information about
salinity in the temperature data, and therefore less uncertainty
reduction.
%In an application with fresh cold water from a river source, the temperature and salinity variables will not only be interdependent, but will also likely show dependence in the spatial dimension. This in turn will impact the design criteria when we
% evaluate the information measure by integrating over several locations (Section \ref{sec:simulations}).
\subsubsection{Including Spatiality}
\label{sec:including_spatiality}
We now turn to an example involving a full-fledged GRF. The statistical model we consider has a linear trend
\begin{align*}
\mu(s)=\mathbb{E}\left[\begin{pmatrix}
Z_{\spatloc, 1}\\ Z_{\spatloc, 2}
\end{pmatrix}\right] &= \beta_0 + \beta_1 \spatloc,
\end{align*}
with $\beta_0$ a two-dimensional vector and $\beta_1$ a $2\times 2$ matrix. In our examples, we only consider separable covariance models:
\begin{align*}
\textrm{Cov}\left(Z_{\spatloc, i}, Z_{v, j}\right) &= k(\spatloc, v) \gamma(i, j),~ \gamma(i, j) = \begin{cases} \sigma_i^2,~ i=j\\
\gamma \sigma_i \sigma_j,~i\neq j,
\end{cases}
\end{align*}
where an isotropic Mat\'{e}rn 3/2 kernel $(1+\eta h)\exp (-\eta h)$ is
used, for Euclidean distance $h$. In the accompanying Python examples
taking place within the MESLAS
toolbox\footnote{\url{https://github.com/CedricTravelletti/MESLAS}},
these modeling assumptions can however be relaxed to anisotropic
covariance and changing variance levels across the spatial
domain. Both extensions are relevant for the setting with river
plumes, but in practice this requires more parameters to be
specified. With extensive satellite data or prior knowledge from
high-resolution ocean models, one could also possibly fit more complex
multivariate spatial covariance functions
\citep{gneiting2010matern,genton2015cross}, but that is outside the
scope of the current work.
In the rest of this section, we consider a GRF with mean and
covariance structure as above and parameters
\begin{align*}
\beta_0 = \begin{pmatrix}
5.8\\ 24.0
\end{pmatrix}, ~ \beta_1 = \begin{pmatrix}
0.0 & -4.0\\
0.0 & -3.8
\end{pmatrix},~ \sigma_1 = 2.5,~ \sigma_2 = 2.25, ~ \gamma = 0.2,
\end{align*}
and kernel parameter $\eta=3.5$.
One realization of this GRF is shown in Fig.~\ref{example_excu}.
In the computed examples, the spatial domain $\domain$ is
discretized to a set of $N$ grid locations
$\mathcal{M}_g = \{\x_i, i=1,\ldots,N \}$, where each cell has area
$\delta$; the same grid is used for the waypoint graph for possible
design locations. The EIBV is approximated by sums over all grid
cells.
We now study how the EBV [Eq.~\eqref{def:eibv}] associated with data
collection at a point changes if only one of the two components of the
field is observed. We first draw a realization of the GRF defined
above and use it as ground-truth to mimic the real data-collection
process. A first set of observations is made at the locations
depicted in grey (see Fig.~\ref{fig:ebv_comp}), and the data is used
to update the GRF model. We then consider the green triangle as a
potential next observation location and plot the EBV reduction (at
each grid node in the waypoint graph) that would result from observing
only one component of the field (temperature or salinity), or both at
that point.
\begin{figure}[!b]
\centering
\subfigure[Regions of interest.]{\includegraphics[
height=0.21\textwidth,keepaspectratio]{Figures/ebv_comp_excu.png}\label{fig:ebv_comp_excu}}
\subfigure[Temperature.]{\includegraphics[
height=0.22\textwidth,keepaspectratio]{Figures/ebv_comp_temp.png}\label{fig:ebv_comp_temp}}
\subfigure[Salinity.]{\includegraphics[
height=0.22\textwidth,keepaspectratio]{Figures/ebv_comp_sal.png}\label{fig:ebv_comp_sal}}
\subfigure[Both.]{\includegraphics[
height=0.22\textwidth,keepaspectratio]{Figures/ebv_comp_both.png}\label{fig:ebv_comp_both}}
\caption{Pointwise Bernoulli variance reduction for observation of a
single or both components of the random field at one location. Data
collection locations in green. True excursion set in red. Places
where only one response is above threshold are depicted in pink. EBV
reduction associated to observing one or both responses at the green
location are shown in \ref{fig:ebv_comp_temp},
\ref{fig:ebv_comp_sal} and \ref{fig:ebv_comp_both}.}
\label{fig:ebv_comp}
\end{figure}
Note that plotting the EBV reduction at each point might also be used
to compare different data collection plans. For example,
Fig.~\ref{fig:ebv_north_vs_east} shows the EBV reduction associated
with a data collection plan along a vertical line (static north) and
one associated with a horizontal line (static east). Both expectations are
computed according to the a-priori distribution of the GRF (i.e. no
observations have been included yet).
\begin{figure}[ht]
\centering
\subfigure[Excursion probability.]{\includegraphics[
height=0.25\textwidth,keepaspectratio]{Figures/ebv_static_excu.png}\label{fig:ebv_static_excu}}
\subfigure[Static north design.]{\includegraphics[
height=0.26\textwidth,keepaspectratio]{Figures/ebv_static_north.png}\label{fig:ebv_static_north}}
\subfigure[Static east design.]{\includegraphics[
height=0.26\textwidth,keepaspectratio]{Figures/ebv_static_east.png}\label{fig:ebv_static_east}}
\caption{Pointwise Bernoulli variance reduction for two different static designs (later noted as \textit{static\_north} and \textit{static\_east}). The prior EP is shown in
\ref{fig:ebv_static_excu}. EBV reduction for each design shown in
\ref{fig:ebv_static_north} and \ref{fig:ebv_static_east}.}
\label{fig:ebv_north_vs_east}
\end{figure}
\section{Sequential designs and heuristic path planning}
\label{sec:heuristics}
We present sequential data collection
strategies that aim at reducing the expected uncertainty on the target
ES $\es$.
\subsection{Background}
%As before, let $\gp$ be a $\no$-variate random field on $\domain$. See Section \ref{sec:bg_and_notation} for a recap of the specific notation used to handle mean and covariance function of multivariate random fields.