-
Notifications
You must be signed in to change notification settings - Fork 0
/
stepper.h
1285 lines (1075 loc) · 45.6 KB
/
stepper.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/************************************************************************/
/* */
/* envutil - utility to convert between environment formats */
/* */
/* Copyright 2024 by Kay F. Jahnke */
/* */
/* The git repository for this software is at */
/* */
/* https://github.com/kfjahnke/envutil */
/* */
/* Please direct questions, bug reports, and contributions to */
/* */
/* [email protected] */
/* */
/* Permission is hereby granted, free of charge, to any person */
/* obtaining a copy of this software and associated documentation */
/* files (the "Software"), to deal in the Software without */
/* restriction, including without limitation the rights to use, */
/* copy, modify, merge, publish, distribute, sublicense, and/or */
/* sell copies of the Software, and to permit persons to whom the */
/* Software is furnished to do so, subject to the following */
/* conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the */
/* Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
/* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
/* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
/* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
/* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
/* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
/* OTHER DEALINGS IN THE SOFTWARE. */
/* */
/************************************************************************/
#include "zimt/zimt.h"
#if defined(ENVUTIL_STEPPER_H) == defined(HWY_TARGET_TOGGLE)
#ifdef ENVUTIL_STEPPER_H
#undef ENVUTIL_STEPPER_H
#else
#define ENVUTIL_STEPPER_H
#endif
HWY_BEFORE_NAMESPACE() ;
BEGIN_ZIMT_SIMD_NAMESPACE(project)
// This header provides a set of 'steppers': concrete implementations
// of objects which can serve as data source for zimt::process. The
// 'steppers' are quite specific: the provide 3D 'ray' coordinates
// to sample an environment so that these coordinates will pick pixels
// to populate a given 2D image in a specific projection.
// The steppers only produce the coordinates - providing pixel values
// to populate the image has to be done in a second step, with the
// 'act' functor of zimt::process.
// The steppers have the added feature of arbitrary orientation. Their
// c'tors take three unit vectors for the three axes of the stepper's
// orientation - if you want no rotation, pass (1,0,0), (0,1,0) and
// (0,0,1), and if you want rotation, rotate all three vectors with
// e.g. the same rotational quaternion. The resulting steppers will
// have the rotation 'built-in', so if you do e.g. a cut-out from
// an environment, the rotation can be used to set up the orientation
// of the 'virtual camera' for the cut-out.
// Adding this feature comes at little additional cost compared to
// what an implementation without rotation would use, but increases
// the usefulness. The alternative - using steppers without built-in
// rotation and starting the 'act' functor with a rotation - e.g. with
// a rotational quaternion - is slower, because the rotation has to be
// applied to every ray coordinate, and this takes quite a few CPU
// cycles. The built-in rotation used here can also exploit certain
// invariants and mathematical tricks which make it more efficient.
// The intended use is for extraction of partial images from
// environments, reprojection, panorama viewing and the likes.
// A concrete program using this code is 'extract.cc' in this repo.
// I have finally grasped the principle how to avoid 'rotating'
// coordinates at all: if one first calculates the 3D ray for the
// 'archetypal' unrotated 2D manifold, it's three components are
// used as factors for the three rotated vectors representing the
// re-oriented, rotated manifold. If no invariants apply, this
// needs nine multiplications and six additions, but many steppers
// have invariant parts throughout the processing of a segment.
// This hints at further possibilities: instead of using an
// orthonormal set of vectors, any set of three vecors might be
// used for all kinds of effects. The steppers take the vectors
// as arguments, so there is ample room for experiments.
// My realization is simply understanding the application of a
// rotational matrix. The key to understanding is that there 'is
// no rotation' - what's happening is that a given 3D vector is
// put into a different coordinate system with the same origin.
// This can be interpreted as the *result of a rotation*, but it
// does not mean that anything *actually does rotate*. If something
// actually does rotate, then rotational matrices and quaternions
// can be used to calculate the result of the rotation(s), but the
// essential fact is that the original vector is simply put into
// a different coordinate system. The typical modus operandi of using
// a stepper is to use a target which is 'draped' on the 'archetypal'
// 2D manifold representing it's projection. So the target is
// thought to be upright and it's center at unit distance forward;
// the target's x axis is parallel to the 'right' axis (x axis in
// lux convention) and the target's y axis is parallel to the 'down'
// axis (y axis in lux convention). Rather than thinking along the
// lines of a rotated camera, we can operate in the target image's
// coordinate system and 'pull data' from a rotated environment.
// This is the reverse transformation typically used for gemetrical
// transformations in a panorama stitching and viewing context:
// we start out with a planar discrete coordinate pertaining to
// a pixel in the target image and ask: "which source image pixels
// should affect this target pixel?" the location of the source
// pixel(s) is obtained by appropriate geometrical transformation,
// and how the source pixels in the vicinity of this location are
// combined is a matter of the chosen interpolation method. When
// stitching several images together, an additional criterion is
// the preference - or weight - which is given to what can be gleaned
// from each contributing source image in this fashion.
// Steppers also take limits for the planar coordinates, to define
// the area which will actually be sampled. The limits are given
// in model space units, which requires knowledge about the extent
// of the 2D archetype in question. An alternative would be to use
// texture coordinates, but to work with texture coordinates, we'd
// have to move to model space units for the internal caclculations,
// so 'feeding' model space units is more efficient. extract.cc
// can accept a horizontal field of view on the command line and
// calculates the extent's limits from it. This should be the most
// common way to go - the extent values produced in this way are
// symmetrical and isotropic. Here, in the implementation, we
// process the extent values, because they allow more flexibility.
// Steppers as such do not have a 'notion' of an image which they
// might be used for - beyond the limits, which can be passed to
// their c'tor. Within these limits, they will provide a uniform
// sampling over the 2D manifold, but emit rays which coincide
// with the 2D sample points. Note that the sampling is uniform
// in relation to a 'flattened-out' 2D manifold. If we're using
// a spherical_stepper, the 'archetypal form' is a spherical
// surface which can't be sampled uniformly. But the lat/lon
// representation of this spherical surface can be sampled
// uniformly, and this is what the spherical_stepper does.
// cylindrical_stepper has a cylindrical surface as it's
// archetypal form, but this can be 'rolled out' without
// deforming it, and the other singe-image steppers also have
// 'flat' archetypes because they are, conceptually, projections
// to the plane anyway. cubemap_stepper is slightly different.
// The corresponding archetype is the surface of a cube, but
// we use a 1X6 stripe representation. If the cubemap_stepper
// is set up for a minimal planar representation (section_md
// == 2 and refc_md == 1), the stripe has model space extent
// of (2,12), and the sampling of the environment will be as
// 'compact' as possible. With extra space around the cube faces
// proper, some areas near the cube's edges will be sampled
// more often (because they are 'seen' from several of the
// 'padded' cube faces. The set of rays generated from a 'tight'
// cubemap_stepper is a 'quite' even sampling of the environment:
// The distance between neighbouring rays does vary because of
// the rectilinear projection used for the cube faces, but the
// variation is nowhere near as extreme as what happens near the
// poles with a spherical_stepper, or at the 'back pole' of a
// fisheye_stepper. So to sample an entire environment I think
// a cubemap_stepper is quite a good choice, and combined with
// an interpolator which takes the variations of ray distance into
// account (e.g. twining), the results should be good. Other schemes
// to 'cover' an entire 360X180 environment as uniformly as possible
// require more calculation (e.g. triangulations) and/or do not map
// to a uniform grid. A disadvantage of the 1X6 stripe representation
// are the discontinuities between the six partial images, which
// can't be avoided: there is no way to arrange the six partial
// images without unrelated edges 'colliding'. This is the reason
// for using 'padding': if the padding is sufficiently wide (so that
// the interpolator's support remains within a single padded image)
// the discontinuity has no effect. To merely sample an environment
// with roughly equally spaced rays, a 'tight' cubemap_stepper is a
// good choice, though - this fact can be exploited e.g. for 'maps'
// of the environment which indicate properties of parts of the
// environment - often scaled-down versions of the environment used
// as masks to avoid unnecessary calculations e.g. for 'void' areas.
// With the underlying rectangular structure, such schemes can be
// used together with tiling schemes, marking which tiles will be
// used in an intended manipulation.
// we set up a base class for 'steppers' which encodes the '2D part'
// of setting up a sampling: We have a horizontal and a vertical
// range of 2D coordinates pertaining to the 2D manifold 'draped'
// in 'model space'. I refer to this 2D manifold as the 'archetypal'
// representation, and it is clearly defined for several projections:
// e.g. for spherical projection, it's a spherical surface with unit
// radius, with spherical coordinates centered on (0,0,1) - this
// is where the 'planar' comes from. The 'draping' basically fixes
// the scale: the archetypal representation has unit radius, and
// the width and height parameters, together with the 'extent' of
// the draped manifold, determine the distance between the sampling
// points on the 2D manifold taken as a plane.
// stepper_base is very similar to zimt's linspace_t, which might
// be used instead. The focus here is on precision and parameterization
// which fits the intended use - lux has the 'extent' of 2D manifolds
// readily at hand and uses 'edge-to-edge' semantics for the extent.
template < typename T , // fundamental type
std::size_t L > // lane count
struct stepper_base
{
typedef T value_t ;
static const std::size_t size = 3 ;
typedef zimt::xel_t < T , 3 > crd3_t ;
typedef zimt::simdized_type < crd3_t , L > crd3_v ;
typedef zimt::xel_t < T , 2 > crd2_t ;
typedef zimt::simdized_type < crd2_t , L > crd2_v ;
typedef zimt::simdized_type < T , L > f_v ;
typedef zimt::xel_t < int , 2 > crd_t ;
typedef zimt::simdized_type < crd_t , L > crd_v ;
typedef typename crd_v::value_type crd_ele_v ;
// delta is the increase from one (SIMDized) batch of data to
// the next, in the horizontal. This equals the increase from
// one draped pixel to the next times the SIMD lane count.
const T delta ;
// 'planar' is the current SIMDized planar coordinate. It's vertical
// component remains constant throughout a line, the horizontal
// component varies. The invariant vertical is what we aim to
// exploit with the stepper: oftentimes the output of the stepper,
// a 3D ray coordinate, can be calculated more efficiently if
// the vertical component remains constant.
crd2_v planar ;
// some helper variables, used for efficiency. The f... values
// are basically scaling factors - internally, we use doubled
// discrete coordinates, but the results are in model space
// scale and in floating point. Why doubled discrete coordinates?
// Because they can be used to represent half unit steps
// precisely, and this is useful, because we're often dealing
// with half unit steps: with edge-to-edge semantics, the extent
// is half a sampling step further out than the marginal pixels,
// and the center of even-shaped images is at a half unit as well.
const T fx0 , fx1 , fy0 , fy1 ;
// this is a SIMD constant holding values 0, 2, 4, ...
const crd_ele_v iota2 ;
// width and height give the number of sample steps over the entire
// surface covered by the image's extent. These two values define
// the stepper's - and it's generated sampling's - resolution.
// The width and height are independent of the extent and of
// each other, so anisotropic sampling is allowed and a given
// extent can be sampled with any horizontal and vertical
// resolution.
const int width , height ;
// stepper_base's standard c'tor receives the width and height,
// and the image's extent, given as the extrema first in
// horizontal, then in vertical direction. Note that we use
// edge-to-edge semantics to place the planar coordinates
// inside the extent.
stepper_base ( int _width , int _height ,
T _a0 , T _a1 , T _b0 , T _b1 )
: width ( _width ) ,
height ( _height) ,
fx1 ( _a1 / ( 2.0 * _width ) ) ,
fx0 ( _a0 / ( 2.0 * _width ) ) ,
fy1 ( _b1 / ( 2.0 * _height ) ) ,
fy0 ( _b0 / ( 2.0 * _height ) ) ,
delta ( L * ( _a1 - _a0 ) / T ( _width ) ) ,
iota2 ( 2 * crd_ele_v::iota() )
{ }
// init sets up the internal state ('planar') resulting from
// the initial discrete coordinate for the current segment.
// This is called by the 'init' member function in the
// concrete stepper implementations. Note how the incoming
// discrete coordinate is doubled in processing, and then
// mapped to model space scale by application of the scaling
// factors. Note also the way how the planar coordinate is
// calculated: by pulling both extrema into the calculation
// so that the corresponding opposite term disappears when
// the calculation 'hits the margin', we make certain that
// the resulting values are precisely in-range. This takes
// a few more cycles than the 'starting point + x * delta'
// approach, but with the latter, with large x, we might
// well end up a good way off the upper end of the range.
void init ( const crd_t & crd )
{
auto ll0 = iota2 + T ( crd[0] * 2 + 1 ) ;
planar[0] = ll0 * fx1 + ( T ( 2 * width ) - ll0 ) * fx0 ;
auto ll1 = crd[1] * 2 + 1 ;
planar[1] = ll1 * fy1 + T ( 2 * height - ll1 ) * fy0 ;
}
// increase modifies it's argument to contain the next value
// here we add 'delta' - an alternative would be to keep state
// representing the current discrete coordinate and to derive
// the current planar[0] value from it. This would be more
// precise, but since we calculate a new starting point for
// planar[0] with every segment (with the call to 'init') the
// drift should be negligible.
// increase is called by the concrete stepper implementation's
// 'increase' member function. Note that increase is 'dumb'
// and will produce values outside the extent eventually;
// the conrete stepper implementation will take care of that.
void increase()
{
planar[0] += delta ;
}
} ;
// struct spherical_stepper is an object to be used as a zimt get_t.
// It provides 3D ray coordinates for a sampling of the surface of a
// sphere with unit radius. What's special here is that the sampling
// works in an arbitrarily rotated coordinate system - or, if you
// think of the sphere as having it's own coordinate system, it works
// with an arbitrarily rotated sphere. Because the code can exploit
// certain invariants, this is more efficient than producing the
// sampling from a 'normal' sphere and subsequently rotating the
// 3D coordinates - e.g. with a rotational quaternion. The
// mathematics behind the code were gleaned from here:
// https://math.stackexchange.com/questions/73237/parametric-equation-of-a-circle-in-3d-space
// The key point is that, following a latitude line on an arbitrarily
// rotated sphere, the latitude line is still a circle which can be
// produced by a parametric function of the angle. The parameters of
// the equation are established and the fact that a zimt::get_t only
// varies it's output along the 'hot axis' while it processes a segment
// is exploited to good effect, reducing the calculatory expense to
// an invocation of sincos, nine additions and six multiplications
// for each batch of produced coordinates (counting SIMD operations;
// the vector width may result in the nominal vectors being represented
// by more than one register's worth).
// This object is intended for use with lux: lux already uses a similar
// scheme for rectilinear targets, but I did not realize that the
// method can be extended to other target projections.
template < typename T , // fundamental type
std::size_t L > // lane count
struct spherical_stepper
: public stepper_base < T , L >
{
typedef stepper_base < T , L > base_t ;
using typename base_t::value_t ;
using base_t::size ;
using typename base_t::crd_t ;
using typename base_t::crd3_t ;
using typename base_t::crd3_v ;
using typename base_t::f_v ;
using base_t::planar ;
using base_t::init ;
using base_t::increase ;
// we have three 3D vectors of unit length. They are orthogonal and
// can be produced e.g. by applying a rotational quaternion to the
// three cardinal vectors (1,0,0), (0,1,0), (0,0,1)
const crd3_t xx ;
const crd3_t yy ;
const crd3_t zz ;
// some helper variables, used for efficiency.
f_v x , r , y , z ;
crd3_v xxx , yyy , zzz ;
public:
// c'tor arguments:
// _xx, _yy and _zz are 3D directional vectors corresponding to
// (1,0,0), (0,1,0) and (0,0,1) with the rotation applied.
// If there is no rotation, we could use a simplified object,
// but this implementation is perfectly general. An alternative
// way to introduce these three vectors would be to pass in
// a rotational quaternion and rotate the three base vectors
// by it, but since this creates a dependency on the relevant
// quaternion code, I prefer to pass in the three vectors.
// The vectors are expected to be normalized.
// To put it differently: the three vectors _xx, _yy and _zz
// form the orthonormal basis for the rotated coordinate
// system in which we operate.
// _width and _height are the extent, in pixels, of the field
// of sample values which this get_t will produce - the
// 'notional' shape for zimt::process. The width and height
// don't need to result in an isotropic sampling; whichever
// the values are, the spread of sampling locations will be
// uniform in the horizontal and in the vertical. But note
// that the sampling does not start at (_a0,_b0) but half a
// sampling step 'inwards' - and it also ends half a sampling
// step inwards from (_a1,_b1): we're using 'edge-to-edge
// semantics'
// The last four arguments are the 'extent' of the 2D manifold
// which will be sampled, in model space units. The default
// values will spread the sampling locations evenly over an
// entire spherical surface.
// the 2D part of the work - producing the 'planar' coordinate
// is handled by the base type.
spherical_stepper ( crd3_t _xx , crd3_t _yy , crd3_t _zz ,
int _width ,
int _height ,
T _a0 = - M_PI ,
T _a1 = M_PI ,
T _b0 = - M_PI_2 ,
T _b1 = M_PI_2 ,
int _dy = 0 )
: xx ( _xx ) ,
yy ( _yy ) ,
zz ( _zz ) ,
base_t ( _width , _height , _a0 , _a1 , _b0 , _b1 )
{ }
// init is used to initialize the vectorized value to the value
// it should hold at the beginning of the run. The discrete
// coordinate 'crd' gives the location of the first value, and this
// function infers the start value from it.
void init ( crd3_v & trg , const crd_t & crd )
{
// the base type's 'init' initializes 'planar'
init ( crd ) ;
// these values pertain to the vertical:
sincos ( planar[1] , y , r ) ;
// these values pertain to the horizontal:
sincos ( planar[0] , x , z ) ;
// for efficiency, we form a few products now - they remain
// constant for the entire segment (so, until 'init' is called
// again by zimt::process)
xxx = xx * r ;
yyy = yy * y ;
zzz = zz * r ;
// now we can initialize the target value
trg = xxx * x + zzz * z + yyy ;
}
// 'capped' variant. This is only needed if the current segment is
// so short that no vectors can be formed at all. We fill up the
// target value with the last valid datum. The code for this
// function is the same for all steppers, but since it uses
// 'init', which is specific to a given stepper, and since I
// don't want to use a virtual init function, I repeat it
// in all steppers.
void init ( crd3_v & trg ,
const crd_t & crd ,
const std::size_t & cap )
{
init ( trg , crd ) ;
if ( cap < L )
trg.stuff ( cap ) ;
}
// increase modifies it's argument to contain the next values:
// the next 3D 'ray' coordinates in model space units, which are
// used as input to the 'act' functor invoked by zimt::process.
void increase ( crd3_v & trg )
{
// the base class increase increases planar[0]
increase() ;
// update x and z only - y and r are already fixed in the
// internal xxx, yyy and zzz variables.
sincos ( planar[0] , x , z ) ;
// finally, calculate the new target value
trg = xxx * x + zzz * z + yyy ;
}
// 'capped' variant. This is called after all vectors in the current
// segment have been processed. If we'd 'go over the lanes', we might
// only set entries below cap and rely on the higher lanes holding
// viable values from processing the last full vector, but it's
// probably more efficient to fill the target with a vector operation
// and stuff unconditionally.
void increase ( crd3_v & trg ,
const std::size_t & cap ,
const bool & _stuff = true )
{
increase ( trg ) ;
if ( cap < L )
trg.stuff ( cap ) ;
}
} ;
// cylindrical_stepper is very similar to spherical_stepper. The
// difference is the different treatment of the vertical, and that
// the x and z components aren't scaled with the radius, because
// it's always 1.0
template < typename T , // fundamental type
std::size_t L > // lane count
struct cylindrical_stepper
: public stepper_base < T , L >
{
typedef stepper_base < T , L > base_t ;
using typename base_t::value_t ;
using base_t::size ;
using typename base_t::crd_t ;
using typename base_t::crd3_t ;
using typename base_t::crd3_v ;
using typename base_t::f_v ;
using base_t::planar ;
using base_t::init ;
using base_t::increase ;
// we have three 3D vectors of unit length. The are orthogonal and
// can be produced e.g. by applying a rotational quaternion to the
// three cardinal vectors (1,0,0), (0,1,0), (0,0,1)
const crd3_t xx ;
const crd3_t yy ;
const crd3_t zz ;
// some helper variables, used for efficiency.
f_v x , r , y , z ;
crd3_v yyy ;
public:
cylindrical_stepper ( crd3_t _xx , crd3_t _yy , crd3_t _zz ,
int _width ,
int _height ,
T _a0 ,
T _a1 ,
T _b0 ,
T _b1 ,
int _dy = 0
)
: xx ( _xx ) ,
yy ( _yy ) ,
zz ( _zz ) ,
base_t ( _width , _height , _a0 , _a1 , _b0 , _b1 )
{ }
// init is used to initialize the vectorized value to the value
// it should hold at the beginning of the run. The discrete
// coordinate 'crd' gives the location of the first value, and this
// function infers the start value from it.
void init ( crd3_v & trg , const crd_t & crd )
{
// the base type's init initializes 'planar'
init ( crd ) ;
// this value pertains to the vertical:
y = planar[1] ; // constant while 'latitude' is constant
// these values pertain to the horizontal:
sincos ( planar[0] , x , z ) ;
// for efficiency, we form a few products now - they remain constant
// for the entire segment
yyy = yy * y ;
// now we can initialize the target value
trg = xx * x + zz * z + yyy ;
}
// increase modifies it's argument to contain the next value
void increase ( crd3_v & trg )
{
// the base class increase increases planar[0]
increase() ;
// update x and z
sincos ( planar[0] , x , z ) ;
// and calculate the new target value
trg = xx * x + zz * z + yyy ;
}
void init ( crd3_v & trg ,
const crd_t & crd ,
const std::size_t & cap )
{
init ( trg , crd ) ;
if ( cap < L )
trg.stuff ( cap ) ;
}
void increase ( crd3_v & trg ,
const std::size_t & cap ,
const bool & _stuff = true )
{
increase ( trg ) ;
if ( cap < L )
trg.stuff ( cap ) ;
}
} ;
// rectilinear_stepper is the simplest of the family, because
// the planar coordinate can be mapped more or less diretcly
// to the output.
template < typename T , // fundamental type
std::size_t L > // lane count
struct rectilinear_stepper
: public stepper_base < T , L >
{
typedef stepper_base < T , L > base_t ;
using typename base_t::value_t ;
using base_t::size ;
using typename base_t::crd_t ;
using typename base_t::crd3_t ;
using typename base_t::crd3_v ;
using typename base_t::f_v ;
using base_t::planar ;
using base_t::init ;
using base_t::increase ;
// we have three 3D vectors of unit length. The are orthogonal and
// can be produced e.g. by applying a rotational quaternion to the
// three cardinal vectors (1,0,0), (0,1,0), (0,0,1)
const crd3_t xx ;
const crd3_t yy ;
const crd3_t zz ;
// some helper variables, used for efficiency.
f_v x , r , y , z ;
crd3_v ddd ;
public:
rectilinear_stepper ( crd3_t _xx , crd3_t _yy , crd3_t _zz ,
int _width ,
int _height ,
T _a0 ,
T _a1 ,
T _b0 ,
T _b1 ,
int _dy = 0
)
: xx ( _xx ) ,
yy ( _yy ) ,
zz ( _zz ) ,
base_t ( _width , _height , _a0 , _a1 , _b0 , _b1 )
{ }
// init is used to initialize the vectorized value to the value
// it should hold at the beginning of the run. The discrete
// coordinate 'crd' gives the location of the first value, and this
// function infers the start value from it.
void init ( crd3_v & trg , const crd_t & crd )
{
// the base type's init initializes 'planar'
init ( crd ) ;
// for efficiency, we form 'ddd' now - it remains constant
// for the entire segment
ddd = yy * planar[1] + zz ;
// now we can initialize the target value
trg = xx * planar[0] + ddd ;
}
// increase modifies it's argument to contain the next value
void increase ( crd3_v & trg )
{
// the base class increase increases planar[0]
increase() ;
// calculate the new target value
trg = xx * planar[0] + ddd ;
}
// the capped variants:
void init ( crd3_v & trg ,
const crd_t & crd ,
const std::size_t & cap )
{
init ( trg , crd ) ;
if ( cap < L )
trg.stuff ( cap ) ;
}
void increase ( crd3_v & trg ,
const std::size_t & cap ,
const bool & _stuff = true )
{
increase ( trg ) ;
if ( cap < L )
trg.stuff ( cap ) ;
}
} ;
// fisheye_stepper is a bit more involved mathematically, and
// there are no handy invariances, because the fisheye projection
// is a radial function, whereas the traversal of the 2D manifold
// is a tensor, so there is no common mathematical ground and
// each sample point (or, rather, each batch of sample points)
// has to be calculated 'all the way' from the planar coordinate.
template < typename T , // fundamental type
std::size_t L > // lane count
struct fisheye_stepper
: public stepper_base < T , L >
{
typedef stepper_base < T , L > base_t ;
using typename base_t::value_t ;
using base_t::size ;
using typename base_t::crd_t ;
using typename base_t::crd3_t ;
using typename base_t::crd3_v ;
using typename base_t::f_v ;
using base_t::planar ;
using base_t::init ;
using base_t::increase ;
// we have three 3D vectors of unit length. The are orthogonal and
// can be produced e.g. by applying a rotational quaternion to the
// three cardinal vectors (1,0,0), (0,1,0), (0,0,1)
const crd3_t xx ;
const crd3_t yy ;
const crd3_t zz ;
// some helper variables, used for efficiency.
f_v x , r , y , z ;
crd3_v yyy ;
fisheye_stepper ( crd3_t _xx , crd3_t _yy , crd3_t _zz ,
int _width ,
int _height ,
T _a0 ,
T _a1 ,
T _b0 ,
T _b1 ,
int _dy = 0
)
: xx ( _xx ) ,
yy ( _yy ) ,
zz ( _zz ) ,
base_t ( _width , _height , _a0 , _a1 , _b0 , _b1 )
{ }
// for fisheye images, we can't use any handy invariants - each
// sample point has to be calculated 'all the way', and we use
// this function for the calculation:
void work ( crd3_v & trg )
{
// 2D distance from center, equals the angle from the central
// axis zz (the rotated 'forward' axis (0,0,1) and the same
// as the 'latitude'
auto a = M_PI_2 - norm ( planar ) ;
// angle from the horizontal, same as the 'longitude'
auto b = atan2 ( planar[0] , planar[1] ) ;
// these values pertain to the vertical:
sincos ( a , z , r ) ;
// these values pertain to the horizontal:
sincos ( b , x , y ) ;
// now we can initialize the target value
trg = xx * r * x + zz * z + yy * r * y ;
}
// init is used to initialize the vectorized value to the value
// it should hold at the beginning of the run. The discrete
// coordinate 'crd' gives the location of the first value, and this
// function infers the start value from it.
void init ( crd3_v & trg , const crd_t & crd )
{
// the base type's init initializes 'planar'
init ( crd ) ;
work ( trg ) ;
}
// increase modifies it's argument to contain the next value
void increase ( crd3_v & trg )
{
// the base class increase increases planar[0]
increase() ;
work ( trg ) ;
}
void init ( crd3_v & trg ,
const crd_t & crd ,
const std::size_t & cap )
{
init ( trg , crd ) ;
if ( cap < L )
trg.stuff ( cap ) ;
}
void increase ( crd3_v & trg ,
const std::size_t & cap ,
const bool & _stuff = true )
{
increase ( trg ) ;
if ( cap < L )
trg.stuff ( cap ) ;
}
} ;
// even more involved than fisheye_stepper, this stepper for
// stereographic targets has to deal with the peculiarities of
// the stereographic projection, resulting in a more complex
// temp for 'a', the equivalent of the latitude: with fisheye
// images, this is simply the distance-to-center of the planar,
// but for stereographic images, we need to calculate our way
// by first looking at the line from the unit sphere's pole
// to the target point, then intersect that with the unit
// sphere to get the ray.
template < typename T , // fundamental type
std::size_t L > // lane count
struct stereographic_stepper
: public stepper_base < T , L >
{
typedef stepper_base < T , L > base_t ;
using typename base_t::value_t ;
using base_t::size ;
using typename base_t::crd_t ;
using typename base_t::crd3_t ;
using typename base_t::crd3_v ;
using typename base_t::f_v ;
using base_t::planar ;
using base_t::init ;
using base_t::increase ;
// we have three 3D vectors of unit length. The are orthogonal and
// can be produced e.g. by applying a rotational quaternion to the
// three cardinal vectors (1,0,0), (0,1,0), (0,0,1)
const crd3_t xx ;
const crd3_t yy ;
const crd3_t zz ;
// some helper variables, used for efficiency.
f_v x , r , y , z ;
crd3_v yyy ;
stereographic_stepper ( crd3_t _xx , crd3_t _yy , crd3_t _zz ,
int _width ,
int _height ,
T _a0 ,
T _a1 ,
T _b0 ,
T _b1 ,
int _dy = 0
)
: xx ( _xx ) ,
yy ( _yy ) ,
zz ( _zz ) ,
base_t ( _width , _height , _a0 , _a1 , _b0 , _b1 )
{ }
// for stereographic images, we can't use any handy invariants
// - each sample point has to be calculated 'all the way', and
// we use this function for the calculation:
void work ( crd3_v & trg )
{
auto a = M_PI_2 - 2.0 * atan ( norm ( planar ) / 2.0 ) ;
// angle from the horizontal, same as the 'longitude'
auto b = atan2 ( planar[0] , planar[1] ) ;
// these values pertain to the vertical:
sincos ( a , z , r ) ;
// these values pertain to the horizontal:
sincos ( b , x , y ) ;
// now we can initialize the target value
trg = xx * r * x + zz * z + yy * r * y ;
}
// init is used to initialize the vectorized value to the value
// it should hold at the beginning of the run. The discrete
// coordinate 'crd' gives the location of the first value, and this
// function infers the start value from it.
void init ( crd3_v & trg , const crd_t & crd )
{
// the base type's init initializes 'planar'
init ( crd ) ;
work ( trg ) ;
}
// increase modifies it's argument to contain the next value
void increase ( crd3_v & trg )
{
// the base class increase increases planar[0]
increase() ;
work ( trg ) ;
}
void init ( crd3_v & trg ,
const crd_t & crd ,
const std::size_t & cap )
{
init ( trg , crd ) ;
if ( cap < L )
trg.stuff ( cap ) ;
}
void increase ( crd3_v & trg ,
const std::size_t & cap ,
const bool & _stuff = true )
{
increase ( trg ) ;
if ( cap < L )
trg.stuff ( cap ) ;
}
} ;
// with the new notion of the IR image of a cubemap, which gives
// a direct conversion between IR and ray coordinates both ways,
// we can now set up a cubemap stepper, which will 'cast rays'
// to populate a cubemap image. With 'tight' extent of (-1,-6),
// (1,6) we get a minimal cubemap with precisely 90 degrees
// fov - larger extent poduces images with larger fov.
template < typename T , // fundamental type
std::size_t L > // lane count
struct cubemap_stepper
: public stepper_base < T , L >
{
typedef stepper_base < T , L > base_t ;
using typename base_t::value_t ;
using base_t::size ;
using typename base_t::crd_t ;
using typename base_t::crd3_t ;
using typename base_t::crd3_v ;
using typename base_t::f_v ;
using base_t::planar ;
using base_t::init ;
using base_t::increase ;
using base_t::width ;
// we have three 3D vectors of unit length. The are orthogonal and
// can be produced e.g. by applying a rotational quaternion to the
// three cardinal vectors (1,0,0), (0,1,0), (0,0,1)
const crd3_t xx ;