forked from Shinmera/3d-vectors
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathssa.lisp
131 lines (111 loc) · 4.36 KB
/
ssa.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#|
This file is a part of 3d-vectors
(c) 2015 Shirakumo http://tymoon.eu ([email protected])
Author: Nicolas Hafner <[email protected]>
|#
;;; IR1
(in-package #:org.shirakumo.flare.vector)
(sb-c:defknown %rsqrtss ((sb-ext:simd-pack single-float)) (sb-ext:simd-pack single-float)
(sb-c:foldable sb-c:flushable sb-c:movable)
:overwrite-fndb-silently T)
(sb-c:defknown %rsqrtps ((sb-ext:simd-pack single-float)) (sb-ext:simd-pack single-float)
(sb-c:foldable sb-c:flushable sb-c:movable)
:overwrite-fndb-silently T)
(sb-c:defknown %mulps ((sb-ext:simd-pack single-float)
(sb-ext:simd-pack single-float)) (sb-ext:simd-pack single-float)
(sb-c:foldable sb-c:flushable sb-c:movable sb-c:always-translatable)
:overwrite-fndb-silently T)
(sb-c:defknown %haddps ((sb-ext:simd-pack single-float)
(sb-ext:simd-pack single-float)) (sb-ext:simd-pack single-float)
(sb-c:foldable sb-c:flushable sb-c:movable sb-c:always-translatable)
:overwrite-fndb-silently T)
;;; IR2
(in-package #:sb-vm)
(define-vop (3d-vectors::%rsqrtss)
(:policy :fast-safe)
(:translate 3d-vectors::%rsqrtss)
(:args (x :scs (single-sse-reg) :target r))
(:arg-types simd-pack-single)
(:results (r :scs (single-sse-reg)))
(:result-types simd-pack-single)
(:generator 3
(inst rsqrtss r x)))
(define-vop (3d-vectors::%rsqrtps)
(:policy :fast-safe)
(:translate 3d-vectors::%rsqrtps)
(:args (x :scs (single-sse-reg) :target r))
(:arg-types simd-pack-single)
(:results (r :scs (single-sse-reg)))
(:result-types simd-pack-single)
(:generator 3
(inst rsqrtps r x)))
(define-vop (3d-vectors::%mulps)
(:policy :fast-safe)
(:translate 3d-vectors::%mulps)
(:args (x :scs (single-sse-reg) :target r)
(y :scs (single-sse-reg)))
(:arg-types simd-pack-single simd-pack-single)
(:results (r :scs (single-sse-reg)))
(:result-types simd-pack-single)
(:generator 4
(cond ((location= r y)
(inst mulps y x))
(T
(move r x)
(inst mulps r y)))))
(define-vop (3d-vectors::%haddps)
(:policy :fast-safe)
(:translate 3d-vectors::%haddps)
(:args (x :scs (single-sse-reg) :target r)
(y :scs (single-sse-reg)))
(:arg-types simd-pack-single simd-pack-single)
(:results (r :scs (single-sse-reg)))
(:result-types simd-pack-single)
(:generator 4
(cond ((location= r y)
(inst haddps y x))
(T
(move r x)
(inst haddps r y)))))
;;; High-level
(in-package #:org.shirakumo.flare.vector)
(macrolet ((stubdef (name args) `(defun ,name ,args (,name ,@args))))
(stubdef %rsqrtss (x))
(stubdef %rsqrtps (x))
(stubdef %mulps (x y))
(stubdef %haddps (x y)))
(declaim (inline rsqrtss))
(declaim (ftype (function (single-float) single-float) rsqrtss))
(defun isqrt~ (x)
(nth-value 0 (sb-ext:%simd-pack-singles
(%rsqrtss (sb-ext:%make-simd-pack-single x 0.0s0 0.0s0 0.0s0)))))
(declaim (inline f4*))
(declaim (ftype (function (single-float single-float single-float single-float single-float single-float single-float single-float)
(values single-float single-float single-float single-float)) f4*))
(defun f4* (a b c d x y z w)
(sb-ext:%simd-pack-singles (%mulps (sb-ext:%make-simd-pack-single a b c d)
(sb-ext:%make-simd-pack-single x y z w))))
(defun f4/+ (a b c d)
(let* ((pack (sb-ext:%make-simd-pack-single a b c d))
(pack (%haddps pack pack))
(pack (%haddps pack pack)))
(nth-value 0 (sb-ext:%simd-pack-singles pack))))
(defun simd-vunit (x y z w)
(let* ((pk (sb-ext:%make-simd-pack-single x y z w))
(sq (%mulps pk pk))
(su (%haddps sq sq))
(su (%haddps su su)))
(%mulps pk (%rsqrtps su))))
(define-ofun vunit~ (a)
(etypecase a
(vec2 (let* ((x (vx2 a)) (y (vy2 a))
(m (- 1 (/ (sqrt 2))))
(r (/ (max x y)))
(r (* r (- (1+ m) (* r m (+ x y))))))
(vec2 (* r x) (* r y))))
(vec4 (multiple-value-bind (x y z w) (simd-vunit (vx4 a) (vy4 a) (vz4 a) (vw4 a))
(vec4 x y z w)))))
(define-ofun nvunit~ (a)
(etypecase a
(vec4 (multiple-value-bind (x y z w) (simd-vunit (vx4 a) (vy4 a) (vz4 a) (vw4 a))
(%vsetf a x y z w)))))