takagi · jmbr · Aug 18, 2019
diff --git a/include/double2.h b/include/double2.h
@@ -0,0 +1,49 @@
+/**
+   This file is a part of cl-cuda project.
+   Copyright (c) 2019 Juan M. Bello-Rivas ([email protected])
+ */
+
+#ifndef CL_CUDA_DOUBLE2_H_
+#define CL_CUDA_DOUBLE2_H_
+
+__device__ double2 double2_add ( double2 a, double2 b )
+{
+  return make_double2 ( a.x + b.x, a.y + b.y );
+}
+
+__device__ double2 double2_sub ( double2 a, double2 b )
+{
+  return make_double2 ( a.x - b.x, a.y - b.y );
+}
+
+__device__ double2 double2_scale ( double2 a, float k )
+{
+  return make_double2 ( a.x * k, a.y * k );
+}
+
+__device__ double2 double2_scale_flipped ( float k, double2 a )
+{
+  return double2_scale ( a, k );
+}
+
+__device__ double2 double2_scale_inverted ( double2 a, float k )
+{
+  return double2_scale ( a, 1.0 / k );
+}
+
+__device__ double2 double2_negate ( double2 x )
+{
+  return make_double2 ( - x.x, - x.y );
+}
+
+__device__ double2 double2_recip ( double2 x )
+{
+  return make_double2 ( 1.0 / x.x, 1.0 / x.y );
+}
+
+__device__ float double2_dot ( double2 a, double2 b )
+{
+  return a.x * b.x + a.y * b.y;
+}
+
+#endif // CL_CUDA_DOUBLE2_H_
diff --git a/include/float2.h b/include/float2.h
@@ -0,0 +1,49 @@
+/**
+   This file is a part of cl-cuda project.
+   Copyright (c) 2019 Juan M. Bello-Rivas ([email protected])
+ */
+
+#ifndef CL_CUDA_FLOAT2_H_
+#define CL_CUDA_FLOAT2_H_
+
+__device__ float2 float2_add ( float2 a, float2 b )
+{
+  return make_float2 ( a.x + b.x, a.y + b.y );
+}
+
+__device__ float2 float2_sub ( float2 a, float2 b )
+{
+  return make_float2 ( a.x - b.x, a.y - b.y );
+}
+
+__device__ float2 float2_scale ( float2 a, float k )
+{
+  return make_float2 ( a.x * k, a.y * k );
+}
+
+__device__ float2 float2_scale_flipped ( float k, float2 a )
+{
+  return float2_scale ( a, k );
+}
+
+__device__ float2 float2_scale_inverted ( float2 a, float k )
+{
+  return float2_scale ( a, 1.0 / k );
+}
+
+__device__ float2 float2_negate ( float2 x )
+{
+  return make_float2 ( - x.x, - x.y );
+}
+
+__device__ float2 float2_recip ( float2 x )
+{
+  return make_float2 ( 1.0 / x.x, 1.0 / x.y );
+}
+
+__device__ float float2_dot ( float2 a, float2 b )
+{
+  return a.x * b.x + a.y * b.y;
+}
+
+#endif // CL_CUDA_FLOAT2_H_
diff --git a/src/api/memory.lisp b/src/api/memory.lisp
@@ -93,8 +93,10 @@
       (:float (cffi:mem-aref host-ptr :float index))
       (:double (cffi:mem-aref host-ptr :double index))
       ((:boolean :int8) (cffi:mem-aref host-ptr '(:boolean :int8) index))
+      ((:struct 'float2) (cffi:mem-aref host-ptr '(:struct float2) index))
       ((:struct 'float3) (cffi:mem-aref host-ptr '(:struct float3) index))
       ((:struct 'float4) (cffi:mem-aref host-ptr '(:struct float4) index))
+      ((:struct 'double2) (cffi:mem-aref host-ptr '(:struct double2) index))
       ((:struct 'double3) (cffi:mem-aref host-ptr '(:struct double3) index))
       ((:struct 'double4) (cffi:mem-aref host-ptr '(:struct double4) index))
       (_ (error "The value ~S is an invalid CFFI type to access host memory." cffi-type)))))
@@ -108,10 +110,14 @@
       (:double (setf (cffi:mem-aref host-ptr :double index) new-value))
       ((:boolean :int8)
        (setf (cffi:mem-aref host-ptr '(:boolean :int8) index) new-value))
+      ((:struct 'float2)
+       (setf (cffi:mem-aref host-ptr '(:struct float2) index) new-value))
       ((:struct 'float3)
        (setf (cffi:mem-aref host-ptr '(:struct float3) index) new-value))
       ((:struct 'float4)
        (setf (cffi:mem-aref host-ptr '(:struct float4) index) new-value))
+      ((:struct 'double2)
+       (setf (cffi:mem-aref host-ptr '(:struct double2) index) new-value))
       ((:struct 'double3)
        (setf (cffi:mem-aref host-ptr '(:struct double3) index) new-value))
       ((:struct 'double4)

diff --git a/src/lang/built-in.lisp b/src/lang/built-in.lisp
@@ -39,9 +39,11 @@
   '(;; arithmetic operators
     + (((int    int)    int    t   "+")
        ((float  float)  float  t   "+")
+       ((float2 float2) float2 nil "float2_add")
        ((float3 float3) float3 nil "float3_add")
        ((float4 float4) float4 nil "float4_add")
        ((double  double)  double  t   "+")
+       ((double2 double2) double2 nil "double2_add")
        ((double3 double3) double3 nil "double3_add")
        ((double4 double4) double4 nil "double4_add")
        ((int    float)    float    t   "+")
@@ -50,16 +52,20 @@
        ((double    int)    double    t   "+"))
     - (((int)           int    nil "int_negate")
        ((float)         float  nil "float_negate")
+       ((float2)        float2 nil "float2_negate")
        ((float3)        float3 nil "float3_negate")
        ((float4)        float4 nil "float4_negate")
        ((double)  double  nil "double_negate")
+       ((double2) double2 nil "double2_negate")
        ((double3) double3 nil "double3_negate")
        ((double4) double4 nil "double4_negate")
        ((int    int)    int    t   "-")
        ((float  float)  float  t   "-")
+       ((float2 float2) float2 nil "float2_sub")
        ((float3 float3) float3 nil "float3_sub")
        ((float4 float4) float4 nil "float4_sub")
        ((double  double)  double  t   "-")
+       ((double2 double2) double2 nil "double2_sub")
        ((double3 double3) double3 nil "double3_sub")
        ((double4 double4) double4 nil "double4_sub")
        ((int    float)    float    t   "-")
@@ -68,12 +74,16 @@
        ((double    int)    double    t   "-"))
     * (((int    int)    int    t   "*")
        ((float  float)  float  t   "*")
+       ((float2 float)  float2 nil "float2_scale")
        ((float3 float)  float3 nil "float3_scale")
+       ((float  float2) float2 nil "float2_scale_flipped")
        ((float  float3) float3 nil "float3_scale_flipped")
        ((float4 float)  float4 nil "float4_scale")
        ((float  float4) float4 nil "float4_scale_flipped")
        ((double  double)  double  t   "*")
+       ((double2 double)  double2 nil "double2_scale")
        ((double3 double)  double3 nil "double3_scale")
+       ((double  double2) double2 nil "double2_scale_flipped")
        ((double  double3) double3 nil "double3_scale_flipped")
        ((double4 double)  double4 nil "double4_scale")
        ((double  double4) double4 nil "double4_scale_flipped")
@@ -83,16 +93,20 @@
        ((double    int)    double    t   "*"))
     / (((int)           int    nil "int_recip")
        ((float)         float  nil "float_recip")
+       ((float2)        float2 nil "float2_recip")
        ((float3)        float3 nil "float3_recip")
        ((float4)        float4 nil "float4_recip")
        ((double)  double  nil "double_recip")
+       ((double2) double2 nil "double2_recip")
        ((double3) double3 nil "double3_recip")
        ((double4) double4 nil "double4_recip")
        ((int    int)    int    t   "/")
        ((float  float)  float  t   "/")
+       ((float2 float)  float2 nil "float2_scale_inverted")
        ((float3 float)  float3 nil "float3_scale_inverted")
        ((float4 float)  float4 nil "float4_scale_inverted")
        ((double  double)  double  t   "/")
+       ((double2 double)  double2 nil "double2_scale_inverted")
        ((double3 double)  double3 nil "double3_scale_inverted")
        ((double4 double)  double4 nil "double4_scale_inverted")
        ((int    float)    float    t   "/")
@@ -166,17 +180,21 @@
              ((double) double* nil "&")
              ((curand-state-xorwow) curand-state-xorwow* nil "&"))
     ;; built-in vector constructor
+    float2 (((float float) float2 nil "make_float2"))
     float3 (((float float float) float3 nil "make_float3"))
     float4 (((float float float float) float4 nil "make_float4"))
+    double2 (((double double) double2 nil "make_double2"))
     double3 (((double double double) double3 nil "make_double3"))
     double4 (((double double double double) double4 nil "make_double4"))
     ;; Synchronization functions
     syncthreads ((() void nil "__syncthreads"))
     ;; type casting intrinsics
     double-to-int-rn (((double) int nil "__double2int_rn"))
     ;; linear algebraic operators
-    dot (((float3 float3) float nil "float3_dot")
+    dot (((float2 float2) float nil "float2_dot")
+         ((float3 float3) float nil "float3_dot")
          ((float4 float4) float nil "float4_dot")
+         ((double2 double2) double nil "double2_dot")
          ((double3 double3) double nil "double3_dot")
          ((double4 double4) double nil "double4_dot"))
     ;; CURAND operations

diff --git a/src/lang/compiler/compile-kernel.lisp b/src/lang/compiler/compile-kernel.lisp
@@ -89,9 +89,11 @@
 (defun compile-includes ()
   "#include \"int.h\"
 #include \"float.h\"
+#include \"float2.h\"
 #include \"float3.h\"
 #include \"float4.h\"
 #include \"double.h\"
+#include \"double2.h\"
 #include \"double3.h\"
 #include \"double4.h\"
 #include \"curand.h\"

diff --git a/src/lang/data.lisp b/src/lang/data.lisp
@@ -17,6 +17,14 @@
            :cl-cuda-float-p
            ;; Double
            :cl-cuda-double-p
+           ;; Float2
+           :float2
+           :make-float2
+           :float2-x
+           :float2-y
+           :float2-p
+           :float2-=
+           :with-float2
            ;; Float3
            :float3
            :make-float3
@@ -36,6 +44,14 @@
            :float4-p
            :float4-=
            :with-float4
+           ;; Double2
+           :double2
+           :make-double2
+           :double2-x
+           :double2-y
+           :double2-p
+           :double2-=
+           :with-double2
            ;; Double3
            :double3
            :make-double3
@@ -103,6 +119,41 @@
   (typep object 'double-float))
 
 
+;;;
+;;; Float2
+;;;
+
+(defstruct (float2 (:constructor make-float2 (x y)))
+  (x 0.0 :type single-float)
+  (y 0.0 :type single-float))
+
+(defun float2-= (a b)
+  (and (= (float2-x a) (float2-x b))
+       (= (float2-y a) (float2-y b))))
+
+(cffi:defcstruct (float2 :class float2-c)
+  (x :float)
+  (y :float))
+
+(defmacro with-float2 ((x y) value &body body)
+  (once-only (value)
+    `(let ((,x (float2-x ,value))
+           (,y (float2-y ,value)))
+       (declare (ignorable ,x ,y))
+       ,@body)))
+
+(defmethod cffi:translate-into-foreign-memory ((value float2)
+                                               (type float2-c)
+                                               ptr)
+  (cffi:with-foreign-slots ((x y) ptr (:struct float2))
+    (setf x (float2-x value)
+          y (float2-y value))))
+
+(defmethod cffi:translate-from-foreign (value (type float2-c))
+  (cffi:with-foreign-slots ((x y) value (:struct float2))
+    (make-float2 x y)))
+
+
 ;;;
 ;;; Float3
 ;;;
@@ -188,6 +239,41 @@
     (make-float4 x y z w)))
 
 
+;;;
+;;; Double2
+;;;
+
+(defstruct (double2 (:constructor make-double2 (x y)))
+  (x 0.0d0 :type double-float)
+  (y 0.0d0 :type double-float))
+
+(defun double2-= (a b)
+  (and (= (double2-x a) (double2-x b))
+       (= (double2-y a) (double2-y b))))
+
+(cffi:defcstruct (double2 :class double2-c)
+  (x :double)
+  (y :double))
+
+(defmacro with-double2 ((x y) value &body body)
+  (once-only (value)
+    `(let ((,x (double2-x ,value))
+           (,y (double2-y ,value)))
+       (declare (ignorable ,x ,y))
+       ,@body)))
+
+(defmethod cffi:translate-into-foreign-memory ((value double2)
+                                               (type double2-c)
+                                               ptr)
+  (cffi:with-foreign-slots ((x y) ptr (:struct double2))
+    (setf x (double2-x value)
+          y (double2-y value))))
+
+(defmethod cffi:translate-from-foreign (value (type double2-c))
+  (cffi:with-foreign-slots ((x y) value (:struct double2))
+    (make-double2 x y)))
+
+
 ;;;
 ;;; Double3
 ;;;

diff --git a/src/lang/lang.lisp b/src/lang/lang.lisp
@@ -11,7 +11,15 @@
 
 ;; reexport symbols of data structures cl-cuda provides
 (reexport-from :cl-cuda.lang.data
-               :include '(;; Float3
+               :include '(;; Float2
+                          :float2
+                          :make-float2
+                          :float2-x
+                          :float2-y
+                          :float2-p
+                          :float2-=
+                          :with-float2
+                          ;; Float3
                           :float3
                           :make-float3
                           :float3-x
@@ -30,6 +38,14 @@
                           :float4-p
                           :float4-=
                           :with-float4
+                          ;; Double2
+                          :double2
+                          :make-double2
+                          :double2-x
+                          :double2-y
+                          :double2-p
+                          :double2-=
+                          :with-double2
                           ;; Double3
                           :double3
                           :make-double3
@@ -58,16 +74,20 @@
                           :float
                           :double
                           :curand-state-xorwow
+                          :float2
                           :float3
                           :float4
+                          :double2
                           :double3
                           :double4
                           :bool*
                           :int*
                           :float*
                           :double*
+                          :float2*
                           :float3*
                           :float4*
+                          :double2*
                           :double3*
                           :double4*
                           :curand-state-xorwow*