libSGM.patch
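
This patch lets the vendored libSGM sources build for GPU architectures that predate the `__ldg` intrinsic (compute capability below 3.5). Direct `__ldg` calls in the path-aggregation kernels are replaced with a small `ldg` wrapper, added to path_aggregation_common.hpp, that forwards to `__ldg` when `__CUDA_ARCH__ >= 350` and falls back to a plain dereference otherwise.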
diff --git a/src/horizontal_path_aggregation.cu b/src/horizontal_path_aggregation.cu
index e5742fa..815db3e 100644
--- a/src/horizontal_path_aggregation.cu
+++ b/src/horizontal_path_aggregation.cu
@@ -86,7 +86,7 @@ __global__ void aggregate_horizontal_path_kernel(
 			for(unsigned int j = 0; j < DP_BLOCK_SIZE; ++j){
 				const int x = static_cast<int>(width - (min_disp + j + dp_offset));
 				if(0 <= x && x < static_cast<int>(width)){
-					right_buffer[i][j] = __ldg(&right[i * feature_step + x]);
+					right_buffer[i][j] = ldg(&right[i * feature_step + x]);
 				}else{
 					right_buffer[i][j] = 0;
 				}
@@ -106,7 +106,7 @@ __global__ void aggregate_horizontal_path_kernel(
 			if(y >= height){
 				continue;
 			}
-			const feature_type left_value = __ldg(&left[j * feature_step + x]);
+			const feature_type left_value = ldg(&left[j * feature_step + x]);
 			if(DIRECTION > 0){
 				const feature_type t = right_buffer[j][DP_BLOCK_SIZE - 1];
 				for(unsigned int k = DP_BLOCK_SIZE - 1; k > 0; --k){
@@ -119,7 +119,7 @@ __global__ void aggregate_horizontal_path_kernel(
 #endif
 				if(lane_id == 0 && x >= min_disp){
 					right_buffer[j][0] =
-						__ldg(&right[j * feature_step + x - min_disp]);
+						ldg(&right[j * feature_step + x - min_disp]);
 				}
 			}else{
 				const feature_type t = right_buffer[j][0];
@@ -135,7 +135,7 @@ __global__ void aggregate_horizontal_path_kernel(
 				if(lane_id + 1 == SUBGROUP_SIZE){
 					if(x >= min_disp + dp_offset + DP_BLOCK_SIZE - 1){
 						right_buffer[j][DP_BLOCK_SIZE - 1] =
-							__ldg(&right[j * feature_step + x - (min_disp + dp_offset + DP_BLOCK_SIZE - 1)]);
+							ldg(&right[j * feature_step + x - (min_disp + dp_offset + DP_BLOCK_SIZE - 1)]);
 					}else{
 						right_buffer[j][DP_BLOCK_SIZE - 1] = 0;
 					}
diff --git a/src/oblique_path_aggregation.cu b/src/oblique_path_aggregation.cu
index 3405093..a4a44e0 100644
--- a/src/oblique_path_aggregation.cu
+++ b/src/oblique_path_aggregation.cu
@@ -96,7 +96,7 @@ __global__ void aggregate_oblique_path_kernel(
 		__syncthreads();
 		// Compute
 		if(0 <= x && x < static_cast<int>(width)){
-			const feature_type left_value = __ldg(&left[x + y * width]);
+			const feature_type left_value = ldg(&left[x + y * width]);
 			feature_type right_values[DP_BLOCK_SIZE];
 			for(unsigned int j = 0; j < DP_BLOCK_SIZE; ++j){
 				right_values[j] = right_buffer[right0_addr_lo + j][right0_addr_hi];
diff --git a/src/path_aggregation_common.hpp b/src/path_aggregation_common.hpp
index ddf5590..533be51 100644
--- a/src/path_aggregation_common.hpp
+++ b/src/path_aggregation_common.hpp
@@ -99,6 +99,17 @@ __device__ unsigned int generate_mask()
 	return static_cast<unsigned int>((1ull << SIZE) - 1u);
 }
 
+// Portable read-only load: use the __ldg intrinsic where the target
+// architecture provides it (sm_35 and later), otherwise fall back to a plain load.
+template<typename T>
+__device__ __forceinline__ T ldg(const T* ptr) {
+#if __CUDA_ARCH__ >= 350
+	return __ldg(ptr);
+#else
+	return *ptr;
+#endif
+}
+
 }
 }
 
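
For reference, below is a minimal standalone sketch of the guard pattern the patch introduces, shown outside the patch itself. The `copy_kernel`, the buffer names, and the launch configuration are illustrative only and are not part of libSGM:

#include <cstdio>
#include <cuda_runtime.h>

// Same guard as in the patch: route loads through the read-only data
// cache via __ldg on sm_35+, and degrade to a plain load elsewhere.
template <typename T>
__device__ __forceinline__ T ldg(const T *ptr)
{
#if __CUDA_ARCH__ >= 350
	return __ldg(ptr);
#else
	return *ptr;
#endif
}

// Illustrative kernel: copies src into dst, pulling each element
// through the wrapper.
__global__ void copy_kernel(const int *src, int *dst, int n)
{
	const int i = blockIdx.x * blockDim.x + threadIdx.x;
	if(i < n){
		dst[i] = ldg(&src[i]);
	}
}

int main()
{
	const int n = 8;
	int host[n] = {0, 1, 2, 3, 4, 5, 6, 7}, out[n] = {};
	int *src, *dst;
	cudaMalloc(&src, n * sizeof(int));
	cudaMalloc(&dst, n * sizeof(int));
	cudaMemcpy(src, host, n * sizeof(int), cudaMemcpyHostToDevice);
	copy_kernel<<<1, n>>>(src, dst, n);
	cudaMemcpy(out, dst, n * sizeof(int), cudaMemcpyDeviceToHost);
	for(int i = 0; i < n; ++i){
		printf("%d ", out[i]);
	}
	printf("\n");
	cudaFree(src);
	cudaFree(dst);
	return 0;
}

Because `ldg` is a `__device__` function, `__CUDA_ARCH__` is evaluated separately for each device-code compilation pass, so a fat binary gets `__ldg` on sm_35+ targets and a plain load on older ones. Centralizing the check in one wrapper also keeps every kernel call site unchanged, instead of scattering `#if __CUDA_ARCH__` guards through the aggregation code.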