ryankert01 commented on code in PR #1108:
URL: https://github.com/apache/mahout/pull/1108#discussion_r2875419501


##########
qdp/qdp-kernels/src/amplitude.cu:
##########
@@ -432,20 +435,29 @@ __global__ void l2_norm_batch_kernel(
 
     const size_t vec_idx = block_in_sample * blockDim.x + threadIdx.x;
     const size_t stride = blockDim.x * blocks_per_sample;
+    const double* sample_input = input_batch + base;
+    const bool sample_input_aligned =
+        (reinterpret_cast<uintptr_t>(sample_input) % alignof(double2)) == 0;
 
     double local_sum = 0.0;
 
     size_t vec_offset = vec_idx;
     size_t offset = vec_offset * 2;
     while (offset + 1 < sample_len) {
-        const double2 v = __ldg(reinterpret_cast<const double2*>(input_batch + 
base) + vec_offset);
-        local_sum += v.x * v.x + v.y * v.y;
+        if (sample_input_aligned) {
+            const double2 v = __ldg(reinterpret_cast<const 
double2*>(sample_input) + vec_offset);
+            local_sum += v.x * v.x + v.y * v.y;
+        } else {
+            const double v1 = __ldg(sample_input + offset);
+            const double v2 = __ldg(sample_input + offset + 1);
+            local_sum += v1 * v1 + v2 * v2;
+        }

Review Comment:
   trivial



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to