Skip to content

Commit 7948cd1

Browse files
committed
GPU: Fix alignment in GPU memory benchmark
1 parent 07f9cfc commit 7948cd1

1 file changed

Lines changed: 2 additions & 2 deletions

File tree

GPU/GPUbenchmark/Shared/Utils.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ inline std::string getTestName(Mode mode, Test test, KernelConfig blocks)
 template <class chunk_t>
 inline chunk_t* getCustomPtr(chunk_t* scratchPtr, float startGB)
 {
-  return reinterpret_cast<chunk_t*>(reinterpret_cast<char*>(scratchPtr) + static_cast<size_t>(GB * startGB));
+  return reinterpret_cast<chunk_t*>(reinterpret_cast<char*>(scratchPtr) + (static_cast<size_t>(GB * startGB) & 0xFFFFFFFFFFFFF000));
 }
 
 inline float computeThroughput(Test test, float result, float chunkSizeGB, int ntests)
@@ -160,7 +160,7 @@ inline float computeThroughput(Test test, float result, float chunkSizeGB, int n
 template <class chunk_t>
 inline size_t getBufferCapacity(float chunkReservedGB)
 {
-  return static_cast<size_t>((GB * chunkReservedGB) / sizeof(chunk_t));
+  return (static_cast<size_t>(GB * chunkReservedGB) & 0xFFFFFFFFFFFFF000) / sizeof(chunk_t);
 }
 
 // LCG: https://rosettacode.org/wiki/Linear_congruential_generator

0 commit comments

Comments (0)