So, I was trying with this MLIR code:
module {
func @matmul_linalg(%A: memref<8x8xf32>, %B: memref<8x8xf32>, %C: memref<8x8xf32>) {
linalg.matmul ins(%A, %B : memref<8x8xf32>, memref<8x8xf32>)
outs(%C: memref<8x8xf32>)
return
}
func @main() {
%A = memref.alloc() : memref<8x8xf32>
%B = memref.alloc() : memref<8x8xf32>
%C = memref.alloc() : memref<8x8xf32>
%cf1 = constant 1.0 : f32
linalg.fill(%A, %cf1) : memref<8x8xf32>, f32
linalg.fill(%B, %cf1) : memref<8x8xf32>, f32
linalg.fill(%C, %cf1) : memref<8x8xf32>, f32
call @matmul_linalg(%A, %B, %C) : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> ()
return
}
}
and this is my pass:
mlir-opt matmul-gpu-02.mlir.in \
--linalg-tile-to-parallel-loops="linalg-tile-sizes=4,2" \
--convert-linalg-to-parallel-loops \
--test-gpu-greedy-parallel-loop-mapping \
--convert-parallel-loops-to-gpu \
--gpu-kernel-outlining \
--lower-affine \
--convert-scf-to-std \
--canonicalize \
--pass-pipeline="gpu.module(strip-debuginfo, convert-gpu-to-nvvm, gpu-to-cubin)" \
--gpu-to-llvm 2>&1 >matmul-gpu-02.mlir.out
and this is how I’m generating the object:
mlir-translate matmul-gpu-02.mlir.out --mlir-to-llvmir | opt -O3 -S | llc -O3 | as - -o matmul-gpu-02.mlir.o
I didn’t get any complain up to this point, but when I was trying to generate the executable –
clang++-11 matmul-gpu-02.mlir.o -lcuda \
$HOME/opt/llvm/lib/libmlir_cuda_runtime.so \
$HOME/opt/llvm/lib/libmlir_runner_utils.so \
-o matmul-gpu-02
I get these errors –
'cuStreamSynchronize(stream)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS'
'cuStreamDestroy(stream)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS'
'cuModuleUnload(module)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS'
'cuModuleLoadData(&module, data)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS'
'cuModuleGetFunction(&function, module, name)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS'
'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, smem, stream, params, extra)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuStreamSynchronize(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuStreamDestroy(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuModuleUnload(module)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuModuleLoadData(&module, data)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS'
'cuModuleGetFunction(&function, module, name)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS'
'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, smem, stream, params, extra)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuStreamSynchronize(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuStreamDestroy(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuModuleUnload(module)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuModuleLoadData(&module, data)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS'
'cuModuleGetFunction(&function, module, name)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS'
'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, smem, stream, params, extra)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuStreamSynchronize(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuStreamDestroy(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE'
'cuModuleUnload(module)' failed with 'CUDA_ERROR_INVALID_HANDLE'
Any idea?