Skip to content

Commit 81081ea

Browse files
authored
Merge pull request #56 from AnyDSL/opencl-spirv
Initial support for OpenCL SPIR-V
2 parents e36b56f + 078dcaa commit 81081ea

File tree

7 files changed

+87
-21
lines changed

7 files changed

+87
-21
lines changed

cmake/anydsl_runtime-config.cmake.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ function(anydsl_runtime_wrap outfiles)
273273
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_nvvm.impala
274274
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_amdgpu.impala
275275
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_opencl.impala
276+
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_spirv.impala
276277
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_thorin.impala
277278
${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/runtime.impala
278279
${_additional_platform_files})

platforms/artic/intrinsics_opencl.impala

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
#[import(cc = "device", name = "min")] fn opencl_min(i32, i32) -> i32;
5656
#[import(cc = "device", name = "max")] fn opencl_max(i32, i32) -> i32;
5757
#[import(cc = "device", name = "atomic_add")] fn opencl_atomic_add_global(&mut addrspace(1)i32, i32) -> i32;
58+
#[import(cc = "device", name = "atomic_add")] fn opencl_atomic_add_global_f32(&mut addrspace(1)f32, f32) -> f32;
5859
#[import(cc = "device", name = "atomic_add")] fn opencl_atomic_add_shared(&mut addrspace(3)i32, i32) -> i32;
5960
#[import(cc = "device", name = "atomic_min")] fn opencl_atomic_min_global(&mut addrspace(1)i32, i32) -> i32;
6061
#[import(cc = "device", name = "atomic_min")] fn opencl_atomic_min_shared(&mut addrspace(3)i32, i32) -> i32;
@@ -100,6 +101,43 @@ fn @opencl_accelerator(dev: i32) = Accelerator {
100101
barrier = @|| opencl_barrier(CLK_LOCAL_MEM_FENCE),
101102
};
102103

104+
fn spv_cl_get_num_groups() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](24 /* BuiltInNumWorkgroups */);
105+
fn spv_cl_get_local_size() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](25 /* BuiltInWorkgroupSize */);
106+
fn spv_cl_get_group_id() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](26 /* BuiltInWorkgroupId */);
107+
fn spv_cl_get_local_id() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](27 /* BuiltInLocalInvocationId */);
108+
fn spv_cl_get_global_id() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](28 /* BuiltInGlobalInvocationId */);
109+
fn spv_cl_get_global_size() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](31 /* BuiltInGlobalSize */);
110+
111+
fn @opencl_spirv_accelerator(dev: i32) = Accelerator {
112+
exec = @|body| |grid, block| {
113+
let work_item = WorkItem {
114+
tidx = @|| spv_cl_get_local_id()(0) as i32,
115+
tidy = @|| spv_cl_get_local_id()(1) as i32,
116+
tidz = @|| spv_cl_get_local_id()(2) as i32,
117+
bidx = @|| spv_cl_get_local_id()(0) as i32,
118+
bidy = @|| spv_cl_get_group_id()(1) as i32,
119+
bidz = @|| spv_cl_get_group_id()(2) as i32,
120+
gidx = @|| spv_cl_get_global_id()(0) as i32,
121+
gidy = @|| spv_cl_get_global_id()(1) as i32,
122+
gidz = @|| spv_cl_get_global_id()(2) as i32,
123+
bdimx = @|| spv_cl_get_local_size()(0) as i32,
124+
bdimy = @|| spv_cl_get_local_size()(1) as i32,
125+
bdimz = @|| spv_cl_get_local_size()(2) as i32,
126+
gdimx = @|| spv_cl_get_global_size()(0) as i32,
127+
gdimy = @|| spv_cl_get_global_size()(1) as i32,
128+
gdimz = @|| spv_cl_get_global_size()(2) as i32,
129+
nblkx = @|| spv_cl_get_num_groups()(0) as i32,
130+
nblky = @|| spv_cl_get_num_groups()(1) as i32,
131+
nblkz = @|| spv_cl_get_num_groups()(2) as i32
132+
};
133+
opencl_spirv(dev, grid, block, || @body(work_item))
134+
},
135+
sync = @|| synchronize_opencl(dev),
136+
alloc = @|size| alloc_opencl(dev, size),
137+
alloc_unified = @|size| alloc_opencl_unified(dev, size),
138+
barrier = @|| opencl_barrier(CLK_LOCAL_MEM_FENCE),
139+
};
140+
103141
static opencl_intrinsics = Intrinsics {
104142
expf = opencl_expf,
105143
exp2f = opencl_exp2f,
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#[import(cc = "device", name = "spirv.builtin")] fn spirv_get_builtin[T](i32) -> T;

platforms/artic/intrinsics_thorin.impala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#[import(cc = "thorin")] fn cuda(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
1616
#[import(cc = "thorin")] fn nvvm(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
1717
#[import(cc = "thorin")] fn opencl(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
18+
#[import(cc = "thorin")] fn opencl_spirv(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
1819
#[import(cc = "thorin")] fn amdgpu_hsa(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
1920
#[import(cc = "thorin")] fn amdgpu_pal(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> ();
2021
#[import(cc = "thorin")] fn reserve_shared[T](_size: i32) -> &mut addrspace(3)[T];

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ if(RUNTIME_JIT)
148148
../platforms/${frontend}/intrinsics_nvvm.impala
149149
../platforms/${frontend}/intrinsics_amdgpu.impala
150150
../platforms/${frontend}/intrinsics_opencl.impala
151+
../platforms/${frontend}/intrinsics_spirv.impala
151152
../platforms/${frontend}/intrinsics_thorin.impala
152153
../platforms/${frontend}/intrinsics.impala
153154
../platforms/${frontend}/runtime.impala)

src/opencl_platform.cpp

Lines changed: 44 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -357,25 +357,32 @@ void time_kernel_callback(cl_event event, cl_int, void* data) {
357357
CHECK_OPENCL(err, "clReleaseEvent()");
358358
}
359359

360+
static inline bool ends_with(std::string_view str, std::string_view suffix) {
361+
if (str.size() < suffix.size())
362+
return false;
363+
return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
364+
}
365+
360366
void OpenCLPlatform::launch_kernel(DeviceId dev, const LaunchParams& launch_params) {
361367
if (devices_[dev].is_intel_fpga && launch_params.num_args == 0) {
362368
debug("processing by autorun kernel");
363369
return;
364370
}
365371

366372
auto kernel = load_kernel(dev, launch_params.file_name, launch_params.kernel_name);
373+
bool is_spirv = ends_with(launch_params.file_name, ".spv");
367374

368375
// set up arguments
369-
std::vector<cl_mem> kernel_structs(launch_params.num_args);
376+
std::vector<cl_mem> kernel_structs;
370377
for (uint32_t i = 0; i < launch_params.num_args; i++) {
371-
if (launch_params.args.types[i] == KernelArgType::Struct) {
378+
if (!is_spirv && launch_params.args.types[i] == KernelArgType::Struct) {
372379
// create a buffer for each structure argument
373380
cl_int err = CL_SUCCESS;
374381
cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
375382
cl_mem struct_buf = clCreateBuffer(devices_[dev].ctx, flags, launch_params.args.sizes[i], launch_params.args.data[i], &err);
376383
CHECK_OPENCL(err, "clCreateBuffer()");
377-
kernel_structs[i] = struct_buf;
378-
clSetKernelArg(kernel, i, sizeof(cl_mem), &kernel_structs[i]);
384+
kernel_structs.push_back(struct_buf);
385+
clSetKernelArg(kernel, i, sizeof(cl_mem), &struct_buf);
379386
} else {
380387
#ifdef CL_VERSION_2_0
381388
if (launch_params.args.types[i] == KernelArgType::Ptr && devices_[dev].version_major == 2) {
@@ -421,11 +428,9 @@ void OpenCLPlatform::launch_kernel(DeviceId dev, const LaunchParams& launch_para
421428
dynamic_profile(dev, launch_params.file_name);
422429

423430
// release temporary buffers for struct arguments
424-
for (uint32_t i = 0; i < launch_params.num_args; i++) {
425-
if (launch_params.args.types[i] == KernelArgType::Struct) {
426-
cl_int err = clReleaseMemObject(kernel_structs[i]);
427-
CHECK_OPENCL(err, "clReleaseMemObject()");
428-
}
431+
for (auto tmp : kernel_structs) {
432+
cl_int err = clReleaseMemObject(tmp);
433+
CHECK_OPENCL(err, "clReleaseMemObject()");
429434
}
430435
}
431436

@@ -515,6 +520,21 @@ cl_program OpenCLPlatform::load_program_binary(DeviceId dev, const std::string&
515520
return program;
516521
}
517522

523+
cl_program OpenCLPlatform::load_program_il(DeviceId dev, const std::string& filename, const std::string& program_string) const {
524+
#if CL_VERSION_2_1
525+
const size_t program_length = program_string.length();
526+
const char* program_c_str = program_string.c_str();
527+
cl_int err = CL_SUCCESS;
528+
cl_program program = clCreateProgramWithIL(devices_[dev].ctx, (const void*)program_c_str, program_length, &err);
529+
CHECK_OPENCL(err, "clCreateProgramWithIL()");
530+
debug("Loading IL '%' for OpenCL device %", filename, dev);
531+
532+
return program;
533+
#else
534+
error("OpenCL 2.1 or later is required for SPIR-V support.");
535+
#endif
536+
}
537+
518538
cl_program OpenCLPlatform::load_program_source(DeviceId dev, const std::string& filename, const std::string& program_string) const {
519539
const size_t program_length = program_string.length();
520540
const char* program_c_str = program_string.c_str();
@@ -589,25 +609,28 @@ cl_kernel OpenCLPlatform::load_kernel(DeviceId dev, const std::string& filename,
589609
if (prog_it == prog_cache.end()) {
590610
opencl_dev.unlock();
591611

592-
if (canonical.extension() != ".cl")
593-
error("Incorrect extension for kernel file '%' (should be '.cl')", canonical.string());
594-
595612
// load file from disk or cache
596613
auto src_path = canonical;
597614
if (opencl_dev.is_intel_fpga)
598615
src_path.replace_extension(".aocx");
599616
std::string src_code = runtime_->load_file(src_path.string());
600617

601-
// compile src or load from cache
602-
std::string bin = opencl_dev.is_intel_fpga ? src_code : runtime_->load_from_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code);
603-
if (bin.empty()) {
604-
program = load_program_source(dev, src_path.string(), src_code);
618+
if (canonical.extension() == ".spv") {
619+
program = load_program_il(dev, src_path.string(), src_code);
605620
program = compile_program(dev, program, src_path.string());
606-
runtime_->store_to_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code, program_as_string(program));
607-
} else {
608-
program = load_program_binary(dev, src_path.string(), bin);
609-
program = compile_program(dev, program, src_path.string());
610-
}
621+
} else if (canonical.extension() == ".cl") {
622+
// compile src or load from cache
623+
std::string bin = opencl_dev.is_intel_fpga ? src_code : runtime_->load_from_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code);
624+
if (bin.empty()) {
625+
program = load_program_source(dev, src_path.string(), src_code);
626+
program = compile_program(dev, program, src_path.string());
627+
runtime_->store_to_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code, program_as_string(program));
628+
} else {
629+
program = load_program_binary(dev, src_path.string(), bin);
630+
program = compile_program(dev, program, src_path.string());
631+
}
632+
} else
633+
error("Incorrect extension for kernel file '%' (should be '.cl' or .'spv')", canonical.string());
611634

612635
opencl_dev.lock();
613636
prog_cache[canonical.string()] = program;

src/opencl_platform.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ class OpenCLPlatform : public Platform {
107107

108108
cl_kernel load_kernel(DeviceId dev, const std::string& filename, const std::string& kernelname);
109109
cl_program load_program_binary(DeviceId dev, const std::string& filename, const std::string& program_string) const;
110+
cl_program load_program_il(DeviceId dev, const std::string& filename, const std::string& program_string) const;
110111
cl_program load_program_source(DeviceId dev, const std::string& filename, const std::string& program_string) const;
111112
cl_program compile_program(DeviceId dev, cl_program program, const std::string& filename) const;
112113

0 commit comments

Comments
 (0)