Module: Mesa Branch: main Commit: f745a07b36cc4c8fae90d6586fda543957867285 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f745a07b36cc4c8fae90d6586fda543957867285
Author: Karol Herbst <[email protected]> Date: Wed Apr 26 12:39:39 2023 +0200 rusticl/program: rework dynamic Program state We had a lot of implicit locks going on even though there was strictly no need in doing so. This makes the compilation APIs more atomic while also providing a cleaner interface. Not in the mood of splitting it up without deadlocking in the middle. So it's one big commit sadly. Signed-off-by: Karol Herbst <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22434> --- src/gallium/frontends/rusticl/api/kernel.rs | 30 +- src/gallium/frontends/rusticl/api/program.rs | 5 - src/gallium/frontends/rusticl/core/kernel.rs | 16 +- src/gallium/frontends/rusticl/core/program.rs | 400 +++++++++++++------------- 4 files changed, 215 insertions(+), 236 deletions(-) diff --git a/src/gallium/frontends/rusticl/api/kernel.rs b/src/gallium/frontends/rusticl/api/kernel.rs index 8a7e18067e9..30381c38098 100644 --- a/src/gallium/frontends/rusticl/api/kernel.rs +++ b/src/gallium/frontends/rusticl/api/kernel.rs @@ -1,16 +1,13 @@ use crate::api::event::create_and_queue; use crate::api::icd::*; use crate::api::util::*; -use crate::core::device::*; use crate::core::event::*; use crate::core::kernel::*; -use crate::core::program::*; use mesa_rust_util::ptr::*; use mesa_rust_util::string::*; use rusticl_opencl_gen::*; -use std::collections::HashSet; use std::mem; use std::os::raw::c_void; use std::ptr; @@ -126,19 +123,6 @@ unsafe fn kernel_work_arr_or_default<'a>(arr: *const usize, work_dim: cl_uint) - } } -fn get_devices_with_valid_build(p: &Arc<Program>) -> CLResult<Vec<&Arc<Device>>> { - // CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable for program. - let devs: Vec<_> = p - .devs - .iter() - .filter(|d| p.status(d) == CL_BUILD_SUCCESS as cl_build_status) - .collect(); - if devs.is_empty() { - return Err(CL_INVALID_PROGRAM_EXECUTABLE); - } - Ok(devs) -} - pub fn create_kernel( program: cl_program, kernel_name: *const ::std::os::raw::c_char, @@ -164,9 +148,7 @@ pub fn create_kernel( // CL_INVALID_KERNEL_DEFINITION if the function definition for __kernel function given by // kernel_name such as the number of arguments, the argument types are not the same for all // devices for which the program executable has been built. - let devs = get_devices_with_valid_build(&p)?; - let kernel_args: HashSet<_> = devs.iter().map(|d| p.args(d, &name)).collect(); - if kernel_args.len() != 1 { + if p.kernel_signatures(&name).len() != 1 { return Err(CL_INVALID_KERNEL_DEFINITION); } @@ -180,7 +162,12 @@ pub fn create_kernels_in_program( num_kernels_ret: *mut cl_uint, ) -> CLResult<()> { let p = program.get_arc()?; - let devs = get_devices_with_valid_build(&p)?; + + // CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable for any device in + // program. + if p.kernels().is_empty() { + return Err(CL_INVALID_PROGRAM_EXECUTABLE); + } // CL_INVALID_VALUE if kernels is not NULL and num_kernels is less than the number of kernels // in program. @@ -190,11 +177,10 @@ pub fn create_kernels_in_program( let mut num_kernels = 0; for name in p.kernels() { - let kernel_args: HashSet<_> = devs.iter().map(|d| p.args(d, &name)).collect(); // Kernel objects are not created for any __kernel functions in program that do not have the // same function definition across all devices for which a program executable has been // successfully built. - if kernel_args.len() != 1 { + if p.kernel_signatures(&name).len() != 1 { continue; } diff --git a/src/gallium/frontends/rusticl/api/program.rs b/src/gallium/frontends/rusticl/api/program.rs index a4a4b4e06e9..fc5ce5bc084 100644 --- a/src/gallium/frontends/rusticl/api/program.rs +++ b/src/gallium/frontends/rusticl/api/program.rs @@ -236,7 +236,6 @@ pub fn create_program_with_binary( } let prog = Program::from_bins(c, devs, &bins); - prog.build_nirs(); Ok(cl_program::from_arc(prog)) //• CL_INVALID_BINARY if an invalid program binary was encountered for any device. binary_status will return specific status for each device. @@ -292,7 +291,6 @@ pub fn build_program( //• CL_INVALID_OPERATION if program was not created with clCreateProgramWithSource, clCreateProgramWithIL or clCreateProgramWithBinary. if res { - p.build_nirs(); Ok(()) } else { if Platform::dbg().program { @@ -435,9 +433,6 @@ pub fn link_program( CL_LINK_PROGRAM_FAILURE }; - // Pre build nir kernels - res.build_nirs(); - let res = cl_program::from_arc(res); call_cb(pfn_notify, res, user_data); diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs index b0fea9ef593..04e1f4a42db 100644 --- a/src/gallium/frontends/rusticl/core/kernel.rs +++ b/src/gallium/frontends/rusticl/core/kernel.rs @@ -735,14 +735,14 @@ fn deserialize_nir( Some((nir, args, internal_args)) } -pub fn convert_spirv_to_nir( - p: &Program, +pub(super) fn convert_spirv_to_nir( + build: &ProgramBuild, name: &str, args: &[spirv::SPIRVKernelArg], dev: &Arc<Device>, -) -> (NirShader, Vec<KernelArg>, Vec<InternalKernelArg>, String) { +) -> (NirShader, Vec<KernelArg>, Vec<InternalKernelArg>) { let cache = dev.screen().shader_cache(); - let key = p.hash_key(dev, name); + let key = build.hash_key(dev, name); let res = if let Some(cache) = &cache { cache.get(&mut key.unwrap()).and_then(|entry| { @@ -753,10 +753,10 @@ pub fn convert_spirv_to_nir( None }; - let (nir, args, internal_args) = if let Some(res) = res { + if let Some(res) = res { res } else { - let mut nir = p.to_nir(name, dev); + let mut nir = build.to_nir(name, dev); /* this is a hack until we support fp16 properly and check for denorms inside * vstore/vload_half @@ -788,9 +788,7 @@ pub fn convert_spirv_to_nir( } (nir, args, internal_args) - }; - - (nir, args, internal_args, p.attribute_str(name, dev)) + } } fn extract<'a, const S: usize>(buf: &'a mut &[u8]) -> &'a [u8; S] { diff --git a/src/gallium/frontends/rusticl/core/program.rs b/src/gallium/frontends/rusticl/core/program.rs index 232b25e05ab..ff9c12f4984 100644 --- a/src/gallium/frontends/rusticl/core/program.rs +++ b/src/gallium/frontends/rusticl/core/program.rs @@ -62,9 +62,7 @@ pub struct Program { pub devs: Vec<Arc<Device>>, pub src: ProgramSourceType, pub kernel_count: AtomicU32, - spec_constants: Mutex<HashMap<u32, nir_const_value>>, build: Mutex<ProgramBuild>, - nir_builds: Mutex<HashMap<String, NirKernelBuild>>, } impl_cl_type_trait!(cl_program, Program, CL_INVALID_PROGRAM); @@ -77,9 +75,158 @@ pub struct NirKernelBuild { pub attributes_string: String, } -struct ProgramBuild { +pub(super) struct ProgramBuild { builds: HashMap<Arc<Device>, ProgramDevBuild>, + spec_constants: HashMap<u32, nir_const_value>, kernels: Vec<String>, + kernel_builds: HashMap<String, NirKernelBuild>, +} + +impl ProgramBuild { + fn attribute_str(&self, kernel: &str, d: &Arc<Device>) -> String { + let info = self.dev_build(d); + + let attributes_strings = [ + info.spirv.as_ref().unwrap().vec_type_hint(kernel), + info.spirv.as_ref().unwrap().local_size(kernel), + info.spirv.as_ref().unwrap().local_size_hint(kernel), + ]; + + let attributes_strings: Vec<_> = attributes_strings + .iter() + .flatten() + .map(String::as_str) + .collect(); + attributes_strings.join(",") + } + + fn args(&self, dev: &Arc<Device>, kernel: &str) -> Vec<spirv::SPIRVKernelArg> { + self.dev_build(dev).spirv.as_ref().unwrap().args(kernel) + } + + fn build_nirs(&mut self, is_src: bool) { + for kernel_name in &self.kernels { + let kernel_args: HashSet<_> = self + .devs_with_build() + .iter() + .map(|d| self.args(d, kernel_name)) + .collect(); + + let args = kernel_args.into_iter().next().unwrap(); + let mut nirs = HashMap::new(); + let mut args_set = HashSet::new(); + let mut internal_args_set = HashSet::new(); + let mut attributes_string_set = HashSet::new(); + + // TODO: we could run this in parallel? + for d in self.devs_with_build() { + let (nir, args, internal_args) = convert_spirv_to_nir(self, kernel_name, &args, d); + let attributes_string = self.attribute_str(kernel_name, d); + nirs.insert(d.clone(), Arc::new(nir)); + args_set.insert(args); + internal_args_set.insert(internal_args); + attributes_string_set.insert(attributes_string); + } + + // we want the same (internal) args for every compiled kernel, for now + assert!(args_set.len() == 1); + assert!(internal_args_set.len() == 1); + assert!(attributes_string_set.len() == 1); + let args = args_set.into_iter().next().unwrap(); + let internal_args = internal_args_set.into_iter().next().unwrap(); + + // spec: For kernels not created from OpenCL C source and the clCreateProgramWithSource + // API call the string returned from this query [CL_KERNEL_ATTRIBUTES] will be empty. + let attributes_string = if is_src { + attributes_string_set.into_iter().next().unwrap() + } else { + String::new() + }; + + self.kernel_builds.insert( + kernel_name.clone(), + NirKernelBuild { + nirs: nirs, + args: args, + internal_args: internal_args, + attributes_string: attributes_string, + }, + ); + } + } + + fn dev_build(&self, dev: &Device) -> &ProgramDevBuild { + self.builds.get(dev).unwrap() + } + + fn dev_build_mut(&mut self, dev: &Device) -> &mut ProgramDevBuild { + self.builds.get_mut(dev).unwrap() + } + + fn devs_with_build(&self) -> Vec<&Arc<Device>> { + self.builds + .iter() + .filter(|(_, build)| build.status == CL_BUILD_SUCCESS as cl_build_status) + .map(|(d, _)| d) + .collect() + } + + pub fn hash_key(&self, dev: &Arc<Device>, name: &str) -> Option<cache_key> { + if let Some(cache) = dev.screen().shader_cache() { + let info = self.dev_build(dev); + assert_eq!(info.status, CL_BUILD_SUCCESS as cl_build_status); + + let spirv = info.spirv.as_ref().unwrap(); + let mut bin = spirv.to_bin().to_vec(); + bin.extend_from_slice(name.as_bytes()); + + for (k, v) in &self.spec_constants { + bin.extend_from_slice(&k.to_ne_bytes()); + unsafe { + // SAFETY: we fully initialize this union + bin.extend_from_slice(&v.u64_.to_ne_bytes()); + } + } + + Some(cache.gen_key(&bin)) + } else { + None + } + } + + pub fn to_nir(&self, kernel: &str, d: &Arc<Device>) -> NirShader { + let mut spec_constants: Vec<_> = self + .spec_constants + .iter() + .map(|(&id, &value)| nir_spirv_specialization { + id: id, + value: value, + defined_on_module: true, + }) + .collect(); + + let info = self.dev_build(d); + assert_eq!(info.status, CL_BUILD_SUCCESS as cl_build_status); + + let mut log = Platform::dbg().program.then(Vec::new); + let nir = info.spirv.as_ref().unwrap().to_nir( + kernel, + d.screen + .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE), + &d.lib_clc, + &mut spec_constants, + d.address_bits(), + log.as_mut(), + ); + + if let Some(log) = log { + for line in log { + eprintln!("{}", line); + } + }; + + nir.unwrap() + } } struct ProgramDevBuild { @@ -162,12 +309,12 @@ impl Program { devs: devs.to_vec(), src: ProgramSourceType::Src(src), kernel_count: AtomicU32::new(0), - spec_constants: Mutex::new(HashMap::new()), build: Mutex::new(ProgramBuild { builds: Self::create_default_builds(devs), + spec_constants: HashMap::new(), kernels: Vec::new(), + kernel_builds: HashMap::new(), }), - nir_builds: Mutex::new(HashMap::new()), }) } @@ -229,18 +376,21 @@ impl Program { ); } + let mut build = ProgramBuild { + builds: builds, + spec_constants: HashMap::new(), + kernels: kernels.into_iter().collect(), + kernel_builds: HashMap::new(), + }; + build.build_nirs(false); + Arc::new(Self { base: CLObjectBase::new(), context: context, devs: devs, src: ProgramSourceType::Binary, kernel_count: AtomicU32::new(0), - spec_constants: Mutex::new(HashMap::new()), - build: Mutex::new(ProgramBuild { - builds: builds, - kernels: kernels.into_iter().collect(), - }), - nir_builds: Mutex::new(HashMap::new()), + build: Mutex::new(build), }) } @@ -252,12 +402,12 @@ impl Program { context: context, src: ProgramSourceType::Il(SPIRVBin::from_bin(spirv)), kernel_count: AtomicU32::new(0), - spec_constants: Mutex::new(HashMap::new()), build: Mutex::new(ProgramBuild { builds: builds, + spec_constants: HashMap::new(), kernels: Vec::new(), + kernel_builds: HashMap::new(), }), - nir_builds: Mutex::new(HashMap::new()), }) } @@ -265,53 +415,33 @@ impl Program { self.build.lock().unwrap() } - fn dev_build_info<'a>( - l: &'a mut MutexGuard<ProgramBuild>, - dev: &Arc<Device>, - ) -> &'a mut ProgramDevBuild { - l.builds.get_mut(dev).unwrap() - } - - fn nir_build_info(&self) -> MutexGuard<HashMap<String, NirKernelBuild>> { - self.nir_builds.lock().unwrap() - } - pub fn get_nir_kernel_build(&self, name: &str) -> NirKernelBuild { - let info = self.nir_build_info(); - info.get(name).unwrap().clone() - } - - pub fn set_nir_kernel_build(&self, name: &str, nir_build: NirKernelBuild) { - let mut info = self.nir_build_info(); - info.insert(String::from(name), nir_build); + let info = self.build_info(); + info.kernel_builds.get(name).unwrap().clone() } pub fn status(&self, dev: &Arc<Device>) -> cl_build_status { - Self::dev_build_info(&mut self.build_info(), dev).status + self.build_info().dev_build(dev).status } pub fn log(&self, dev: &Arc<Device>) -> String { - Self::dev_build_info(&mut self.build_info(), dev) - .log - .clone() + self.build_info().dev_build(dev).log.clone() } pub fn bin_type(&self, dev: &Arc<Device>) -> cl_program_binary_type { - Self::dev_build_info(&mut self.build_info(), dev).bin_type + self.build_info().dev_build(dev).bin_type } pub fn options(&self, dev: &Arc<Device>) -> String { - Self::dev_build_info(&mut self.build_info(), dev) - .options - .clone() + self.build_info().dev_build(dev).options.clone() } // we need to precalculate the size pub fn bin_sizes(&self) -> Vec<usize> { - let mut lock = self.build_info(); + let lock = self.build_info(); let mut res = Vec::new(); for d in &self.devs { - let info = Self::dev_build_info(&mut lock, d); + let info = lock.dev_build(d); res.push( info.spirv @@ -337,10 +467,10 @@ impl Program { slice::from_raw_parts(vals.as_ptr().cast(), vals.len() / size_of::<*mut u8>()) }; - let mut lock = self.build_info(); + let lock = self.build_info(); for (i, d) in self.devs.iter().enumerate() { let mut ptr = ptrs[i]; - let info = Self::dev_build_info(&mut lock, d); + let info = lock.dev_build(d); let spirv = info.spirv.as_ref().unwrap().to_bin(); unsafe { @@ -365,12 +495,15 @@ impl Program { ptrs.to_vec() } - pub fn args(&self, dev: &Arc<Device>, kernel: &str) -> Vec<spirv::SPIRVKernelArg> { - Self::dev_build_info(&mut self.build_info(), dev) - .spirv - .as_ref() - .unwrap() - .args(kernel) + pub fn kernel_signatures(&self, kernel_name: &str) -> HashSet<Vec<spirv::SPIRVKernelArg>> { + let build = self.build_info(); + let devs = build.devs_with_build(); + + if devs.is_empty() { + return HashSet::new(); + } + + devs.iter().map(|d| build.args(d, kernel_name)).collect() } pub fn kernels(&self) -> Vec<String> { @@ -388,7 +521,7 @@ impl Program { return false; } - let d = Self::dev_build_info(&mut info, dev); + let mut d = info.dev_build_mut(dev); let spirvs = [d.spirv.as_ref().unwrap()]; let (spirv, log) = spirv::SPIRVBin::link(&spirvs, lib); @@ -403,6 +536,7 @@ impl Program { d.status = CL_BUILD_SUCCESS as cl_build_status; let mut kernels = d.spirv.as_ref().unwrap().kernels(); info.kernels.append(&mut kernels); + info.build_nirs(self.is_src()); true } else { d.status = CL_BUILD_ERROR; @@ -418,7 +552,7 @@ impl Program { headers: &[spirv::CLCHeader], info: &mut MutexGuard<ProgramBuild>, ) -> bool { - let d = Self::dev_build_info(info, dev); + let mut d = info.dev_build_mut(dev); let (spirv, log) = match &self.src { ProgramSourceType::Il(spirv) => spirv.clone_on_validate(), @@ -459,8 +593,7 @@ impl Program { options: String, headers: &[spirv::CLCHeader], ) -> bool { - let mut info = self.build_info(); - self.do_compile(dev, options, headers, &mut info) + self.do_compile(dev, options, headers, &mut self.build_info()) } pub fn link( @@ -478,7 +611,7 @@ impl Program { for d in &devs { let bins: Vec<_> = locks .iter_mut() - .map(|l| Self::dev_build_info(l, d).spirv.as_ref().unwrap()) + .map(|l| l.dev_build(d).spirv.as_ref().unwrap()) .collect(); let (spirv, log) = spirv::SPIRVBin::link(&bins, lib); @@ -512,159 +645,26 @@ impl Program { ); } + let mut build = ProgramBuild { + builds: builds, + spec_constants: HashMap::new(), + kernels: kernels.into_iter().collect(), + kernel_builds: HashMap::new(), + }; + + // Pre build nir kernels + build.build_nirs(false); + Arc::new(Self { base: CLObjectBase::new(), context: context, devs: devs, src: ProgramSourceType::Linked, kernel_count: AtomicU32::new(0), - spec_constants: Mutex::new(HashMap::new()), - build: Mutex::new(ProgramBuild { - builds: builds, - kernels: kernels.into_iter().collect(), - }), - nir_builds: Mutex::new(HashMap::new()), + build: Mutex::new(build), }) } - pub fn build_nir_kernel(&self, name: &str, args: Vec<spirv::SPIRVKernelArg>) -> NirKernelBuild { - let mut nirs = HashMap::new(); - let mut args_set = HashSet::new(); - let mut internal_args_set = HashSet::new(); - let mut attributes_string_set = HashSet::new(); - - // TODO: we could run this in parallel? - for d in self.devs_with_build() { - let (nir, args, internal_args, attributes_string) = - convert_spirv_to_nir(self, name, &args, d); - nirs.insert(d.clone(), Arc::new(nir)); - args_set.insert(args); - internal_args_set.insert(internal_args); - attributes_string_set.insert(attributes_string); - } - - // we want the same (internal) args for every compiled kernel, for now - assert!(args_set.len() == 1); - assert!(internal_args_set.len() == 1); - assert!(attributes_string_set.len() == 1); - let args = args_set.into_iter().next().unwrap(); - let internal_args = internal_args_set.into_iter().next().unwrap(); - - // spec: For kernels not created from OpenCL C source and the clCreateProgramWithSource API call - // the string returned from this query [CL_KERNEL_ATTRIBUTES] will be empty. - let attributes_string = if self.is_src() { - attributes_string_set.into_iter().next().unwrap() - } else { - String::new() - }; - - NirKernelBuild { - nirs: nirs, - args: args, - internal_args: internal_args, - attributes_string: attributes_string, - } - } - - pub fn build_nirs(&self) { - let devs = self.devs_with_build(); - for k in &self.kernels() { - let kernel_args: HashSet<_> = devs.iter().map(|d| self.args(d, k)).collect(); - let nir_build = self.build_nir_kernel(k, kernel_args.into_iter().next().unwrap()); - self.set_nir_kernel_build(k, nir_build); - } - } - - pub(super) fn hash_key(&self, dev: &Arc<Device>, name: &str) -> Option<cache_key> { - if let Some(cache) = dev.screen().shader_cache() { - let mut lock = self.build_info(); - let info = Self::dev_build_info(&mut lock, dev); - assert_eq!(info.status, CL_BUILD_SUCCESS as cl_build_status); - - let spirv = info.spirv.as_ref().unwrap(); - let mut bin = spirv.to_bin().to_vec(); - bin.extend_from_slice(name.as_bytes()); - - for (k, v) in self.spec_constants.lock().unwrap().iter() { - bin.extend_from_slice(&k.to_ne_bytes()); - unsafe { - // SAFETY: we fully initialize this union - bin.extend_from_slice(&v.u64_.to_ne_bytes()); - } - } - - Some(cache.gen_key(&bin)) - } else { - None - } - } - - pub fn devs_with_build(&self) -> Vec<&Arc<Device>> { - let mut lock = self.build_info(); - self.devs - .iter() - .filter(|d| { - let info = Self::dev_build_info(&mut lock, d); - info.status == CL_BUILD_SUCCESS as cl_build_status - }) - .collect() - } - - pub fn attribute_str(&self, kernel: &str, d: &Arc<Device>) -> String { - let mut lock = self.build_info(); - let info = Self::dev_build_info(&mut lock, d); - - let attributes_strings = [ - info.spirv.as_ref().unwrap().vec_type_hint(kernel), - info.spirv.as_ref().unwrap().local_size(kernel), - info.spirv.as_ref().unwrap().local_size_hint(kernel), - ]; - - let attributes_strings: Vec<_> = attributes_strings - .iter() - .flatten() - .map(String::as_str) - .collect(); - attributes_strings.join(",") - } - - pub fn to_nir(&self, kernel: &str, d: &Arc<Device>) -> NirShader { - let constants = self.spec_constants.lock().unwrap(); - let mut spec_constants: Vec<_> = constants - .iter() - .map(|(&id, &value)| nir_spirv_specialization { - id: id, - value: value, - defined_on_module: true, - }) - .collect(); - drop(constants); - - let mut lock = self.build_info(); - - let info = Self::dev_build_info(&mut lock, d); - assert_eq!(info.status, CL_BUILD_SUCCESS as cl_build_status); - - let mut log = Platform::dbg().program.then(Vec::new); - let nir = info.spirv.as_ref().unwrap().to_nir( - kernel, - d.screen - .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE), - &d.lib_clc, - &mut spec_constants, - d.address_bits(), - log.as_mut(), - ); - - if let Some(log) = log { - for line in log { - eprintln!("{}", line); - } - }; - - nir.unwrap() - } - pub fn is_il(&self) -> bool { matches!(self.src, ProgramSourceType::Il(_)) } @@ -683,7 +683,7 @@ impl Program { } pub fn set_spec_constant(&self, spec_id: u32, data: &[u8]) { - let mut lock = self.spec_constants.lock().unwrap(); + let mut lock = self.build_info(); let mut val = nir_const_value::default(); match data.len() { @@ -694,6 +694,6 @@ impl Program { _ => unreachable!("Spec constant with invalid size!"), }; - lock.insert(spec_id, val); + lock.spec_constants.insert(spec_id, val); } }
