1use std::{fs::File, ops::DerefMut, sync::Mutex, time::Instant};
3
4use csv::Writer;
5use green_kernels::{laplace_3d::Laplace3dKernel, traits::Kernel, types::GreenKernelEvalType};
6use itertools::{iproduct, Itertools};
7use kifmm::{
8 fftw::array::AlignedAllocable,
9 fmm::types::FmmSvdMode,
10 linalg::rsvd::{MatrixRsvd, Normaliser},
11 traits::{
12 fftw::Dft,
13 fmm::{DataAccess, Evaluate},
14 general::single_node::{AsComplex, Epsilon, Hadamard8x8},
15 tree::{SingleFmmTree, SingleTree},
16 },
17 tree::helpers::points_fixture,
18 BlasFieldTranslationSaRcmp, FftFieldTranslation, SingleNodeBuilder,
19};
20use num::Float;
21use rand::distributions::uniform::SampleUniform;
22use rlst::{rlst_dynamic_array2, MatrixSvd, RawAccess, RawAccessMut, RlstScalar};
23
24fn grid_search_laplace_blas<T>(
25 filename: String,
26 n_points: usize,
27 expansion_order_vec: &[usize],
28 svd_threshold_vec: &[Option<T>],
29 surface_diff_vec: &[usize],
30 depth_vec: &[u64],
31 rsvd_settings_vec: &[FmmSvdMode],
32) where
33 T: RlstScalar<Real = T> + Epsilon + Float + Default + SampleUniform + MatrixRsvd,
34{
35 let prune_empty = true;
37
38 let parameters = iproduct!(
39 surface_diff_vec.iter(),
40 svd_threshold_vec.iter(),
41 depth_vec.iter(),
42 expansion_order_vec.iter(),
43 rsvd_settings_vec.iter()
44 )
45 .map(
46 |(surface_diff, svd_threshold, depth, expansion_order, rsvd_settings)| {
47 (
48 *surface_diff,
49 *svd_threshold,
50 *depth,
51 *expansion_order,
52 rsvd_settings,
53 )
54 },
55 )
56 .collect_vec();
57
58 let parameters_cloned = parameters.iter().cloned().collect_vec();
59
60 let fmms = Mutex::new(Vec::new());
61 let progress = Mutex::new(0usize);
62
63 let n_params = parameters.len();
64
65 let n_sources = n_points;
67 let n_targets = n_points;
68 let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
69 let targets = points_fixture::<T::Real>(n_targets, None, None, Some(1));
70 let n_vecs = 1;
71 let tmp = vec![T::one(); n_sources * n_vecs];
72 let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
73 charges.data_mut().copy_from_slice(&tmp);
74
75 let s = Instant::now();
76 parameters.into_iter().enumerate().for_each(
77 |(i, (surface_diff, svd_threshold, depth, expansion_order, rsvd_settings))| {
78 let expansion_order = vec![expansion_order; (depth + 1) as usize];
79
80 let s = Instant::now();
81 let fmm = SingleNodeBuilder::new(true)
82 .tree(
83 sources.data(),
84 targets.data(),
85 None,
86 Some(depth),
87 prune_empty,
88 )
89 .unwrap()
90 .parameters(
91 charges.data(),
92 &expansion_order,
93 Laplace3dKernel::new(),
94 GreenKernelEvalType::Value,
95 BlasFieldTranslationSaRcmp::new(
96 svd_threshold,
97 Some(surface_diff),
98 *rsvd_settings,
99 ),
100 )
101 .unwrap()
102 .build()
103 .unwrap();
104 let setup_time = s.elapsed();
105 fmms.lock().unwrap().push((i, fmm, setup_time));
106 *progress.lock().unwrap().deref_mut() += 1;
107
108 println!(
109 "BLAS Pre-computed {:?}/{:?}",
110 *progress.lock().unwrap(),
111 n_params
112 );
113 },
114 );
115
116 let file = File::create(format!("{filename}.csv")).unwrap();
117 let mut writer = Writer::from_writer(file);
118 writer
119 .write_record(&[
120 "depth".to_string(),
121 "surface_diff".to_string(),
122 "svd_threshold".to_string(),
123 "expansion_order".to_string(),
124 "runtime".to_string(),
125 "min_rel_err".to_string(),
126 "mean_rel_err".to_string(),
127 "max_rel_err".to_string(),
128 "n_iter".to_string(),
129 "n_oversamples".to_string(),
130 "setup_time".to_string(),
131 ])
132 .unwrap();
133
134 println!(
135 "BLAS Pre-computation Time Elapsed {:?}",
136 s.elapsed().as_secs()
137 );
138
139 let n_sources = 1000000;
141 let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
142 let n_vecs = 1;
143 let tmp = vec![T::one(); n_sources * n_vecs];
144 let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
145 charges.data_mut().copy_from_slice(&tmp);
146
147 let mut progress = 0;
148 for (i, fmm, setup_time) in fmms.lock().unwrap().iter_mut() {
149 let (surface_diff, svd_threshold, depth, expansion_order, rsvd_settings) =
150 parameters_cloned[*i];
151
152 let svd_threshold = svd_threshold.unwrap_or(T::zero().re());
153
154 let s = Instant::now();
155 fmm.evaluate().unwrap();
156 let time = s.elapsed().as_millis() as f32;
157 progress += 1;
158 println!("BLAS Evaluated {progress:?}/{n_params:?}");
159
160 let leaf_idx = 1;
161 let leaf = fmm.tree().target_tree().all_leaves().unwrap()[leaf_idx];
162 let potential = fmm.potential(&leaf).unwrap()[0];
163 let leaf_targets = fmm.tree().target_tree().coordinates(&leaf).unwrap();
164 let n_targets = leaf_targets.len() / fmm.dim();
165 let mut direct = vec![T::zero(); n_targets];
166 fmm.kernel().evaluate_st(
167 GreenKernelEvalType::Value,
168 sources.data(),
169 leaf_targets,
170 charges.data(),
171 &mut direct,
172 );
173
174 let rel_error = direct
175 .iter()
176 .zip(potential)
177 .map(|(&d, &p)| {
178 let abs_error = RlstScalar::abs(d - p);
179 abs_error / p
180 })
181 .collect_vec();
182
183 let min_rel_err = rel_error
184 .iter()
185 .filter(|&&x| !x.is_nan())
186 .cloned()
187 .min_by(|a, b| a.partial_cmp(b).unwrap())
188 .unwrap();
189 let max_rel_err = rel_error
190 .iter()
191 .filter(|&&x| !x.is_nan())
192 .cloned()
193 .max_by(|a, b| a.partial_cmp(b).unwrap())
194 .unwrap();
195
196 let mean_rel_err: T = rel_error.into_iter().sum::<T>() / T::from(direct.len()).unwrap();
197
198 let n_iter_;
199 let n_oversamples_;
200 match rsvd_settings {
201 FmmSvdMode::Random {
202 n_components: _,
203 normaliser,
204 n_oversamples,
205 random_state: _,
206 } => {
207 if let Some(Normaliser::Qr(n)) = normaliser {
208 n_iter_ = *n
209 } else {
210 n_iter_ = 0;
211 }
212 n_oversamples_ = n_oversamples.unwrap();
213 }
214
215 FmmSvdMode::Deterministic => {
216 n_iter_ = 0usize;
217 n_oversamples_ = 0usize;
218 }
219 }
220
221 writer
222 .write_record(&[
223 depth.to_string(),
224 surface_diff.to_string(),
225 svd_threshold.to_string(),
226 expansion_order.to_string(),
227 time.to_string(),
228 min_rel_err.to_string(),
229 mean_rel_err.to_string(),
230 max_rel_err.to_string(),
231 n_iter_.to_string(),
232 n_oversamples_.to_string(),
233 (setup_time.as_millis() as f32).to_string(),
234 ])
235 .unwrap();
236 }
237}
238
239fn grid_search_laplace_fft<T>(
240 filename: String,
241 n_points: usize,
242 expansion_order_vec: &[usize],
243 depth_vec: &[u64],
244 block_size_vec: &[usize],
245) where
246 T: RlstScalar<Real = T>
247 + AsComplex
248 + Default
249 + Dft<InputType = T, OutputType = <T as AsComplex>::ComplexType>
250 + SampleUniform
251 + Float
252 + Epsilon
253 + AlignedAllocable
254 + MatrixSvd,
255 <T as AsComplex>::ComplexType:
256 Hadamard8x8<Scalar = <T as AsComplex>::ComplexType> + AlignedAllocable,
257 <T as Dft>::Plan: Sync,
258{
259 let prune_empty = true;
261
262 let parameters = iproduct!(
263 depth_vec.iter(),
264 expansion_order_vec.iter(),
265 block_size_vec.iter()
266 )
267 .map(|(depth, expansion_order, block_size)| (*depth, *expansion_order, *block_size))
268 .collect_vec();
269
270 let parameters_cloned = parameters.iter().cloned().collect_vec();
271
272 let fmms = Mutex::new(Vec::new());
273 let progress = Mutex::new(0usize);
274
275 let n_params = parameters.len();
276
277 let s = Instant::now();
278 parameters
279 .into_iter()
280 .enumerate()
281 .for_each(|(i, (depth, expansion_order, block_size))| {
282 let expansion_order = vec![expansion_order; (depth + 1) as usize];
283 let n_sources = n_points;
285 let n_targets = n_points;
286 let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
287 let targets = points_fixture::<T::Real>(n_targets, None, None, Some(1));
288 let n_vecs = 1;
289 let tmp = vec![T::one(); n_sources * n_vecs];
290 let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
291 charges.data_mut().copy_from_slice(&tmp);
292
293 let s = Instant::now();
294 let fmm = SingleNodeBuilder::new(true)
295 .tree(
296 sources.data(),
297 targets.data(),
298 None,
299 Some(depth),
300 prune_empty,
301 )
302 .unwrap()
303 .parameters(
304 charges.data(),
305 &expansion_order,
306 Laplace3dKernel::new(),
307 GreenKernelEvalType::Value,
308 FftFieldTranslation::new(Some(block_size)),
309 )
310 .unwrap()
311 .build()
312 .unwrap();
313 let setup_time = s.elapsed();
314
315 fmms.lock().unwrap().push((i, fmm, setup_time));
316 *progress.lock().unwrap().deref_mut() += 1;
317
318 println!(
319 "FFT Pre-computed {:?}/{:?}",
320 *progress.lock().unwrap(),
321 n_params
322 );
323 });
324
325 println!(
326 "FFT Pre-computation Time Elapsed {:?}",
327 s.elapsed().as_secs()
328 );
329 let file = File::create(format!("{filename}.csv")).unwrap();
330 let mut writer = Writer::from_writer(file);
331 writer
332 .write_record(&[
333 "depth".to_string(),
334 "expansion_order".to_string(),
335 "block_size".to_string(),
336 "runtime".to_string(),
337 "min_rel_err".to_string(),
338 "mean_rel_err".to_string(),
339 "max_rel_err".to_string(),
340 "setup_time".to_string(),
341 ])
342 .unwrap();
343
344 let n_sources = n_points;
345 let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
346 let n_vecs = 1;
347 let tmp = vec![T::one(); n_sources * n_vecs];
348 let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
349 charges.data_mut().copy_from_slice(&tmp);
350
351 let mut progress = 0;
352 for (i, fmm, setup_time) in fmms.lock().unwrap().iter_mut() {
353 let (depth, expansion_order, block_size) = parameters_cloned[*i];
354
355 let s = Instant::now();
356 fmm.evaluate().unwrap();
357 let time = s.elapsed().as_millis() as f32;
358 progress += 1;
359 println!("FFT Evaluated {progress:?}/{n_params:?}");
360
361 let leaf_idx = 1;
362 let leaf = fmm.tree().target_tree().all_leaves().unwrap()[leaf_idx];
363 let potential = fmm.potential(&leaf).unwrap()[0];
364 let leaf_targets = fmm.tree().target_tree().coordinates(&leaf).unwrap();
365 let n_targets = leaf_targets.len() / fmm.dim();
366 let mut direct = vec![T::zero(); n_targets];
367 fmm.kernel().evaluate_st(
368 GreenKernelEvalType::Value,
369 sources.data(),
370 leaf_targets,
371 charges.data(),
372 &mut direct,
373 );
374
375 let rel_error = direct
376 .iter()
377 .zip(potential)
378 .map(|(&d, &p)| {
379 let abs_error = RlstScalar::abs(d - p);
380 abs_error / p
381 })
382 .collect_vec();
383
384 let min_rel_err = rel_error
385 .iter()
386 .filter(|&&x| !x.is_nan())
387 .cloned()
388 .min_by(|a, b| a.partial_cmp(b).unwrap())
389 .unwrap();
390
391 let max_rel_err = rel_error
392 .iter()
393 .filter(|&&x| !x.is_nan())
394 .cloned()
395 .max_by(|a, b| a.partial_cmp(b).unwrap())
396 .unwrap();
397
398 let mean_rel_err: T = rel_error.into_iter().sum::<T>() / T::from(direct.len()).unwrap();
399
400 writer
401 .write_record(&[
402 depth.to_string(),
403 expansion_order.to_string(),
404 block_size.to_string(),
405 time.to_string(),
406 min_rel_err.to_string(),
407 mean_rel_err.to_string(),
408 max_rel_err.to_string(),
409 (setup_time.as_millis() as f32).to_string(),
410 ])
411 .unwrap();
412 }
413}
414
415fn main() {
416 let max_m2l_fft_block_size_vec = vec![16, 32, 64, 128];
417 let rsvd_settings_vec = [FmmSvdMode::new(false, None, None, None, None)];
418
419 {
421 let expansion_order_vec: Vec<usize> = vec![3];
422
423 let svd_threshold_vec = vec![None, Some(1e-7), Some(1e-5), Some(1e-3), Some(1e-1)];
424
425 let surface_diff_vec: Vec<usize> = vec![0, 1, 2];
426 let depth_vec: Vec<u64> = vec![4, 5];
427
428 let n_points = 10000;
429
430 grid_search_laplace_fft::<f32>(
431 "grid_search_laplace_fft_f32_m1".to_string(),
432 n_points,
433 &expansion_order_vec,
434 &depth_vec,
435 &max_m2l_fft_block_size_vec,
436 );
437
438 for (i, &rsvd_settings) in rsvd_settings_vec.iter().enumerate() {
439 grid_search_laplace_blas::<f32>(
440 format!("grid_search_laplace_blas_f32_m1_{i}").to_string(),
441 n_points,
442 &expansion_order_vec,
443 &svd_threshold_vec,
444 &surface_diff_vec,
445 &depth_vec,
446 &[rsvd_settings],
447 );
448 }
449 }
450}