1use std::{fs::File, ops::DerefMut, sync::Mutex, time::Instant};
3
4use csv::Writer;
5use green_kernels::{laplace_3d::Laplace3dKernel, traits::Kernel, types::GreenKernelEvalType};
6use itertools::{iproduct, Itertools};
7use kifmm::{
8    fftw::array::AlignedAllocable,
9    fmm::{
10        helpers::single_node::l2_error,
11        types::{FmmSvdMode, PinvMode},
12    },
13    linalg::rsvd::{MatrixRsvd, Normaliser},
14    traits::{
15        fftw::Dft,
16        fmm::{DataAccess, Evaluate},
17        general::single_node::{ArgmaxValue, AsComplex, Cast, Epsilon, Hadamard8x8, Upcast},
18        tree::{SingleFmmTree, SingleTree},
19    },
20    tree::helpers::points_fixture,
21    BlasFieldTranslationAca, BlasFieldTranslationSaRcmp, FftFieldTranslation, SingleNodeBuilder,
22};
23use num::Float;
24use rand::distributions::uniform::SampleUniform;
25use rlst::{rlst_dynamic_array2, MatrixQr, MatrixSvd, RawAccess, RawAccessMut, RlstScalar};
26
27#[allow(dead_code)]
28fn grid_search_laplace_blas_aca<
29    T: RlstScalar<Real = T>
30        + Epsilon
31        + MatrixRsvd
32        + Float
33        + SampleUniform
34        + MatrixQr
35        + Default
36        + Upcast
37        + ArgmaxValue<T>
38        + Cast<<T as Upcast>::Higher>
39        + Cast<<<T as Upcast>::Higher as RlstScalar>::Real>,
40>(
41    filename: String,
42    n_points: usize,
43    expansion_order_vec: &[usize],
44    eps_vec: &[Option<T>],
45    surface_diff_vec: &[usize],
46    depth_vec: &[u64],
47) where
48    <T as RlstScalar>::Real: Epsilon,
49    <T as Upcast>::Higher: RlstScalar + MatrixSvd + Epsilon + Cast<T>,
50    <<T as Upcast>::Higher as RlstScalar>::Real: Epsilon + MatrixSvd + Cast<T::Real>,
51{
52    let prune_empty = true;
54
55    let parameters = iproduct!(
56        surface_diff_vec.iter(),
57        eps_vec.iter(),
58        depth_vec.iter(),
59        expansion_order_vec.iter(),
60    )
61    .map(|(surface_diff, eps_threshold, depth, expansion_order)| {
62        (*surface_diff, *eps_threshold, *depth, *expansion_order)
63    })
64    .collect_vec();
65
66    let parameters_cloned = parameters.iter().cloned().collect_vec();
67
68    let mut fmms = Vec::new();
69    let mut progress = 0usize;
70
71    let n_params = parameters.len();
72
73    let n_sources = n_points;
75    let n_targets = n_points;
76    let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
77    let targets = points_fixture::<T::Real>(n_targets, None, None, Some(1));
78    let n_vecs = 1;
79    let tmp = vec![T::one(); n_sources * n_vecs];
80    let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
81    charges.data_mut().copy_from_slice(&tmp);
82
83    let s = Instant::now();
84    parameters.into_iter().enumerate().for_each(
85        |(i, (surface_diff, eps, depth, expansion_order))| {
86            let expansion_order = vec![expansion_order; (depth + 1) as usize];
87
88            let s = Instant::now();
89            let fmm = SingleNodeBuilder::new(true)
90                .tree(
91                    sources.data(),
92                    targets.data(),
93                    None,
94                    Some(depth),
95                    prune_empty,
96                )
97                .unwrap()
98                .parameters(
99                    charges.data(),
100                    &expansion_order,
101                    Laplace3dKernel::new(),
102                    GreenKernelEvalType::Value,
103                    BlasFieldTranslationAca::new(eps, Some(surface_diff), Some(true)),
104                    Some(PinvMode::aca(eps, None, None, true)),
105                )
106                .unwrap()
107                .build()
108                .unwrap();
109            let setup_time = s.elapsed();
110            fmms.push((i, fmm, setup_time));
111            progress += 1;
112
113            println!("BLAS ACA+ Pre-computed {:?}/{:?}", progress, n_params);
114        },
115    );
116
117    let file = File::create(format!("{filename}.csv")).unwrap();
118    let mut writer = Writer::from_writer(file);
119    writer
120        .write_record(&[
121            "depth".to_string(),
122            "surface_diff".to_string(),
123            "svd_threshold".to_string(),
124            "expansion_order".to_string(),
125            "runtime".to_string(),
126            "min_rel_err".to_string(),
127            "mean_rel_err".to_string(),
128            "max_rel_err".to_string(),
129            "l2_error".to_string(),
130            "setup_time".to_string(),
131        ])
132        .unwrap();
133
134    println!(
135        "BLAS Pre-computation Time Elapsed {:?}",
136        s.elapsed().as_secs()
137    );
138
139    let n_sources = 1000000;
141    let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
142    let n_vecs = 1;
143    let tmp = vec![T::one(); n_sources * n_vecs];
144    let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
145    charges.data_mut().copy_from_slice(&tmp);
146
147    let mut progress = 0;
148    for (i, fmm, setup_time) in fmms.iter_mut() {
149        let (surface_diff, eps, depth, expansion_order) = parameters_cloned[*i];
150
151        let eps = eps.unwrap_or(T::zero().re());
152
153        let s = Instant::now();
154        fmm.evaluate().unwrap();
155        let time = s.elapsed().as_millis() as f32;
156        progress += 1;
157        println!("BLAS Evaluated {progress:?}/{n_params:?}");
158
159        let leaf_idx = 1;
160        let leaf = fmm.tree().target_tree().all_leaves().unwrap()[leaf_idx];
161        let potential = fmm.potential(&leaf).unwrap()[0];
162        let leaf_targets = fmm.tree().target_tree().coordinates(&leaf).unwrap();
163        let n_targets = leaf_targets.len() / fmm.dim();
164        let mut direct = vec![T::zero(); n_targets];
165        fmm.kernel().evaluate_st(
166            GreenKernelEvalType::Value,
167            sources.data(),
168            leaf_targets,
169            charges.data(),
170            &mut direct,
171        );
172
173        let rel_error = direct
174            .iter()
175            .zip(potential)
176            .map(|(&d, &p)| {
177                let abs_error = RlstScalar::abs(d - p);
178                abs_error / p
179            })
180            .collect_vec();
181
182        let min_rel_err = rel_error
183            .iter()
184            .filter(|&&x| !x.is_nan())
185            .cloned()
186            .min_by(|a, b| a.partial_cmp(b).unwrap())
187            .unwrap();
188        let max_rel_err = rel_error
189            .iter()
190            .filter(|&&x| !x.is_nan())
191            .cloned()
192            .max_by(|a, b| a.partial_cmp(b).unwrap())
193            .unwrap();
194
195        let mean_rel_err: T = rel_error.into_iter().sum::<T>() / T::from(direct.len()).unwrap();
196        let l2_error = l2_error(&direct, potential);
197
198        writer
199            .write_record(&[
200                depth.to_string(),
201                surface_diff.to_string(),
202                eps.to_string(),
203                expansion_order.to_string(),
204                time.to_string(),
205                min_rel_err.to_string(),
206                mean_rel_err.to_string(),
207                max_rel_err.to_string(),
208                l2_error.to_string(),
209                (setup_time.as_millis() as f32).to_string(),
210            ])
211            .unwrap();
212    }
213}
214
215fn grid_search_laplace_blas_svd<
216    T: RlstScalar<Real = T>
217        + Epsilon
218        + MatrixRsvd
219        + Float
220        + SampleUniform
221        + MatrixQr
222        + Default
223        + Upcast
224        + ArgmaxValue<T>
225        + Cast<<T as Upcast>::Higher>
226        + Cast<<<T as Upcast>::Higher as RlstScalar>::Real>,
227>(
228    filename: String,
229    n_points: usize,
230    expansion_order_vec: &[usize],
231    svd_threshold_vec: &[Option<T>],
232    surface_diff_vec: &[usize],
233    depth_vec: &[u64],
234    rsvd_settings_vec: &[FmmSvdMode],
235) where
236    <T as RlstScalar>::Real: Epsilon,
237    <T as Upcast>::Higher: RlstScalar + MatrixSvd + Epsilon + Cast<T>,
238    <<T as Upcast>::Higher as RlstScalar>::Real: Epsilon + MatrixSvd + Cast<T::Real>,
239{
240    let prune_empty = true;
242
243    let parameters = iproduct!(
244        surface_diff_vec.iter(),
245        svd_threshold_vec.iter(),
246        depth_vec.iter(),
247        expansion_order_vec.iter(),
248        rsvd_settings_vec.iter()
249    )
250    .map(
251        |(surface_diff, svd_threshold, depth, expansion_order, rsvd_settings)| {
252            (
253                *surface_diff,
254                *svd_threshold,
255                *depth,
256                *expansion_order,
257                rsvd_settings,
258            )
259        },
260    )
261    .collect_vec();
262
263    let parameters_cloned = parameters.iter().cloned().collect_vec();
264
265    let fmms = Mutex::new(Vec::new());
266    let progress = Mutex::new(0usize);
267
268    let n_params = parameters.len();
269
270    let n_sources = n_points;
272    let n_targets = n_points;
273    let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
274    let targets = points_fixture::<T::Real>(n_targets, None, None, Some(1));
275    let n_vecs = 1;
276    let tmp = vec![T::one(); n_sources * n_vecs];
277    let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
278    charges.data_mut().copy_from_slice(&tmp);
279
280    let s = Instant::now();
281    parameters.into_iter().enumerate().for_each(
282        |(i, (surface_diff, svd_threshold, depth, expansion_order, rsvd_settings))| {
283            let expansion_order = vec![expansion_order; (depth + 1) as usize];
284
285            let s = Instant::now();
286            let fmm = SingleNodeBuilder::new(true)
287                .tree(
288                    sources.data(),
289                    targets.data(),
290                    None,
291                    Some(depth),
292                    prune_empty,
293                )
294                .unwrap()
295                .parameters(
296                    charges.data(),
297                    &expansion_order,
298                    Laplace3dKernel::new(),
299                    GreenKernelEvalType::Value,
300                    BlasFieldTranslationSaRcmp::new(
301                        svd_threshold,
302                        Some(surface_diff),
303                        *rsvd_settings,
304                    ),
305                    None,
306                )
307                .unwrap()
308                .build()
309                .unwrap();
310            let setup_time = s.elapsed();
311            fmms.lock().unwrap().push((i, fmm, setup_time));
312            *progress.lock().unwrap().deref_mut() += 1;
313
314            println!(
315                "BLAS Pre-computed {:?}/{:?}",
316                *progress.lock().unwrap(),
317                n_params
318            );
319        },
320    );
321
322    let file = File::create(format!("{filename}.csv")).unwrap();
323    let mut writer = Writer::from_writer(file);
324    writer
325        .write_record(&[
326            "depth".to_string(),
327            "surface_diff".to_string(),
328            "svd_threshold".to_string(),
329            "expansion_order".to_string(),
330            "runtime".to_string(),
331            "min_rel_err".to_string(),
332            "mean_rel_err".to_string(),
333            "max_rel_err".to_string(),
334            "n_iter".to_string(),
335            "n_oversamples".to_string(),
336            "setup_time".to_string(),
337        ])
338        .unwrap();
339
340    println!(
341        "BLAS Pre-computation Time Elapsed {:?}",
342        s.elapsed().as_secs()
343    );
344
345    let n_sources = 1000000;
347    let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
348    let n_vecs = 1;
349    let tmp = vec![T::one(); n_sources * n_vecs];
350    let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
351    charges.data_mut().copy_from_slice(&tmp);
352
353    let mut progress = 0;
354    for (i, fmm, setup_time) in fmms.lock().unwrap().iter_mut() {
355        let (surface_diff, svd_threshold, depth, expansion_order, rsvd_settings) =
356            parameters_cloned[*i];
357
358        let svd_threshold = svd_threshold.unwrap_or(T::zero().re());
359
360        let s = Instant::now();
361        fmm.evaluate().unwrap();
362        let time = s.elapsed().as_millis() as f32;
363        progress += 1;
364        println!("BLAS Evaluated {progress:?}/{n_params:?}");
365
366        let leaf_idx = 1;
367        let leaf = fmm.tree().target_tree().all_leaves().unwrap()[leaf_idx];
368        let potential = fmm.potential(&leaf).unwrap()[0];
369        let leaf_targets = fmm.tree().target_tree().coordinates(&leaf).unwrap();
370        let n_targets = leaf_targets.len() / fmm.dim();
371        let mut direct = vec![T::zero(); n_targets];
372        fmm.kernel().evaluate_st(
373            GreenKernelEvalType::Value,
374            sources.data(),
375            leaf_targets,
376            charges.data(),
377            &mut direct,
378        );
379
380        let rel_error = direct
381            .iter()
382            .zip(potential)
383            .map(|(&d, &p)| {
384                let abs_error = RlstScalar::abs(d - p);
385                abs_error / p
386            })
387            .collect_vec();
388
389        let min_rel_err = rel_error
390            .iter()
391            .filter(|&&x| !x.is_nan())
392            .cloned()
393            .min_by(|a, b| a.partial_cmp(b).unwrap())
394            .unwrap();
395        let max_rel_err = rel_error
396            .iter()
397            .filter(|&&x| !x.is_nan())
398            .cloned()
399            .max_by(|a, b| a.partial_cmp(b).unwrap())
400            .unwrap();
401
402        let mean_rel_err: T = rel_error.into_iter().sum::<T>() / T::from(direct.len()).unwrap();
403
404        let n_iter_;
405        let n_oversamples_;
406        match rsvd_settings {
407            FmmSvdMode::Random {
408                n_components: _,
409                normaliser,
410                n_oversamples,
411                random_state: _,
412            } => {
413                if let Some(Normaliser::Qr(n)) = normaliser {
414                    n_iter_ = *n
415                } else {
416                    n_iter_ = 0;
417                }
418                n_oversamples_ = n_oversamples.unwrap();
419            }
420
421            FmmSvdMode::Deterministic => {
422                n_iter_ = 0usize;
423                n_oversamples_ = 0usize;
424            }
425        }
426
427        writer
428            .write_record(&[
429                depth.to_string(),
430                surface_diff.to_string(),
431                svd_threshold.to_string(),
432                expansion_order.to_string(),
433                time.to_string(),
434                min_rel_err.to_string(),
435                mean_rel_err.to_string(),
436                max_rel_err.to_string(),
437                n_iter_.to_string(),
438                n_oversamples_.to_string(),
439                (setup_time.as_millis() as f32).to_string(),
440            ])
441            .unwrap();
442    }
443}
444
445fn grid_search_laplace_fft<T>(
446    filename: String,
447    n_points: usize,
448    expansion_order_vec: &[usize],
449    depth_vec: &[u64],
450    block_size_vec: &[usize],
451) where
452    T: RlstScalar<Real = T>
453        + AsComplex
454        + Default
455        + Dft<InputType = T, OutputType = <T as AsComplex>::ComplexType>
456        + SampleUniform
457        + Float
458        + Epsilon
459        + AlignedAllocable
460        + MatrixSvd
461        + MatrixQr
462        + Upcast
463        + ArgmaxValue<T>
464        + Cast<<T as Upcast>::Higher>
465        + Cast<<<T as Upcast>::Higher as RlstScalar>::Real>,
466    <T as AsComplex>::ComplexType:
467        Hadamard8x8<Scalar = <T as AsComplex>::ComplexType> + AlignedAllocable,
468    <T as Dft>::Plan: Sync,
469    <T as Upcast>::Higher: RlstScalar + MatrixSvd + Epsilon + Cast<T>,
470    <<T as Upcast>::Higher as RlstScalar>::Real: Epsilon + MatrixSvd + Cast<T::Real>,
471{
472    let prune_empty = true;
474
475    let parameters = iproduct!(
476        depth_vec.iter(),
477        expansion_order_vec.iter(),
478        block_size_vec.iter()
479    )
480    .map(|(depth, expansion_order, block_size)| (*depth, *expansion_order, *block_size))
481    .collect_vec();
482
483    let parameters_cloned = parameters.iter().cloned().collect_vec();
484
485    let fmms = Mutex::new(Vec::new());
486    let progress = Mutex::new(0usize);
487
488    let n_params = parameters.len();
489
490    let s = Instant::now();
491    parameters
492        .into_iter()
493        .enumerate()
494        .for_each(|(i, (depth, expansion_order, block_size))| {
495            let expansion_order = vec![expansion_order; (depth + 1) as usize];
496            let n_sources = n_points;
498            let n_targets = n_points;
499            let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
500            let targets = points_fixture::<T::Real>(n_targets, None, None, Some(1));
501            let n_vecs = 1;
502            let tmp = vec![T::one(); n_sources * n_vecs];
503            let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
504            charges.data_mut().copy_from_slice(&tmp);
505
506            let s = Instant::now();
507            let fmm = SingleNodeBuilder::new(true)
508                .tree(
509                    sources.data(),
510                    targets.data(),
511                    None,
512                    Some(depth),
513                    prune_empty,
514                )
515                .unwrap()
516                .parameters(
517                    charges.data(),
518                    &expansion_order,
519                    Laplace3dKernel::new(),
520                    GreenKernelEvalType::Value,
521                    FftFieldTranslation::new(Some(block_size)),
522                    None,
523                )
524                .unwrap()
525                .build()
526                .unwrap();
527            let setup_time = s.elapsed();
528
529            fmms.lock().unwrap().push((i, fmm, setup_time));
530            *progress.lock().unwrap().deref_mut() += 1;
531
532            println!(
533                "FFT Pre-computed {:?}/{:?}",
534                *progress.lock().unwrap(),
535                n_params
536            );
537        });
538
539    println!(
540        "FFT Pre-computation Time Elapsed {:?}",
541        s.elapsed().as_secs()
542    );
543    let file = File::create(format!("{filename}.csv")).unwrap();
544    let mut writer = Writer::from_writer(file);
545    writer
546        .write_record(&[
547            "depth".to_string(),
548            "expansion_order".to_string(),
549            "block_size".to_string(),
550            "runtime".to_string(),
551            "min_rel_err".to_string(),
552            "mean_rel_err".to_string(),
553            "max_rel_err".to_string(),
554            "setup_time".to_string(),
555        ])
556        .unwrap();
557
558    let n_sources = n_points;
559    let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
560    let n_vecs = 1;
561    let tmp = vec![T::one(); n_sources * n_vecs];
562    let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
563    charges.data_mut().copy_from_slice(&tmp);
564
565    let mut progress = 0;
566    for (i, fmm, setup_time) in fmms.lock().unwrap().iter_mut() {
567        let (depth, expansion_order, block_size) = parameters_cloned[*i];
568
569        let s = Instant::now();
570        fmm.evaluate().unwrap();
571        let time = s.elapsed().as_millis() as f32;
572        progress += 1;
573        println!("FFT Evaluated {progress:?}/{n_params:?}");
574
575        let leaf_idx = 1;
576        let leaf = fmm.tree().target_tree().all_leaves().unwrap()[leaf_idx];
577        let potential = fmm.potential(&leaf).unwrap()[0];
578        let leaf_targets = fmm.tree().target_tree().coordinates(&leaf).unwrap();
579        let n_targets = leaf_targets.len() / fmm.dim();
580        let mut direct = vec![T::zero(); n_targets];
581        fmm.kernel().evaluate_st(
582            GreenKernelEvalType::Value,
583            sources.data(),
584            leaf_targets,
585            charges.data(),
586            &mut direct,
587        );
588
589        let rel_error = direct
590            .iter()
591            .zip(potential)
592            .map(|(&d, &p)| {
593                let abs_error = RlstScalar::abs(d - p);
594                abs_error / p
595            })
596            .collect_vec();
597
598        let min_rel_err = rel_error
599            .iter()
600            .filter(|&&x| !x.is_nan())
601            .cloned()
602            .min_by(|a, b| a.partial_cmp(b).unwrap())
603            .unwrap();
604
605        let max_rel_err = rel_error
606            .iter()
607            .filter(|&&x| !x.is_nan())
608            .cloned()
609            .max_by(|a, b| a.partial_cmp(b).unwrap())
610            .unwrap();
611
612        let mean_rel_err: T = rel_error.into_iter().sum::<T>() / T::from(direct.len()).unwrap();
613
614        writer
615            .write_record(&[
616                depth.to_string(),
617                expansion_order.to_string(),
618                block_size.to_string(),
619                time.to_string(),
620                min_rel_err.to_string(),
621                mean_rel_err.to_string(),
622                max_rel_err.to_string(),
623                (setup_time.as_millis() as f32).to_string(),
624            ])
625            .unwrap();
626    }
627}
628
629fn main() {
630    let max_m2l_fft_block_size_vec = vec![16, 32, 64, 128];
631    let rsvd_settings_vec = [FmmSvdMode::new(false, None, None, None, None)];
632
633    {
635        let expansion_order_vec: Vec<usize> = vec![3];
636
637        let svd_threshold_vec = vec![None, Some(1e-7), Some(1e-5), Some(1e-3), Some(1e-1)];
638
639        let surface_diff_vec: Vec<usize> = vec![0, 1, 2];
640        let depth_vec: Vec<u64> = vec![4, 5];
641
642        let n_points = 10000;
643
644        grid_search_laplace_fft::<f32>(
645            "grid_search_laplace_fft_f32_m1".to_string(),
646            n_points,
647            &expansion_order_vec,
648            &depth_vec,
649            &max_m2l_fft_block_size_vec,
650        );
651
652        for (i, &rsvd_settings) in rsvd_settings_vec.iter().enumerate() {
653            grid_search_laplace_blas_svd::<f32>(
654                format!("grid_search_laplace_blas_f32_m1_{i}").to_string(),
655                n_points,
656                &expansion_order_vec,
657                &svd_threshold_vec,
658                &surface_diff_vec,
659                &depth_vec,
660                &[rsvd_settings],
661            );
662        }
663    }
664}