grid_search_single_node/
grid_search_single_node.rs

1//! Optimal parameter search on a single node for the Laplace problem
2use std::{fs::File, ops::DerefMut, sync::Mutex, time::Instant};
3
4use csv::Writer;
5use green_kernels::{laplace_3d::Laplace3dKernel, traits::Kernel, types::GreenKernelEvalType};
6use itertools::{iproduct, Itertools};
7use kifmm::{
8    fftw::array::AlignedAllocable,
9    fmm::types::FmmSvdMode,
10    linalg::rsvd::{MatrixRsvd, Normaliser},
11    traits::{
12        fftw::Dft,
13        fmm::{DataAccess, Evaluate},
14        general::single_node::{AsComplex, Epsilon, Hadamard8x8},
15        tree::{SingleFmmTree, SingleTree},
16    },
17    tree::helpers::points_fixture,
18    BlasFieldTranslationSaRcmp, FftFieldTranslation, SingleNodeBuilder,
19};
20use num::Float;
21use rand::distributions::uniform::SampleUniform;
22use rlst::{rlst_dynamic_array2, MatrixSvd, RawAccess, RawAccessMut, RlstScalar};
23
24fn grid_search_laplace_blas<T>(
25    filename: String,
26    n_points: usize,
27    expansion_order_vec: &[usize],
28    svd_threshold_vec: &[Option<T>],
29    surface_diff_vec: &[usize],
30    depth_vec: &[u64],
31    rsvd_settings_vec: &[FmmSvdMode],
32) where
33    T: RlstScalar<Real = T> + Epsilon + Float + Default + SampleUniform + MatrixRsvd,
34{
35    // FMM parameters
36    let prune_empty = true;
37
38    let parameters = iproduct!(
39        surface_diff_vec.iter(),
40        svd_threshold_vec.iter(),
41        depth_vec.iter(),
42        expansion_order_vec.iter(),
43        rsvd_settings_vec.iter()
44    )
45    .map(
46        |(surface_diff, svd_threshold, depth, expansion_order, rsvd_settings)| {
47            (
48                *surface_diff,
49                *svd_threshold,
50                *depth,
51                *expansion_order,
52                rsvd_settings,
53            )
54        },
55    )
56    .collect_vec();
57
58    let parameters_cloned = parameters.iter().cloned().collect_vec();
59
60    let fmms = Mutex::new(Vec::new());
61    let progress = Mutex::new(0usize);
62
63    let n_params = parameters.len();
64
65    // Setup random sources and targets
66    let n_sources = n_points;
67    let n_targets = n_points;
68    let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
69    let targets = points_fixture::<T::Real>(n_targets, None, None, Some(1));
70    let n_vecs = 1;
71    let tmp = vec![T::one(); n_sources * n_vecs];
72    let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
73    charges.data_mut().copy_from_slice(&tmp);
74
75    let s = Instant::now();
76    parameters.into_iter().enumerate().for_each(
77        |(i, (surface_diff, svd_threshold, depth, expansion_order, rsvd_settings))| {
78            let expansion_order = vec![expansion_order; (depth + 1) as usize];
79
80            let s = Instant::now();
81            let fmm = SingleNodeBuilder::new(true)
82                .tree(
83                    sources.data(),
84                    targets.data(),
85                    None,
86                    Some(depth),
87                    prune_empty,
88                )
89                .unwrap()
90                .parameters(
91                    charges.data(),
92                    &expansion_order,
93                    Laplace3dKernel::new(),
94                    GreenKernelEvalType::Value,
95                    BlasFieldTranslationSaRcmp::new(
96                        svd_threshold,
97                        Some(surface_diff),
98                        *rsvd_settings,
99                    ),
100                )
101                .unwrap()
102                .build()
103                .unwrap();
104            let setup_time = s.elapsed();
105            fmms.lock().unwrap().push((i, fmm, setup_time));
106            *progress.lock().unwrap().deref_mut() += 1;
107
108            println!(
109                "BLAS Pre-computed {:?}/{:?}",
110                *progress.lock().unwrap(),
111                n_params
112            );
113        },
114    );
115
116    let file = File::create(format!("{filename}.csv")).unwrap();
117    let mut writer = Writer::from_writer(file);
118    writer
119        .write_record(&[
120            "depth".to_string(),
121            "surface_diff".to_string(),
122            "svd_threshold".to_string(),
123            "expansion_order".to_string(),
124            "runtime".to_string(),
125            "min_rel_err".to_string(),
126            "mean_rel_err".to_string(),
127            "max_rel_err".to_string(),
128            "n_iter".to_string(),
129            "n_oversamples".to_string(),
130            "setup_time".to_string(),
131        ])
132        .unwrap();
133
134    println!(
135        "BLAS Pre-computation Time Elapsed {:?}",
136        s.elapsed().as_secs()
137    );
138
139    // Setup random sources and targets
140    let n_sources = 1000000;
141    let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
142    let n_vecs = 1;
143    let tmp = vec![T::one(); n_sources * n_vecs];
144    let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
145    charges.data_mut().copy_from_slice(&tmp);
146
147    let mut progress = 0;
148    for (i, fmm, setup_time) in fmms.lock().unwrap().iter_mut() {
149        let (surface_diff, svd_threshold, depth, expansion_order, rsvd_settings) =
150            parameters_cloned[*i];
151
152        let svd_threshold = svd_threshold.unwrap_or(T::zero().re());
153
154        let s = Instant::now();
155        fmm.evaluate().unwrap();
156        let time = s.elapsed().as_millis() as f32;
157        progress += 1;
158        println!("BLAS Evaluated {progress:?}/{n_params:?}");
159
160        let leaf_idx = 1;
161        let leaf = fmm.tree().target_tree().all_leaves().unwrap()[leaf_idx];
162        let potential = fmm.potential(&leaf).unwrap()[0];
163        let leaf_targets = fmm.tree().target_tree().coordinates(&leaf).unwrap();
164        let n_targets = leaf_targets.len() / fmm.dim();
165        let mut direct = vec![T::zero(); n_targets];
166        fmm.kernel().evaluate_st(
167            GreenKernelEvalType::Value,
168            sources.data(),
169            leaf_targets,
170            charges.data(),
171            &mut direct,
172        );
173
174        let rel_error = direct
175            .iter()
176            .zip(potential)
177            .map(|(&d, &p)| {
178                let abs_error = RlstScalar::abs(d - p);
179                abs_error / p
180            })
181            .collect_vec();
182
183        let min_rel_err = rel_error
184            .iter()
185            .filter(|&&x| !x.is_nan())
186            .cloned()
187            .min_by(|a, b| a.partial_cmp(b).unwrap())
188            .unwrap();
189        let max_rel_err = rel_error
190            .iter()
191            .filter(|&&x| !x.is_nan())
192            .cloned()
193            .max_by(|a, b| a.partial_cmp(b).unwrap())
194            .unwrap();
195
196        let mean_rel_err: T = rel_error.into_iter().sum::<T>() / T::from(direct.len()).unwrap();
197
198        let n_iter_;
199        let n_oversamples_;
200        match rsvd_settings {
201            FmmSvdMode::Random {
202                n_components: _,
203                normaliser,
204                n_oversamples,
205                random_state: _,
206            } => {
207                if let Some(Normaliser::Qr(n)) = normaliser {
208                    n_iter_ = *n
209                } else {
210                    n_iter_ = 0;
211                }
212                n_oversamples_ = n_oversamples.unwrap();
213            }
214
215            FmmSvdMode::Deterministic => {
216                n_iter_ = 0usize;
217                n_oversamples_ = 0usize;
218            }
219        }
220
221        writer
222            .write_record(&[
223                depth.to_string(),
224                surface_diff.to_string(),
225                svd_threshold.to_string(),
226                expansion_order.to_string(),
227                time.to_string(),
228                min_rel_err.to_string(),
229                mean_rel_err.to_string(),
230                max_rel_err.to_string(),
231                n_iter_.to_string(),
232                n_oversamples_.to_string(),
233                (setup_time.as_millis() as f32).to_string(),
234            ])
235            .unwrap();
236    }
237}
238
239fn grid_search_laplace_fft<T>(
240    filename: String,
241    n_points: usize,
242    expansion_order_vec: &[usize],
243    depth_vec: &[u64],
244    block_size_vec: &[usize],
245) where
246    T: RlstScalar<Real = T>
247        + AsComplex
248        + Default
249        + Dft<InputType = T, OutputType = <T as AsComplex>::ComplexType>
250        + SampleUniform
251        + Float
252        + Epsilon
253        + AlignedAllocable
254        + MatrixSvd,
255    <T as AsComplex>::ComplexType:
256        Hadamard8x8<Scalar = <T as AsComplex>::ComplexType> + AlignedAllocable,
257    <T as Dft>::Plan: Sync,
258{
259    // FMM parameters
260    let prune_empty = true;
261
262    let parameters = iproduct!(
263        depth_vec.iter(),
264        expansion_order_vec.iter(),
265        block_size_vec.iter()
266    )
267    .map(|(depth, expansion_order, block_size)| (*depth, *expansion_order, *block_size))
268    .collect_vec();
269
270    let parameters_cloned = parameters.iter().cloned().collect_vec();
271
272    let fmms = Mutex::new(Vec::new());
273    let progress = Mutex::new(0usize);
274
275    let n_params = parameters.len();
276
277    let s = Instant::now();
278    parameters
279        .into_iter()
280        .enumerate()
281        .for_each(|(i, (depth, expansion_order, block_size))| {
282            let expansion_order = vec![expansion_order; (depth + 1) as usize];
283            // Setup random sources and targets
284            let n_sources = n_points;
285            let n_targets = n_points;
286            let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
287            let targets = points_fixture::<T::Real>(n_targets, None, None, Some(1));
288            let n_vecs = 1;
289            let tmp = vec![T::one(); n_sources * n_vecs];
290            let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
291            charges.data_mut().copy_from_slice(&tmp);
292
293            let s = Instant::now();
294            let fmm = SingleNodeBuilder::new(true)
295                .tree(
296                    sources.data(),
297                    targets.data(),
298                    None,
299                    Some(depth),
300                    prune_empty,
301                )
302                .unwrap()
303                .parameters(
304                    charges.data(),
305                    &expansion_order,
306                    Laplace3dKernel::new(),
307                    GreenKernelEvalType::Value,
308                    FftFieldTranslation::new(Some(block_size)),
309                )
310                .unwrap()
311                .build()
312                .unwrap();
313            let setup_time = s.elapsed();
314
315            fmms.lock().unwrap().push((i, fmm, setup_time));
316            *progress.lock().unwrap().deref_mut() += 1;
317
318            println!(
319                "FFT Pre-computed {:?}/{:?}",
320                *progress.lock().unwrap(),
321                n_params
322            );
323        });
324
325    println!(
326        "FFT Pre-computation Time Elapsed {:?}",
327        s.elapsed().as_secs()
328    );
329    let file = File::create(format!("{filename}.csv")).unwrap();
330    let mut writer = Writer::from_writer(file);
331    writer
332        .write_record(&[
333            "depth".to_string(),
334            "expansion_order".to_string(),
335            "block_size".to_string(),
336            "runtime".to_string(),
337            "min_rel_err".to_string(),
338            "mean_rel_err".to_string(),
339            "max_rel_err".to_string(),
340            "setup_time".to_string(),
341        ])
342        .unwrap();
343
344    let n_sources = n_points;
345    let sources = points_fixture::<T::Real>(n_sources, None, None, Some(0));
346    let n_vecs = 1;
347    let tmp = vec![T::one(); n_sources * n_vecs];
348    let mut charges = rlst_dynamic_array2!(T, [n_sources, n_vecs]);
349    charges.data_mut().copy_from_slice(&tmp);
350
351    let mut progress = 0;
352    for (i, fmm, setup_time) in fmms.lock().unwrap().iter_mut() {
353        let (depth, expansion_order, block_size) = parameters_cloned[*i];
354
355        let s = Instant::now();
356        fmm.evaluate().unwrap();
357        let time = s.elapsed().as_millis() as f32;
358        progress += 1;
359        println!("FFT Evaluated {progress:?}/{n_params:?}");
360
361        let leaf_idx = 1;
362        let leaf = fmm.tree().target_tree().all_leaves().unwrap()[leaf_idx];
363        let potential = fmm.potential(&leaf).unwrap()[0];
364        let leaf_targets = fmm.tree().target_tree().coordinates(&leaf).unwrap();
365        let n_targets = leaf_targets.len() / fmm.dim();
366        let mut direct = vec![T::zero(); n_targets];
367        fmm.kernel().evaluate_st(
368            GreenKernelEvalType::Value,
369            sources.data(),
370            leaf_targets,
371            charges.data(),
372            &mut direct,
373        );
374
375        let rel_error = direct
376            .iter()
377            .zip(potential)
378            .map(|(&d, &p)| {
379                let abs_error = RlstScalar::abs(d - p);
380                abs_error / p
381            })
382            .collect_vec();
383
384        let min_rel_err = rel_error
385            .iter()
386            .filter(|&&x| !x.is_nan())
387            .cloned()
388            .min_by(|a, b| a.partial_cmp(b).unwrap())
389            .unwrap();
390
391        let max_rel_err = rel_error
392            .iter()
393            .filter(|&&x| !x.is_nan())
394            .cloned()
395            .max_by(|a, b| a.partial_cmp(b).unwrap())
396            .unwrap();
397
398        let mean_rel_err: T = rel_error.into_iter().sum::<T>() / T::from(direct.len()).unwrap();
399
400        writer
401            .write_record(&[
402                depth.to_string(),
403                expansion_order.to_string(),
404                block_size.to_string(),
405                time.to_string(),
406                min_rel_err.to_string(),
407                mean_rel_err.to_string(),
408                max_rel_err.to_string(),
409                (setup_time.as_millis() as f32).to_string(),
410            ])
411            .unwrap();
412    }
413}
414
415fn main() {
416    let max_m2l_fft_block_size_vec = vec![16, 32, 64, 128];
417    let rsvd_settings_vec = [FmmSvdMode::new(false, None, None, None, None)];
418
419    // Single Precision
420    {
421        let expansion_order_vec: Vec<usize> = vec![3];
422
423        let svd_threshold_vec = vec![None, Some(1e-7), Some(1e-5), Some(1e-3), Some(1e-1)];
424
425        let surface_diff_vec: Vec<usize> = vec![0, 1, 2];
426        let depth_vec: Vec<u64> = vec![4, 5];
427
428        let n_points = 10000;
429
430        grid_search_laplace_fft::<f32>(
431            "grid_search_laplace_fft_f32_m1".to_string(),
432            n_points,
433            &expansion_order_vec,
434            &depth_vec,
435            &max_m2l_fft_block_size_vec,
436        );
437
438        for (i, &rsvd_settings) in rsvd_settings_vec.iter().enumerate() {
439            grid_search_laplace_blas::<f32>(
440                format!("grid_search_laplace_blas_f32_m1_{i}").to_string(),
441                n_points,
442                &expansion_order_vec,
443                &svd_threshold_vec,
444                &surface_diff_vec,
445                &depth_vec,
446                &[rsvd_settings],
447            );
448        }
449    }
450}