green_kernels/
traits.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
//! Trait for Green's function kernels

use crate::types::GreenKernelEvalType;
#[cfg(feature = "mpi")]
use mpi::traits::{Communicator, Equivalence, Root};
use rlst::RlstScalar;
#[cfg(feature = "mpi")]
use rlst::{rlst_dynamic_array1, RawAccess, RawAccessMut};

/// Interface to evaluating Green's functions for given sources and targets.
pub trait Kernel: Sync {
    /// The scalar type
    type T: RlstScalar;

    /// Evaluate the Green's fct. for a single source and single target.
    fn greens_fct(
        &self,
        eval_type: GreenKernelEvalType,
        source: &[<Self::T as RlstScalar>::Real],
        target: &[<Self::T as RlstScalar>::Real],
        result: &mut [Self::T],
    );

    /// Single threaded evaluation of Green's functions.
    ///
    /// - `eval_type`: Either [EvalType::Value] to only return Green's function values
    ///              or [EvalType::ValueDeriv] to return values and derivatives.
    /// - `sources`: A slice defining the source points. The points must be given in the form
    ///            `[x_1, x_2, ... x_N, y_1, y_2, ..., y_N, z_1, z_2, ..., z_N]`, that is
    ///            the value for each dimension must be continuously contained in the slice.
    /// - `targets`: A slice defining the targets. The memory layout is the same as for sources.
    /// - `charges`: A slice defining the charges. For each source point there needs to be one charge.
    /// - `result`: The result array. If the kernel is RlstScalar and `eval_type` has the value [EvalType::Value]
    ///           then `result` has the same number of elemens as there are targets. For a RlstScalar kernel
    ///           in three dimensional space if [EvalType::ValueDeriv] was chosen then `result` contains
    ///           for each target in consecutive order the value of the kernel and the three components
    ///           of its derivative.
    ///
    fn evaluate_st(
        &self,
        eval_type: GreenKernelEvalType,
        sources: &[<Self::T as RlstScalar>::Real],
        targets: &[<Self::T as RlstScalar>::Real],
        charges: &[Self::T],
        result: &mut [Self::T],
    );

    /// Multi-threaded evaluation of a Green's function kernel.
    ///
    /// The method parallelizes over the given targets. It expects a Rayon `ThreadPool`
    /// in which the multi-threaded execution can be scheduled.
    fn evaluate_mt(
        &self,
        eval_type: GreenKernelEvalType,
        sources: &[<Self::T as RlstScalar>::Real],
        targets: &[<Self::T as RlstScalar>::Real],
        charges: &[Self::T],
        result: &mut [Self::T],
    );

    /// Single threaded assembly of a kernel matrix.
    ///
    /// - `eval_type`: Either [EvalType::Value] to only return Green's function values
    ///              or [EvalType::ValueDeriv] to return values and derivatives.
    /// - `sources`: A slice defining the source points. The points must be given in the form
    ///            `[x_1, x_2, ... x_N, y_1, y_2, ..., y_N, z_1, z_2, ..., z_N]`, that is
    ///            the value for each dimension must be continuously contained in the slice.
    /// - `targets`: A slice defining the targets. The memory layout is the same as for sources.
    /// - `result`: The result array. If the kernel is RlstScalar and `eval_type` has the value [EvalType::Value]
    ///           then `result` is equivalent to a column major matrix of dimension [S, T], where S is the number of sources and
    ///           T is the number of targets. Hence, for each target all corresponding source evaluations are consecutively in memory.
    ///           For a RlstScalar kernel in three dimensional space if [EvalType::ValueDeriv] was chosen then `result` is equivalent
    ///           to a column-major matrix of dimension [4 * S, T], where the first 4 rows are the values of Green's fct. value and
    ///           derivatives for the first source and all targets. The next 4 rows correspond to values and derivatives of second source
    ///           with all targets and so on.
    ///
    fn assemble_st(
        &self,
        eval_type: GreenKernelEvalType,
        sources: &[<Self::T as RlstScalar>::Real],
        targets: &[<Self::T as RlstScalar>::Real],
        result: &mut [Self::T],
    );

    /// Multi-threaded version of kernel matrix assembly.
    fn assemble_mt(
        &self,
        eval_type: GreenKernelEvalType,
        sources: &[<Self::T as RlstScalar>::Real],
        targets: &[<Self::T as RlstScalar>::Real],
        result: &mut [Self::T],
    );

    /// Single threaded assembly of the diagonal of a kernel matrix
    fn assemble_pairwise_st(
        &self,
        eval_type: GreenKernelEvalType,
        sources: &[<Self::T as RlstScalar>::Real],
        targets: &[<Self::T as RlstScalar>::Real],
        result: &mut [Self::T],
    );

    /// Return the domain component count of the Green's fct.
    ///
    /// For a RlstScalar kernel this is `1`.
    fn domain_component_count(&self) -> usize;

    /// Return the space dimension.
    fn space_dimension(&self) -> usize;

    /// Return the range component count of the Green's fct.
    ///
    /// For a RlstScalar kernel this is `1` if [EvalType::Value] is
    /// given, and `4` if [EvalType::ValueDeriv] is given.
    fn range_component_count(&self, eval_type: GreenKernelEvalType) -> usize;
}

// Note that we cannot just add the `evaluate_distributed` method to the `Kernel` trait
// since currently the C interface is implemented by making `Kernel` a trait object.
// This requires that methods do not introduce additional template parameters. Can change this
// again once we move to the better C interface in `c-api-tools`.

/// Distributed evaluation of a Green's function kernel.
///
/// If `use_multithreaded` is set to true, the evaluation uses Rayon multi-threading on each rank.
/// Otherwise, the evaluation on each rank is single-threaded.
#[cfg(feature = "mpi")]
pub trait DistributedKernelEvaluator: Kernel {
    fn evaluate_distributed<C: Communicator>(
        &self,
        eval_type: GreenKernelEvalType,
        sources: &[<Self::T as RlstScalar>::Real],
        targets: &[<Self::T as RlstScalar>::Real],
        charges: &[Self::T],
        result: &mut [Self::T],
        use_multithreaded: bool,
        comm: &C,
    ) where
        Self::T: Equivalence,
        <Self::T as RlstScalar>::Real: Equivalence,
    {
        // Check that the number of sources and number of charges are compatible.
        assert_eq!(sources.len(), 3 * charges.len());

        // Check that the output vector has the correct size.
        // Multiply result by 3 since targets have 3 components (x, y, z) direction.
        assert_eq!(
            self.range_component_count(eval_type) * targets.len(),
            3 * result.len()
        );

        let size = comm.size();

        // We now iterate through each rank associated with the sources and communicate from that rank
        // the sources to all target ranks.

        for rank in 0..size as usize {
            // Communicate the sources and charges from `rank` to all ranks.

            // We first need to tell all ranks how many sources and charges we have.
            let root_process = comm.process_at_rank(rank as i32);

            let nsources = {
                let mut nsources;
                if comm.rank() == rank as i32 {
                    nsources = charges.len();
                } else {
                    nsources = 0;
                }
                root_process.broadcast_into(&mut nsources);
                nsources
            };

            let mut root_sources =
                rlst_dynamic_array1!(<Self::T as RlstScalar>::Real, [3 * nsources]);
            let mut root_charges = rlst_dynamic_array1!(Self::T, [nsources]);

            if comm.rank() == rank as i32 {
                root_sources.data_mut().copy_from_slice(sources);
                root_charges.data_mut().copy_from_slice(charges);
            }

            root_process.broadcast_into(&mut root_sources.data_mut()[..]);
            root_process.broadcast_into(&mut root_charges.data_mut()[..]);

            // We now have the sources and charges on all ranks. We can now simply evaluate.

            if use_multithreaded {
                self.evaluate_mt(
                    eval_type,
                    &root_sources.data()[..],
                    targets,
                    &root_charges.data()[..],
                    result,
                );
            } else {
                self.evaluate_st(
                    eval_type,
                    &root_sources.data()[..],
                    targets,
                    &root_charges.data()[..],
                    result,
                );
            }
        }
    }
}

#[cfg(feature = "mpi")]
impl<K: Kernel> DistributedKernelEvaluator for K {}