
Attentive Kernel

sgptools.kernels.attentive_kernel

Attentive Kernel function

AttentiveKernel

Bases: Kernel

Attentive Kernel function (non-stationary kernel function). Based on the implementation from this [repo](https://github.com/Weizhe-Chen/attentive_kernels).

Refer to the following papers for more details:
  • AK: Attentive Kernel for Information Gathering [Chen et al., 2022]

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `lengthscales` | List | List of lengthscales to use in the mixture components. The lengthscales are not trained. | required |
| `amplitude` | float | Initial amplitude of the kernel function | `1.0` |
| `dim_hidden` | int | Number of MLP hidden layer nodes (the NN has two such hidden layers) | `10` |
| `num_dim` | int | Number of dimensions of the data points | `2` |
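Below is a minimal usage sketch (not taken from the library's examples): it assumes `sgptools` is installed, that the kernel's `float_type` matches GPflow's default float64, and pairs the kernel with a standard GPflow GPR model; the data and lengthscale grid are purely illustrative.

```python
# Hedged usage sketch: pair AttentiveKernel with a plain GPflow GPR model.
import numpy as np
import gpflow
from sgptools.kernels.attentive_kernel import AttentiveKernel

rng = np.random.default_rng(0)
X = rng.uniform(size=(100, 2))                                # 2D inputs (num_dim=2)
y = np.sin(6.0 * X[:, :1]) + 0.1 * rng.normal(size=(100, 1))  # toy targets

# Fixed lengthscale grid for the mixture components; only the NN weights
# and the amplitude are trained.
kernel = AttentiveKernel(lengthscales=[0.05, 0.1, 0.5, 1.0], num_dim=2)

# Use it like any other GPflow kernel, e.g. in an exact GP regression model.
model = gpflow.models.GPR(data=(X, y), kernel=kernel)
gpflow.optimizers.Scipy().minimize(model.training_loss, model.trainable_variables)
```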
Source code in sgptools/kernels/attentive_kernel.py
class AttentiveKernel(gpflow.kernels.Kernel):
    """Attentive Kernel function (non-stationary kernel function). 
    Based on the implementation from this [repo](https://github.com/Weizhe-Chen/attentive_kernels)

    Refer to the following papers for more details:
        - AK: Attentive Kernel for Information Gathering [Chen et al., 2022]

    Args:
        lengthscales (List): List of lengthscales to use in the mixture components. The lengthscales are not trained.
        amplitude (float): Initial amplitude of the kernel function
        dim_hidden (int): Number of MLP hidden layer nodes (The NN will have two of these layers)
        num_dim (int): Number of dimensions of the data points
    """
    def __init__(self, 
                 lengthscales, 
                 dim_hidden=10,
                 amplitude=1.0,
                 num_dim=2): 
        super().__init__()
        with self.name_scope:
            self.num_lengthscales = len(lengthscales)
            self._free_amplitude = tf.Variable(amplitude, 
                                               shape=[],
                                               trainable=True,
                                               dtype=float_type)
            self.lengthscales = tf.Variable(lengthscales, 
                                            shape=[self.num_lengthscales], 
                                            trainable=False,
                                            dtype=float_type)

            self.nn = NN([num_dim, dim_hidden, dim_hidden, self.num_lengthscales])

    def get_representations(self, X):
        # Pass the inputs through the MLP and normalize each output to unit norm;
        # these unit vectors weight the lengthscale mixture components in K.
        Z = self.nn(X)
        representations = Z / tf.norm(Z, axis=1, keepdims=True)
        return representations

    def K(self, X, X2=None):
        """Computes the covariances between/amongst the input variables

        Args:
            X (ndarray): Inputs for which to compute the covariance matrix
            X2 (ndarray): If passed, the covariance between X and X2 is computed. Otherwise, 
                          the covariance between X and X is computed.

        Returns:
            cov (ndarray): covariance matrix
        """
        if X2 is None:
            X2 = X

        dist = cdist(X, X2)
        repre1 = self.get_representations(X)
        repre2 = self.get_representations(X2)

        def get_mixture_component(i):
            attention_lengthscales = tf.tensordot(repre1[:, i], repre2[:, i], axes=0)
            cov_mat = rbf(dist, self.lengthscales[i]) * attention_lengthscales   
            return cov_mat

        cov_mat = tf.map_fn(fn=get_mixture_component, 
                            elems=tf.range(self.num_lengthscales, dtype=tf.int64), 
                            fn_output_signature=dist.dtype)
        cov_mat = tf.math.reduce_sum(cov_mat, axis=0)
        attention_inputs = repre1 @ tf.transpose(repre2)
        cov_mat *= self._free_amplitude * attention_inputs

        return cov_mat

    def K_diag(self, X):
        return self._free_amplitude * tf.ones((X.shape[0]), dtype=X.dtype)

K(X, X2=None)

Computes the covariance matrix between the input variables

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `X` | ndarray | Inputs for which to compute the covariance matrix | required |
| `X2` | ndarray | If passed, the covariance between X and X2 is computed. Otherwise, the covariance between X and X is computed. | `None` |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `cov` | ndarray | Covariance matrix |

Source code in sgptools/kernels/attentive_kernel.py
def K(self, X, X2=None):
    """Computes the covariances between/amongst the input variables

    Args:
        X (ndarray): Inputs for which to compute the covariance matrix
        X2 (ndarray): If passed, the covariance between X and X2 is computed. Otherwise, 
                      the covariance between X and X is computed.

    Returns:
        cov (ndarray): covariance matrix
    """
    if X2 is None:
        X2 = X

    dist = cdist(X, X2)
    repre1 = self.get_representations(X)
    repre2 = self.get_representations(X2)

    def get_mixture_component(i):
        attention_lengthscales = tf.tensordot(repre1[:, i], repre2[:, i], axes=0)
        cov_mat = rbf(dist, self.lengthscales[i]) * attention_lengthscales   
        return cov_mat

    cov_mat = tf.map_fn(fn=get_mixture_component, 
                        elems=tf.range(self.num_lengthscales, dtype=tf.int64), 
                        fn_output_signature=dist.dtype)
    cov_mat = tf.math.reduce_sum(cov_mat, axis=0)
    attention_inputs = repre1 @ tf.transpose(repre2)
    cov_mat *= self._free_amplitude * attention_inputs

    return cov_mat
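As a reading aid (notation introduced here, not taken from the docstrings): writing $z(x)$ for the unit-normalized MLP output from `get_representations`, $a$ for the amplitude, and $\ell_1, \dots, \ell_M$ for the fixed lengthscales, the code above evaluates

$$
k(x, x') = a \,\langle z(x), z(x')\rangle \sum_{i=1}^{M} z_i(x)\, z_i(x')\, \exp\!\left(-\frac{\lVert x - x'\rVert^2}{2\,\ell_i^2}\right)
$$

i.e., a mixture of fixed-lengthscale RBF terms weighted by the learned representations, scaled by the similarity of those representations.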

cdist(x, y)

Calculate the pairwise Euclidean distances between the rows of x and y

Source code in sgptools/kernels/attentive_kernel.py
def cdist(x, y):
    '''
    Calculate the pairwise Euclidean distances between the rows of x and y
    '''
    # Calculate distance for a single row of x.
    per_x_dist = lambda i : tf.norm(x[i:(i+1),:] - y, axis=1)
    # Compute and stack distances for all rows of x.
    dist = tf.map_fn(fn=per_x_dist, 
                     elems=tf.range(tf.shape(x)[0], dtype=tf.int64), 
                     fn_output_signature=x.dtype)
    return dist
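A small sanity-check sketch (illustrative only, not from the library's tests): the helper should agree with SciPy's Euclidean `cdist`, assuming it is importable from the module as shown.

```python
import numpy as np
import tensorflow as tf
from scipy.spatial import distance
from sgptools.kernels.attentive_kernel import cdist  # assumed import path

x = np.random.rand(5, 3)
y = np.random.rand(7, 3)
d_tf = cdist(tf.constant(x), tf.constant(y)).numpy()  # (5, 7) pairwise distances
d_sp = distance.cdist(x, y)                           # SciPy default metric is Euclidean
assert np.allclose(d_tf, d_sp)
```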

rbf(dist, lengthscale)

RBF kernel function

Source code in sgptools/kernels/attentive_kernel.py
def rbf(dist, lengthscale):
    '''
    RBF kernel function
    '''
    return tf.math.exp(-0.5 * tf.math.square(dist / lengthscale))
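For reference, the helper implements the unnormalized squared-exponential profile (the amplitude is applied separately in `K`):

$$
\mathrm{rbf}(d, \ell) = \exp\!\left(-\frac{d^2}{2\ell^2}\right)
$$

so at a distance equal to the lengthscale, $d = \ell$, it evaluates to $e^{-1/2} \approx 0.61$.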