Source code for derrom.dim_reducers

import numpy as np


[docs]class base_dim_reducer:
    """
    Base class to define the methods to be implemented
    """
    def __init__(self):
        pass
    
[docs]    def train(self, data_matrix, rdim):
        raise NotImplementedError
        
[docs]    def reduce(self, data_matrix, rdim):
        raise NotImplementedError
        
[docs]    def reconstruct(self, reduced_data_matrix):
        raise NotImplementedError
        
        
[docs]class SVD(base_dim_reducer):
    r"""
    Singular value decomposition (SVD) based linear dimensionality reduction.
    
    The SVD :math:`X^T = U \Sigma V^T` provides an orthonormal basis for the transposed data matrix :math:`X^T` via the unitary matrix :math:`U`. Note that the data vectors :math:`x_n` are stored in the rows of :math:`X` and the data matrix is then transposed to conform to the typical SVD literature.
    The dimensionality reduction is achieved by projecting new data onto the first :math:`r` left singular vectors :math:`u_{l}` of the training data matrix, i.e., onto the truncated basis :math:`U_r`.
    
    
    Class attributes
    
    Attributes
    ----------
    U : 2D numpy.ndarray
        :math:`U` matrix of the SVD, where the columns are the left singular vectors :math:`u_{l}`.
    S : 2D numpy.ndarray
        Diagonal matrix containing the singular values :math:`\sigma_l` in descending order.
    
    """
    
    def __init__(self):
        self.U = None
        self.S = None
        pass
    
[docs]    def train(self, data_matrix, rdim):
        r"""
        Training method that computes the SVD and stores the :math:`U` matrix and the singular values :math:`\Sigma` in U and S for the reduction and reconstruction and analysis.
        
        Parameters
        ----------
        data_matrix : 2D numpy.ndarray
            The data vectors are expected to be stored in the rows (first index). The training data matrix is typically composed of multiple concatenated trajectories.
        rdim : int
            Number of latent space dimensions, to which the trajectory is to be reduced. Not used by the SVD reducer.
        """
        self.U,self.S = np.linalg.svd(data_matrix.T, full_matrices=False)[:2] #SVD of the transposed data matrix since the data is stored in row vectors
        
[docs]    def reduce(self, data_matrix, rdim):
        r"""
        Projects the input into a latent space with reduced dimensionality.
        
        Computes :math:`R = X U_r`, where :math:`X` is the data matrix and :math:`R` the reduced data matrix. Note, that the data vectors are stored in the rows here.
        
        Parameters
        ----------
        data_matrix : 2D numpy.ndarray
            The high dimensional trajectory, which is to be reduced.
        rdim : int
            Number of latent space dimensions, to which the trajectory is to be reduced.
            
            
        Returns
        -------
        reduced_data_matrix : 2D numpy.ndarray
            The reduced dimensionality trajectory
        """
        return data_matrix @ self.U[:,:rdim] #project the data matrix onto the first rdim left singular vectors. The reduced data matrix then carries rdim coefficients in its rows
    
[docs]    def reconstruct(self, reduced_data_matrix):
        r"""
        Reconstructs the data matrix by expanding it in the truncated basis.
        
        Computes :math:`\tilde{X} = R U_r^T`, where :math:`\tilde{X}` is the reconstructed data matrix and :math:`R` the reduced data matrix
        
        
        Parameters
        ----------
        reduced_data_matrix : 2D numpy.ndarray
            The reduced dimensianal trajectory to be reconstructed.
        
        
        Returns
        -------
        reconstructed_data_matrix : 2D numpy.ndarray
            The reconstructed/expanded trajectory
        
        """
        
        dim = reduced_data_matrix.shape[1]
        return reduced_data_matrix @ self.U[:,:dim].T 
      
      
[docs]class DFT(base_dim_reducer):
    """
    Dimensionality reduction with Fourier modes.
    
    The dimensionality reduction is achieved by projecting the data onto a reduced number of fourier modes. The latent space description thus consists of the considered Fourier coefficients. The complex coeffiecients are thereby split into their real and imaginary parts.
    Implemented via the Fast Fourier Transform (FFT).
    
    
    Parameters
    ----------
    sorted : bool
        If set to False, the low frequency modes are used and and the high frequency modes are discarded (cutoff controlled by the reduced dimension :math:`r`.) If set to True, the Fourier coefficients are evaluated for the training data and sorted in descending order of their mean. The dimensionality reduction then utilizes the corresponding first :math:`r` modes with the largest mean coefficients.
        
    """
    
    def __init__(self, sorted=False):
        self.sorted = sorted
        pass
    
[docs]    def train(self, data_matrix, rdim):
        """
        Computes the mean Fourier coefficients to generated arrays with the sorting and unsorting indices if sorted is set to True.
        
        Parameters
        ----------
        data_matrix : 2D numpy.ndarray
            The data vectors are expected to be stored in the rows (first index). The training data matrix is typically composed of multiple concatenated trajectories.
        rdim : int
            Number of latent space dimensions, to which the trajectory is to be reduced. Not used by the DFT reducer.
        """
        
        self.full_dim = data_matrix.shape[1]
        
        if self.sorted:
            FT = np.fft.rfft(data_matrix, axis=1)

            real_matrix = np.zeros((FT.shape[0],FT.shape[1]*2))

            real_matrix[:,::2] = np.real(FT)
            real_matrix[:,1::2] = np.imag(FT)

            self.mean_coefs = np.mean(real_matrix, axis=0)

            self.sort_inds = np.flip(np.argsort(np.abs(self.mean_coefs)))
            self.unsort_inds = np.argsort(self.sort_inds)
        
        
[docs]    def reduce(self, data_matrix, rdim):
        r"""
        Projects the data into the truncated Fourier space via the FFT.
        
        
        Parameters
        ----------
        data_matrix : 2D numpy.ndarray
            The high dimensional trajectory, which is to be reduced.
        rdim : int
            Number of latent space dimensions, to which the trajectory is to be reduced.
            
            
        Returns
        -------
        reduced_data_matrix : 2D numpy.ndarray
            The reduced dimensionality trajectory
        """
        FT = np.fft.rfft(data_matrix, axis=1)
  
        real_matrix = np.zeros((FT.shape[0],FT.shape[1]*2))
        
        real_matrix[:,::2] = np.real(FT)
        real_matrix[:,1::2] = np.imag(FT)
        
        if self.sorted:
            return real_matrix[:,self.sort_inds][:,:rdim]
        else:
            return real_matrix[:,:rdim]
    
[docs]    def reconstruct(self, reduced_data_matrix):
        r"""
        Reconstructs the data matrix by expanding it in the truncated Fourier basis via the FFT.
                
        
        Parameters
        ----------
        reduced_data_matrix : 2D numpy.ndarray
            The reduced dimensianal trajectory to be reconstructed.
        
        
        Returns
        -------
        reconstructed_data_matrix : 2D numpy.ndarray
            The reconstructed/expanded trajectory
        
        """
        if self.sorted:
            real_matrix = np.zeros((reduced_data_matrix.shape[0],2 * self.full_dim))
            real_matrix[:,:reduced_data_matrix.shape[1]] = reduced_data_matrix
            real_matrix = real_matrix[:,self.unsort_inds]
          
            complex_matrix = real_matrix[:,::2] + 1.j*real_matrix[:,1::2]  
        else:
            complex_matrix = reduced_data_matrix[:,::2] + 1.j*reduced_data_matrix[:,1::2]
            
        iFT = np.fft.irfft(complex_matrix, n=self.full_dim, axis=1)
        
        return iFT


from scipy.special import eval_hermite
from scipy.optimize import minimize_scalar
[docs]class Hermite(base_dim_reducer):
    """
    Dimensionality reduction bases on sampled Gauss-Hermite functions.
    
    This reduction scheme is physically motivated and custom tailored to describe perturbations of Fermi-distributions.
    
    Refer to the publication [] and the code for more details.
    """
    
    def __init__(self, sample_max = 1.0, sorted=False, optimize=False, orthogonalize=False, train_rdim=None):
        self.sample_max = sample_max
        self.sorted = sorted
        self.optimize = optimize
        self.orthogonalize = orthogonalize
        self.train_rdim = train_rdim
        pass
    
    def __GramSchmidt_Rows(self, A, eps=0.0):
        M = A.shape[0]
        N = A.shape[1]
        assert(M <= N)
        Q = np.zeros((M,N))
        for k in range(0,M):
            Q[k] = A[k]
            for j in range(k):
                Q[k] = Q[k] - np.dot(Q[j],A[k])*Q[j]

            if (Q[k] > eps).any():
                Q[k] = Q[k] / np.linalg.norm(Q[k])
            else:
                Q[k] = 0.0
        return Q
    
    def __nGramSchmidt_Rows(self, matrix, n=1, eps=0.0):
        o_matrix = matrix
        for k in range(n):
            o_matrix =  self.__GramSchmidt_Rows(o_matrix, eps)
        return o_matrix
    
    
[docs]    def train(self, data_matrix, rdim):
        self.full_dim = data_matrix.shape[1]
        
        if self.train_rdim == None:
            rdim = self.full_dim
        else:
            rdim = self.train_rdim
            
        self.x = np.linspace(0,self.sample_max,self.full_dim)
        
        def get_N(n): # normalization
            if n < 100:
                return 1./np.sqrt( np.sqrt(np.pi) * (2**n) * np.math.factorial(n))
            else:
                return np.exp( -0.5 * ( n*np.log(2.) + n*np.log(n) - n + 0.5*np.log(2.*np.pi*n) + 0.5*np.log(np.pi) ) )
        
        
        self.H_matrix = np.zeros((self.full_dim, self.full_dim))
        for k in range(self.full_dim):
            self.H_matrix[k]  = eval_hermite(k,self.x) * np.exp(-0.5*self.x**2) * get_N(k)
            
        
        if self.optimize == True:
            
            def loss(sample_max):
                x_test = np.linspace(0,sample_max,self.full_dim)
                for k in range(self.full_dim):
                    self.H_matrix[k] = eval_hermite(k,x_test) * np.exp(-0.5*x_test**2) * get_N(k)
                
                if self.orthogonalize:
                    self.H_matrix = self.__nGramSchmidt_Rows(self.H_matrix,10)
                
                if self.sorted:
                    train_coefs = data_matrix @ self.H_matrix.T
                    self.mean_coefs = np.mean(train_coefs, axis=0)
                    self.sort_inds = np.flip(np.argsort(np.abs(self.mean_coefs)))
                    self.sorted_H_matrix = self.H_matrix[self.sort_inds]
                        
                apprx = ( data_matrix @ self.H_matrix[:rdim].T ) @ np.linalg.pinv(self.H_matrix[:rdim]).T
              
                return np.linalg.norm(data_matrix-apprx, ord='fro')
              
            res = minimize_scalar(loss, bounds=(0,40))
            self.sample_max = res.x
            #print(res.x)
            
            self.x = np.linspace(0,self.sample_max,self.full_dim)
            for k in range(self.full_dim):
                self.H_matrix[k]  = eval_hermite(k,self.x) * np.exp(-0.5*self.x**2) * get_N(k)
            
        if self.orthogonalize:
            self.H_matrix = self.__nGramSchmidt_Rows(self.H_matrix,10)
        
        if self.sorted:
            train_coefs = data_matrix @ self.H_matrix.T
            self.mean_coefs = np.mean(train_coefs, axis=0)
            self.sort_inds = np.flip(np.argsort(np.abs(self.mean_coefs)))
            self.sorted_H_matrix = self.H_matrix[self.sort_inds]
        
        
[docs]    def reduce(self,data_matrix,rdim):
        if self.sorted:
            return data_matrix @ self.sorted_H_matrix[:rdim].T
        else:
            return data_matrix @ self.H_matrix[:rdim].T
        
[docs]    def reconstruct(self, reduced_data_matrix):
        dim = reduced_data_matrix.shape[1]
        if self.sorted:
            return reduced_data_matrix @ np.linalg.pinv(self.sorted_H_matrix[:dim]).T
        else:
            return reduced_data_matrix @ np.linalg.pinv(self.H_matrix[:dim]).T