Source code for pyttb.sptenmat

# Copyright 2022 National Technology & Engineering Solutions of Sandia,
# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
# U.S. Government retains certain rights in this software.

"""Classes and functions for working with Kruskal tensors."""
from __future__ import annotations

from typing import Optional, Tuple, Union

import numpy as np
from numpy_groupies import aggregate as accumarray
from scipy import sparse

import pyttb as ttb
from pyttb.pyttb_utils import gather_wrap_dims, tt_ind2sub


[docs]class sptenmat: """ SPTENMAT Store sparse tensor as a sparse matrix. """ __slots__ = ("tshape", "rdims", "cdims", "subs", "vals") def __init__( # noqa: PLR0913 self, subs: Optional[np.ndarray] = None, vals: Optional[np.ndarray] = None, rdims: Optional[np.ndarray] = None, cdims: Optional[np.ndarray] = None, tshape: Tuple[int, ...] = (), ): """ Construct a :class:`pyttb.sptenmat` from a set of 2D subscripts (subs) and values (vals) along with the mappings of the row (rdims) and column indices (cdims) and the shape of the original tensor (tshape). Parameters ---------- subs: Location of non-zero entries, in sptenmat. vals: Values for non-zero entries, in sptenmat. rdims: Mapping of row indices. cdims: Mapping of column indices. tshape: Shape of the original tensor. Examples -------- Create an empty :class:`pyttb.sptenmat`: >>> S = ttb.sptenmat() >>> S # doctest: +NORMALIZE_WHITESPACE sptenmat corresponding to a sptensor of shape () with 0 nonzeros rdims = [ ] (modes of sptensor corresponding to rows) cdims = [ ] (modes of sptensor corresponding to columns) Create a :class:`pyttb.sptenmat` from subscripts, values, and unwrapping dimensions: >>> subs = np.array([[1, 6], [1, 7]]) >>> vals = np.array([[6], [7]]) >>> tshape = (4, 4, 4) >>> S = ttb.sptenmat(\ subs,\ vals,\ rdims=np.array([0]),\ cdims=np.array([1,2]),\ tshape=tshape\ ) >>> S # doctest: +NORMALIZE_WHITESPACE sptenmat corresponding to a sptensor of shape (4, 4, 4) with 2 nonzeros rdims = [ 0 ] (modes of sptensor corresponding to rows) cdims = [ 1, 2 ] (modes of sptensor corresponding to columns) [1, 6] = 6 [1, 7] = 7 """ # Empty case if rdims is None and cdims is None: assert ( subs is None and vals is None ), "Must provide rdims or cdims with values" self.subs = np.array([], ndmin=2, dtype=int) self.vals = np.array([], ndmin=2) self.rdims = np.array([], dtype=int) self.cdims = np.array([], dtype=int) self.tshape: Union[Tuple[()], Tuple[int, ...]] = () return if subs is None: subs = np.array([], ndmin=2, dtype=int) if vals is None: vals = np.array([], ndmin=2) n = len(tshape) alldims = np.array([range(n)]) # Error check rdims, cdims = gather_wrap_dims(n, rdims, cdims) # if rdims or cdims is empty, hstack will output an array of float not int if rdims.size == 0: dims = cdims.copy() elif cdims.size == 0: dims = rdims.copy() else: dims = np.hstack([rdims, cdims], dtype=int) assert len(dims) == n and (alldims == np.sort(dims)).all(), ( "Incorrect specification of dimensions, the sorted concatenation of " "rdims and cdims must be range(len(tshape))." ) assert subs.size == 0 or np.prod(np.array(tshape)[rdims]) >= np.max( subs[:, 0] ), "Invalid row index." assert subs.size == 0 or np.prod(np.array(tshape)[cdims]) >= np.max( subs[:, 1] ), "Invalid column index." # Sum any duplicates if vals.size == 0: assert vals.size == 0, "Empty subs requires empty vals" newsubs = np.array([]) newvals = np.array([]) else: # Identify only the unique indices newsubs, loc = np.unique(subs, axis=0, return_inverse=True) # Sum the corresponding values # Squeeze to convert from column vector to row vector newvals = accumarray( loc.flatten(), np.squeeze(vals, axis=1), size=newsubs.shape[0], func=sum ) # Find the nonzero indices of the new values nzidx = np.nonzero(newvals) newsubs = newsubs[nzidx] # None index to convert from row back to column vector newvals = newvals[nzidx] if newvals.size > 0: newvals = newvals[:, None] self.tshape = tshape self.rdims = rdims.copy().astype(int) self.cdims = cdims.copy().astype(int) self.subs = newsubs self.vals = newvals
[docs] @classmethod def from_array( cls, array: Union[sparse.coo_matrix, np.ndarray], rdims: Optional[np.ndarray] = None, cdims: Optional[np.ndarray] = None, tshape: Tuple[int, ...] = (), ): """ Construct a :class:`pyttb.sptenmat` from a coo_matrix along with the mappings of the row (rdims) and column indices (cdims) and the shape of the original tensor (tshape). Parameters ---------- array: Representation of sparse tensor data (sparse or dense). rdims: Mapping of row indices. cdims: Mapping of column indices. tshape: Shape of the original tensor. Examples -------- Create a :class:`pyttb.sptenmat` from a sparse matrix and unwrapping dimensions. Infer column dimensions from row dimensions specification. >>> data = np.array([6, 7]) >>> rows = np.array([1, 1]) >>> cols = np.array([6, 7]) >>> sparse_matrix = sparse.coo_matrix((data, (rows, cols))) >>> tshape = (4, 4, 4) >>> S = ttb.sptenmat.from_array(\ sparse_matrix,\ rdims=np.array([0]),\ tshape=tshape\ ) >>> S # doctest: +NORMALIZE_WHITESPACE sptenmat corresponding to a sptensor of shape (4, 4, 4) with 2 nonzeros rdims = [ 0 ] (modes of sptensor corresponding to rows) cdims = [ 1, 2 ] (modes of sptensor corresponding to columns) [1, 6] = 6 [1, 7] = 7 """ vals = None if isinstance(array, np.ndarray): vals = np.expand_dims(array[array.nonzero()], axis=1) elif sparse.issparse(array): vals = np.expand_dims(array.tocoo(False).data, axis=1) else: raise ValueError( f"Expected sparse matrix or array but received: {type(array)}" ) subs = np.vstack(array.nonzero()).transpose() return ttb.sptenmat(subs, vals, rdims, cdims, tshape)
[docs] def copy(self) -> sptenmat: """ Return a deep copy of the :class:`pyttb.sptenmat`. Examples -------- Create a :class:`pyttb.sptenmat` (ST1) and make a deep copy. Verify the deep copy (ST3) is not just a reference (like ST2) to the original. >>> S1 = ttb.sptensor(shape=(2,2)) >>> S1[0,0] = 1 >>> ST1 = S1.to_sptenmat(np.array([0])) >>> ST2 = ST1 >>> ST3 = ST1.copy() >>> ST1[0,0] = 3 >>> ST1.to_sptensor().isequal(ST2.to_sptensor()) True >>> ST1.to_sptensor().isequal(ST3.to_sptensor()) False """ return sptenmat( self.subs.copy(), self.vals.copy(), self.rdims.copy(), self.cdims.copy(), self.tshape, )
def __deepcopy__(self, memo): return self.copy()
[docs] def to_sptensor(self) -> ttb.sptensor: """ Contruct a :class:`pyttb.sptensor` from `:class:pyttb.sptenmat` """ vals = None subs = None if self.subs.size > 0: tshape = np.array(self.tshape) rdims = tt_ind2sub(tshape[self.rdims], self.subs[:, 0]) cdims = tt_ind2sub(tshape[self.cdims], self.subs[:, 1]) subs = np.zeros( (rdims.shape[0], rdims.shape[1] + cdims.shape[1]), dtype=int ) subs[:, self.rdims] = rdims subs[:, self.cdims] = cdims vals = self.vals return ttb.sptensor(subs, vals, self.tshape)
@property def shape(self) -> Tuple[int, ...]: """ Return the shape of a sptenmat """ if self.tshape == (): return () else: m = np.prod(np.array(self.tshape)[self.rdims]) n = np.prod(np.array(self.tshape)[self.cdims]) return m, n
[docs] def double(self) -> sparse.coo_matrix: """ Convert a :class:`pyttb.sptenmat` to a COO :class:`scipy.sparse.coo_matrix`. """ if self.subs.size == 0: return sparse.coo_matrix(self.shape) return sparse.coo_matrix( (self.vals.transpose()[0], self.subs.transpose()), self.shape )
[docs] def full(self) -> ttb.tenmat: """ Convert a :class:`pyttb.sptenmat` to a (dense) :class:`pyttb.tenmat`. """ # Create empty dense tenmat result = ttb.tenmat(np.zeros(self.shape), self.rdims, self.cdims, self.tshape) # Assign nonzero values result[tuple(self.subs.transpose())] = np.squeeze(self.vals) return result
@property def nnz(self) -> int: """ Number of nonzero values in the :class:`pyttb.sptenmat`. """ return len(self.vals)
[docs] def norm(self) -> np.floating: """ Compute the norm (i.e., Frobenius norm, or square root of the sum of squares of entries) of the :class:`pyttb.sptenmat`. """ return np.linalg.norm(self.vals)
[docs] def isequal(self, other: sptenmat) -> bool: """ Exact equality for :class:`pyttb.sptenmat` """ if not isinstance(other, ttb.sptenmat): raise ValueError( f"Can only compares against other sptenmat but received: {type(other)}" ) return ( np.array_equal(self.vals, other.vals) and np.array_equal(self.subs, other.subs) and self.tshape == other.tshape and np.array_equal(self.cdims, other.cdims) and np.array_equal(self.rdims, other.rdims) )
[docs] def __pos__(self): """ Unary plus operator (+). """ return self.copy()
[docs] def __neg__(self): """ Unary minus operator (-). """ result = self.copy() result.vals *= -1 return result
[docs] def __setitem__(self, key, value): # noqa: PLR0912 """ Subscripted assignment for the :class:`pyttb.sptenmat`. """ if not isinstance(key, tuple): raise IndexError("Sptenmat takes two arguments as a 2D array") if len(key) != 2: raise IndexError( f"Wrong number of indices. Expected 2 received: {len(key)}" ) rsubs = key[0] if isinstance(rsubs, slice): rsubs = np.arange(0, self.shape[0])[rsubs] rsubs = np.asarray(rsubs, dtype=int) if rsubs.shape == (): rsubs = np.array([rsubs]) csubs = key[1] if isinstance(csubs, slice): csubs = np.arange(0, self.shape[1])[csubs] csubs = np.asarray(csubs, dtype=int) if csubs.shape == (): csubs = np.array([csubs]) if isinstance(value, (int, float, np.floating)): value = value * np.ones((len(csubs) * len(rsubs), 1)) value = np.asarray(value) newsubs = [] newvals = [] k = -1 # Loop over row and column indices, finding appropriate row index for (i,j) for j in range(len(csubs)): indxc = np.array([], dtype=int) if self.subs.size > 0: indxc = np.where(self.subs[:, 1] == csubs[j])[0] for i in range(len(rsubs)): indxr = np.array([], dtype=int) if self.subs.size > 0: indxr = np.where(self.subs[indxc, 0] == rsubs[i])[0] indx = indxc[indxr] k += 1 if indx.size == 0: newsubs.append(np.hstack([rsubs[i], csubs[j]])) newvals.append(value[k]) else: self.vals[indx] = value[k] # If there are new values to append, then add them on and sort if len(newvals) != 0: if self.subs.size > 0: self.subs = np.vstack((self.subs, newsubs)) self.vals = np.vstack((self.vals, newvals)) else: self.subs = np.vstack(newsubs) self.vals = np.vstack(newvals) sort_idx = np.lexsort(self.subs.transpose()[::-1]) self.subs = self.subs[sort_idx] self.vals = self.vals[sort_idx]
[docs] def __repr__(self): """ String representation of a sptenmat. Returns ------- str Contains the shape, row indices (rindices), column indices (cindices) and data as strings on different lines. """ s = "" s += "sptenmat corresponding to a sptensor of shape " if self.vals.size == 0: s += str(self.shape) else: s += f"{self.tshape!r}" s += " with " + str(self.vals.size) + " nonzeros" s += "\n" s += "rdims = " s += "[ " + (", ").join([str(int(d)) for d in self.rdims]) + " ] " s += "(modes of sptensor corresponding to rows)\n" s += "cdims = " s += "[ " + (", ").join([str(int(d)) for d in self.cdims]) + " ] " s += "(modes of sptensor corresponding to columns)\n" # Stop insane printouts if self.vals.size > 10000: # pragma: no cover r = input("Are you sure you want to print all nonzeros? (Y/N)") if r.upper() != "Y": return s # An empty ndarray with minimum dimensions still has a shape if self.subs.size > 0: for i in range(0, self.subs.shape[0]): s += "\t" s += "[" idx = self.subs[i, :] s += str(idx.tolist())[1:] s += " = " s += str(self.vals[i][0]) if i < self.subs.shape[0] - 1: s += "\n" return s
__str__ = __repr__