SRF(
rank: int = 10,
rho: float = 3.0,
max_outer: int = 30,
max_inner: int = 20,
tol: float = 0.0001,
verbose: int = 0,
init: str = "random_sqrt",
random_state: int | None = None,
missing_values: float | None = np.nan,
bounds: tuple[float, float] | None = (None, None),
)
Bases: TransformerMixin, BaseEstimator
Symmetric Non-negative Matrix Factorization using SRF.
This class implements symmetric non-negative matrix factorization (SymNMF) using
the Alternating Direction Method of Multipliers (ADMM). It can handle missing
entries and optional bound constraints on the factorization.
The algorithm solves: min_{w>=0,v} ||M o (S - v)||^2_F + rho/2 ||v - ww^T||^2_F
subject to optional bounds on v, where M is an observation mask.
| Parameters: |
-
rank
(int, default:
10
)
–
Number of factors (dimensionality of the latent space)
-
rho
(float, default:
3.0
)
–
SRF penalty parameter controlling constraint enforcement
-
max_outer
(int, default:
30
)
–
Maximum number of SRF outer iterations
-
max_inner
(int, default:
20
)
–
Maximum iterations for w-subproblem per outer iteration
-
tol
(float, default:
1e-4
)
–
Convergence tolerance for constraint violation
-
verbose
(int, default:
0
)
–
Whether to print optimization progress
-
init
(str, default:
'random_sqrt'
)
–
Method for factor initialization ('random', 'random_sqrt', 'nndsvd',
'nndsvdar', 'eigenspectrum')
-
random_state
(int or None, default:
None
)
–
Random seed for reproducible initialization
-
missing_values
(float or None, default:
np.nan
)
–
Values to be treated as missing to mask the matrix
-
bounds
(tuple of (float, float) or None, default:
(None, None)
)
–
Tuple of (lower, upper) bounds for the auxiliary variable v.
If None, the bounds are inferred from the data.
In practice, one can also pass the expected bounds of the matrix
(e.g. (0, 1) for cosine similarity)
|
| Attributes: |
-
w_
(np.ndarray of shape (n_samples, rank))
–
Learned factor matrix
-
components_
(np.ndarray of shape (n_samples, rank))
–
Alias for w_ (sklearn compatibility)
-
n_iter_
(int)
–
Number of SRF iterations performed
-
history_
(dict)
–
Dictionary containing optimization metrics per iteration
|
Examples:
>>> # Basic usage with complete data
>>> from pysrf import SRF
>>> model = SRF(rank=10, random_state=42)
>>> w = model.fit_transform(similarity_matrix)
>>> reconstruction = w @ w.T
>>> # Usage with missing data (NaN values)
>>> similarity_matrix[mask] = np.nan
>>> model = SRF(rank=10, missing_values=np.nan)
>>> w = model.fit_transform(similarity_matrix)
References
.. [1] Shi et al. (2016). "Inexact Block Coordinate Descent Methods For
Symmetric Nonnegative Matrix Factorization"
Source code in pysrf/model.py
def __init__(
    self,
    rank: int = 10,
    rho: float = 3.0,
    max_outer: int = 30,
    max_inner: int = 20,
    tol: float = 1e-4,
    verbose: int = 0,
    init: str = "random_sqrt",
    random_state: int | None = None,
    missing_values: float | None = np.nan,
    bounds: tuple[float, float] | None = (None, None),
) -> None:
    """
    Store the hyper-parameters of the factorization on the instance.

    No validation or computation happens here; ``fit`` calls
    ``self._validate_params()`` and ``_validate_bounds(self.bounds)``
    before using these values.

    Parameters
    ----------
    rank : int, default=10
        Number of factors (dimensionality of the latent space).
    rho : float, default=3.0
        Penalty parameter controlling constraint enforcement.
    max_outer : int, default=30
        Maximum number of outer iterations.
    max_inner : int, default=20
        Maximum iterations for the w-subproblem per outer iteration.
    tol : float, default=1e-4
        Convergence tolerance for constraint violation.
    verbose : int, default=0
        Whether to print optimization progress.
    init : str, default="random_sqrt"
        Method for factor initialization.
    random_state : int or None, default=None
        Random seed for reproducible initialization.
    missing_values : float or None, default=np.nan
        Value to be treated as missing when masking the matrix.
    bounds : tuple of (float, float) or None, default=(None, None)
        (lower, upper) bounds for the auxiliary variable v.
    """
    # Each argument is kept unchanged under an attribute of the same name.
    self.rank = rank
    self.rho = rho
    self.max_outer = max_outer
    self.max_inner = max_inner
    self.tol = tol
    self.verbose = verbose
    self.init = init
    self.random_state = random_state
    self.missing_values = missing_values
    self.bounds = bounds
|
fit
fit(x: np.ndarray, y: np.ndarray | None = None) -> SRF
Fit the symmetric NMF model to the data.
| Parameters: |
-
x
(array-like of shape (n_samples, n_samples))
–
Symmetric similarity matrix. Missing values are allowed and should
be marked according to the missing_values parameter.
-
y
(Ignored, default:
None
)
–
Not used, present here for API consistency by convention.
|
Source code in pysrf/model.py
def fit(self, x: np.ndarray, y: np.ndarray | None = None) -> SRF:
    """
    Fit the symmetric NMF model to the data.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_samples)
        Symmetric similarity matrix. Missing values are allowed and should
        be marked according to the missing_values parameter.
    y : Ignored
        Not used, present here for API consistency by convention.

    Returns
    -------
    self : object
        Fitted estimator.

    Raises
    ------
    ValueError
        If every entry of ``x`` is marked as missing. Symmetry of the
        missing-value mask and of the data is checked through
        ``check_symmetric(..., raise_exception=True)``.
    """
    self._validate_params()
    _validate_bounds(self.bounds)

    # Detect a NaN marker by value rather than identity: ``float("nan")``
    # or ``np.float64("nan")`` are NaN without being the ``np.nan`` object,
    # and an ``is np.nan`` test would wrongly reject NaN input for them.
    marker = self.missing_values
    nan_is_missing = isinstance(marker, (float, np.floating)) and np.isnan(marker)

    x = validate_data(
        self,
        x,
        reset=True,
        ensure_all_finite="allow-nan" if nan_is_missing else True,
        ensure_2d=True,
        dtype=np.float64,
        copy=True,
    )

    self._missing_mask = _get_missing_mask(x, self.missing_values)
    if np.all(self._missing_mask):
        raise ValueError(
            "No observed entries found in the data. All values are missing."
        )

    # The pattern of missing entries has to be symmetric as well.
    check_symmetric(self._missing_mask, raise_exception=True)
    self._observation_mask = ~self._missing_mask

    # Zero-fill the missing entries of the validated array (requested with
    # copy=True above) before the symmetry check on the values themselves.
    x[self._missing_mask] = 0.0
    x = check_symmetric(x, raise_exception=True)

    # Fully observed input takes the complete-data routine; anything else
    # goes through the missing-data routine.
    if np.all(self._observation_mask):
        return self._fit_complete_data(x)
    return self._fit_missing_data(x)
|
fit_transform(
x: np.ndarray, y: np.ndarray | None = None
) -> np.ndarray
Fit the model and return the learned factors.
| Parameters: |
-
x
(array-like of shape (n_samples, n_samples))
–
Symmetric similarity matrix
-
y
(Ignored, default:
None
)
–
Not used, present here for API consistency by convention.
|
| Returns: |
-
w( array-like of shape (n_samples, rank)
) –
Learned factor matrix
|
Source code in pysrf/model.py
def fit_transform(self, x: np.ndarray, y: np.ndarray | None = None) -> np.ndarray:
    """
    Fit the model on ``x`` and hand back the resulting factors.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_samples)
        Symmetric similarity matrix
    y : Ignored
        Not used, present here for API consistency by convention.

    Returns
    -------
    w : array-like of shape (n_samples, rank)
        Learned factor matrix
    """
    # ``fit`` returns the fitted estimator, which is then asked to transform
    # the very same input.
    fitted = self.fit(x, y)
    return fitted.transform(x)
|
reconstruct
reconstruct(w: np.ndarray | None = None) -> np.ndarray
Reconstruct the similarity matrix from factors.
| Parameters: |
-
w
(array-like of shape (n_samples, rank) or None, default:
None
)
–
Factor matrix to use for reconstruction.
If None, uses the fitted factors.
|
| Returns: |
-
s_hat( array-like of shape (n_samples, n_samples)
) –
Reconstructed similarity matrix
|
Source code in pysrf/model.py
def reconstruct(self, w: np.ndarray | None = None) -> np.ndarray:
    """
    Reconstruct the similarity matrix from factors.

    Parameters
    ----------
    w : array-like of shape (n_samples, rank) or None
        Factor matrix to use for reconstruction.
        If None, uses the fitted factors.

    Returns
    -------
    s_hat : array-like of shape (n_samples, n_samples)
        Reconstructed similarity matrix, computed as ``w @ w.T``.
    """
    if w is None:
        check_is_fitted(self)
        w = self.w_
    elif not hasattr(w, "T"):
        # Plain sequences (nested lists/tuples) have no transpose; promote
        # them to an ndarray so the documented array-like input works.
        # Objects that already expose ``.T`` are left untouched.
        w = np.asarray(w)
    return w @ w.T
|
score
score(x: np.ndarray, y: np.ndarray | None = None) -> float
Score the model using reconstruction error on observed entries only.
| Parameters: |
-
x
(array-like of shape (n_samples, n_samples))
–
Symmetric similarity matrix. Missing values are allowed and should
be marked according to the missing_values parameter.
-
y
(Ignored, default:
None
)
–
Not used, present here for API consistency by convention.
|
| Returns: |
-
mse( float
) –
Negative mean squared error of the reconstruction on observed entries (higher is better).
|
Source code in pysrf/model.py
def score(self, x: np.ndarray, y: np.ndarray | None = None) -> float:
    """
    Score the model using reconstruction error on observed entries only.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_samples)
        Symmetric similarity matrix. Missing values are allowed and should
        be marked according to the missing_values parameter.
    y : Ignored
        Not used, present here for API consistency by convention.

    Returns
    -------
    score : float
        Negative mean squared error of the reconstruction on observed
        entries, so that a larger value means a better fit.
    """
    check_is_fitted(self)

    # Detect a NaN marker by value rather than identity: ``float("nan")``
    # or ``np.float64("nan")`` are NaN without being the ``np.nan`` object.
    marker = self.missing_values
    nan_is_missing = isinstance(marker, (float, np.floating)) and np.isnan(marker)

    x = validate_data(
        self,
        x,
        reset=False,
        ensure_2d=True,
        dtype=np.float64,
        ensure_all_finite="allow-nan" if nan_is_missing else True,
    )

    # Only entries that are not marked as missing contribute to the error.
    observation_mask = ~_get_missing_mask(x, self.missing_values)
    reconstruction = self.reconstruct()
    mse = np.mean((x[observation_mask] - reconstruction[observation_mask]) ** 2)
    # The error is negated before it is returned.
    return -mse
|
transform(x: np.ndarray) -> np.ndarray
Project data onto the learned factor space.
| Parameters: |
-
x
(array-like of shape (n_samples, n_samples))
–
Symmetric matrix to transform
|
| Returns: |
-
w( array-like of shape (n_samples, rank)
) –
Factor matrix learned during fitting
|
Source code in pysrf/model.py
def transform(self, x: np.ndarray) -> np.ndarray:
    """
    Return the factor matrix stored on the fitted estimator.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_samples)
        Symmetric matrix to transform. This method does not read it; the
        factors stored by fitting are returned as they are.

    Returns
    -------
    w : array-like of shape (n_samples, rank)
        The ``w_`` attribute of the fitted estimator.
    """
    check_is_fitted(self)
    factors = self.w_
    return factors
|