-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
253 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import itertools | ||
import pathlib | ||
from os import PathLike | ||
from typing import TypeGuard, Union | ||
import numpy as np | ||
from scipy import io, sparse, optimize as opt | ||
from pyliger.factorization._utilities import nla, nnlsm_blockpivot | ||
from nnlsm_activeset import nnlsm_activeset | ||
import sys | ||
|
||
def parse_glob(p: pathlib.Path) -> list[pathlib.Path]: | ||
if p.is_dir(): | ||
return list(p.glob('*.mtx')) | ||
else: | ||
return [p] | ||
|
||
|
||
def safe_mmread(mt: PathLike) -> Union[sparse.csr_array, None]: | ||
try: | ||
return sparse.csr_array(io.mmread(mt).T) | ||
except ValueError as e: | ||
raise e | ||
|
||
|
||
def is_csr_array(k: sparse.csr_array | None) -> TypeGuard[sparse.csr_array]: | ||
return isinstance(k, sparse.csr_array) | ||
|
||
|
||
def _test_nnlsm(A_in, B_in): | ||
### 1. Initialization (W, V_i, H_i) | ||
A = A_in.toarray() | ||
B = B_in.toarray() | ||
X_org = A.T.dot(B) | ||
# B = np.random.rand(m,k) | ||
# A = np.random.rand(m,n/2) | ||
# A = np.concatenate((A,A),axis=1) | ||
# A = A + np.random.rand(m,n)*0.01 | ||
# B = np.random.rand(m,k) | ||
import time | ||
start = time.time() | ||
C1, info = nnlsm_blockpivot(A, B) | ||
elapsed2 = time.time() - start | ||
rel_norm2 = nla.norm(C1 - X_org) / nla.norm(X_org) | ||
print('nnlsm_blockpivot: ', 'OK ' if info[0] else 'Fail', | ||
'elapsed:{0:.4f} error:{1:.4e}'.format(elapsed2, rel_norm2)) | ||
start = time.time() | ||
C2, info = nnlsm_activeset(A, B) | ||
elapsed1 = time.time() - start | ||
rel_norm1 = nla.norm(C2 - X_org) / nla.norm(X_org) | ||
print('nnlsm_activeset: ', 'OK ' if info[0] else 'Fail', | ||
'elapsed:{0:.4f} error:{1:.4e}'.format(elapsed1, rel_norm1)) | ||
start = time.time() | ||
C3 = np.zeros(X_org.shape) | ||
for i in iter(range(0, X_org.shape[1])): | ||
res = opt.nnls(A, B[:, i]) | ||
C3[:, i] = res[0] | ||
elapsed3 = time.time() - start | ||
rel_norm3 = nla.norm(C3 - X_org) / nla.norm(X_org) | ||
print ('scipy.optimize.nnls: ', 'OK ',\ | ||
'elapsed:{0:.4f} error:{1:.4e}'.format(elapsed3, rel_norm3)) | ||
|
||
|
||
benchlist = [] | ||
assert len(sys.argv) == 3 | ||
for i in sys.argv[1:]: | ||
benchlist.append([path.resolve() for path in parse_glob(pathlib.Path(i))]) | ||
benchlist = [safe_mmread(m) for m in itertools.chain.from_iterable(benchlist)] | ||
benchlist = list(filter(is_csr_array, benchlist)) | ||
_test_nnlsm(benchlist[0], benchlist[1].T) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from pyliger import create_liger, read_10X_h5, normalize, scale_not_center, select_genes | ||
from pyliger.factorization._online_iNMF import _get_scale_data | ||
from scipy import io, sparse | ||
import numpy as np | ||
from os import listdir | ||
|
||
titlelist = [] | ||
h5list = [] | ||
for file in listdir('../../repos/theAeon/pyliger/h5'): | ||
titlelist.append(file) | ||
h5list.append(read_10X_h5(sample_dir='../../repos/theAeon/pyliger/h5', | ||
sample_name=file.split(".")[0], file_name=file, backed=True)) | ||
for i in range(0, len(h5list)//2): | ||
j = i + len(h5list)//2 | ||
titlepair = (titlelist[i], titlelist[j]) | ||
mouse_cortex = create_liger([h5list[i], h5list[j]]) | ||
normalize(mouse_cortex) | ||
select_genes(mouse_cortex, var_thresh=0.2, do_plot=False) | ||
scale_not_center(mouse_cortex) | ||
outdata = [_get_scale_data(adata)['scale_data'] for adata in mouse_cortex.adata_list for i in range(2)] | ||
outdata = [mat.value.toarray() for mat in outdata] | ||
outdata = [sparse.coo_array(np.hstack((mat, np.zeros(mat.shape))).T) for mat in outdata] | ||
for i in range(len(titlepair)): | ||
io.mmwrite( "%s.mtx" % titlepair[i], outdata[i]) | ||
densedata = np.random.uniform(0, 2, (10, 2 * len(mouse_cortex.var_genes))) | ||
io.mmwrite("%s.dense.mtx" % titlepair[0], densedata) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
import numpy as np | ||
from pyliger.factorization._utilities import normal_eq_comb | ||
import scipy.sparse as sps | ||
|
||
|
||
def nnlsm_activeset(A, B, overwrite=False, is_input_prod=False, init=None): | ||
""" Nonnegativity-constrained least squares with active-set method and column grouping | ||
Solves min ||AX-B||_2^2 s.t. X >= 0 element-wise. | ||
Algorithm of this routine is close to the one presented in the following paper but | ||
is different in organising inner- and outer-loops: | ||
M. H. Van Benthem and M. R. Keenan, J. Chemometrics 2004; 18: 441-450 | ||
Parameters | ||
---------- | ||
A : numpy.array, shape (m,n) | ||
B : numpy.array or scipy.sparse matrix, shape (m,k) | ||
Optional Parameters | ||
------------------- | ||
is_input_prod : True/False. - If True, the A and B arguments are interpreted as | ||
AtA and AtB, respectively. Default is False. | ||
init: numpy.array, shape (n,k). - If provided, init is used as an initial value for the algorithm. | ||
Default is None. | ||
Returns | ||
------- | ||
X, (success, Y, num_cholesky, num_eq, num_backup) | ||
X : numpy.array, shape (n,k) - solution | ||
success : True/False - True if the solution is found. False if the algorithm did not terminate | ||
due to numerical errors. | ||
Y : numpy.array, shape (n,k) - Y = A.T * A * X - A.T * B | ||
num_cholesky : int - the number of Cholesky factorizations needed | ||
num_eq : int - the number of linear systems of equations needed to be solved | ||
""" | ||
if is_input_prod: | ||
AtA = A | ||
AtB = B | ||
else: | ||
AtA = A.T.dot(A) | ||
if sps.issparse(B): | ||
AtB = B.T.dot(A) | ||
AtB = AtB.T | ||
else: | ||
AtB = A.T.dot(B) | ||
|
||
(n, k) = AtB.shape | ||
MAX_ITER = n * 5 | ||
num_cholesky = 0 | ||
num_eq = 0 | ||
not_opt_set = np.ones([k], dtype=bool) | ||
|
||
if overwrite: | ||
X, num_cholesky, num_eq = normal_eq_comb(AtA, AtB) | ||
PassSet = X > 0 | ||
not_opt_set = np.any(X < 0, axis=0) | ||
elif init is not None: | ||
X = init | ||
X[X < 0] = 0 | ||
PassSet = X > 0 | ||
else: | ||
X = np.zeros([n, k]) | ||
PassSet = np.zeros([n, k], dtype=bool) | ||
|
||
Y = np.zeros([n, k]) | ||
opt_cols = (~not_opt_set).nonzero()[0] | ||
not_opt_cols = not_opt_set.nonzero()[0] | ||
|
||
Y[:, opt_cols] = AtA.dot(X[:, opt_cols]) - AtB[:, opt_cols] | ||
|
||
big_iter = 0 | ||
success = True | ||
while not_opt_cols.size > 0: | ||
big_iter += 1 | ||
if MAX_ITER > 0 and big_iter > MAX_ITER: | ||
success = False | ||
break | ||
|
||
(Z, temp_cholesky, temp_eq) = normal_eq_comb( | ||
AtA, AtB[:, not_opt_cols], PassSet[:, not_opt_cols]) | ||
num_cholesky += temp_cholesky | ||
num_eq += temp_eq | ||
|
||
Z[abs(Z) < 1e-12] = 0 | ||
|
||
infea_subset = Z < 0 | ||
temp = np.any(infea_subset, axis=0) | ||
infea_subcols = temp.nonzero()[0] | ||
fea_subcols = (~temp).nonzero()[0] | ||
|
||
if infea_subcols.size > 0: | ||
infea_cols = not_opt_cols[infea_subcols] | ||
|
||
(ix0, ix1_subsub) = infea_subset[:, infea_subcols].nonzero() | ||
ix1_sub = infea_subcols[ix1_subsub] | ||
ix1 = not_opt_cols[ix1_sub] | ||
|
||
X_infea = X[(ix0, ix1)] | ||
|
||
alpha = np.zeros([n, len(infea_subcols)]) | ||
alpha[:] = np.inf | ||
alpha[(ix0, ix1_subsub)] = X_infea / (X_infea - Z[(ix0, ix1_sub)]) | ||
min_ix = np.argmin(alpha, axis=0) | ||
min_vals = alpha[(min_ix, range(0, alpha.shape[1]))] | ||
|
||
X[:, infea_cols] = X[:, infea_cols] + \ | ||
(Z[:, infea_subcols] - X[:, infea_cols]) * min_vals | ||
X[(min_ix, infea_cols)] = 0 | ||
PassSet[(min_ix, infea_cols)] = False | ||
|
||
elif fea_subcols.size > 0: | ||
fea_cols = not_opt_cols[fea_subcols] | ||
|
||
X[:, fea_cols] = Z[:, fea_subcols] | ||
Y[:, fea_cols] = AtA.dot(X[:, fea_cols]) - AtB[:, fea_cols] | ||
|
||
Y[abs(Y) < 1e-12] = 0 | ||
|
||
not_opt_subset = np.logical_and( | ||
Y[:, fea_cols] < 0, ~PassSet[:, fea_cols]) | ||
new_opt_cols = fea_cols[np.all(~not_opt_subset, axis=0)] | ||
update_cols = fea_cols[np.any(not_opt_subset, axis=0)] | ||
|
||
if update_cols.size > 0: | ||
val = Y[:, update_cols] * ~PassSet[:, update_cols] | ||
min_ix = np.argmin(val, axis=0) | ||
PassSet[(min_ix, update_cols)] = True | ||
|
||
not_opt_set[new_opt_cols] = False | ||
not_opt_cols = not_opt_set.nonzero()[0] | ||
|
||
return X, (success, Y, num_cholesky, num_eq) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,28 @@ | ||
[build-system] | ||
build-backend = "pdm.pep517.api" | ||
requires = [ | ||
"pdm-pep517>=1.0.0", | ||
] | ||
build-backend = "pdm.pep517.api" | ||
requires = ["pdm-pep517>=1.0.0"] | ||
|
||
[tool.black] | ||
exclude = "/(\n \\.eggs\n | \\.git\n | \\.hg\n | \\.mypy_cache\n | \\.nox\n | \\.tox\n | \\.venv\n | _build\n | buck-out\n | build\n | dist\n)/\n" | ||
include = "\\.pyi?$" | ||
exclude = "/(\n \\.eggs\n | \\.git\n | \\.hg\n | \\.mypy_cache\n | \\.nox\n | \\.tox\n | \\.venv\n | _build\n | buck-out\n | build\n | dist\n)/\n" | ||
include = "\\.pyi?$" | ||
|
||
[tool.pdm] | ||
package-dir = "src" | ||
|
||
package-dir = "src" | ||
[tool.pdm.build] | ||
excludes = ["contrib"] | ||
[project] | ||
authors = [ | ||
authors = [ | ||
{ name = "Joshua Welch", email = "[email protected]" }, | ||
{ name = "Lu Lu", email = "[email protected]" }, | ||
] | ||
classifiers = [ | ||
] | ||
classifiers = [ | ||
"Development Status :: 4 - Beta", | ||
"License :: OSI Approved :: MIT License", | ||
"Natural Language :: English", | ||
"Operating System :: OS Independent", | ||
"Programming Language :: Python :: 3.8", | ||
] | ||
dependencies = [ | ||
] | ||
dependencies = [ | ||
"adjustText>=0.7.3", | ||
"anndata>=0.8.0", | ||
"annoy", | ||
|
@@ -44,22 +43,18 @@ dependencies = [ | |
"scikit-learn", | ||
"seaborn>=0.12.2", | ||
"umap-learn", | ||
] | ||
description = "The Python version of LIGER package." | ||
keywords = [ | ||
"LIGER", | ||
] | ||
maintainers = [ | ||
{ name = "Andrew Robbins", email = "[email protected]" }, | ||
] | ||
name = "pyliger" | ||
readme = "README.md" | ||
requires-python = "<3.11, >=3.8" | ||
version = "0.1.1" | ||
] | ||
description = "The Python version of LIGER package." | ||
keywords = ["LIGER"] | ||
maintainers = [{ name = "Andrew Robbins", email = "[email protected]" }] | ||
name = "pyliger" | ||
readme = "README.md" | ||
requires-python = "<3.11, >=3.8" | ||
version = "0.1.1" | ||
|
||
[project.license] | ||
text = "MIT" | ||
[project.license] | ||
text = "MIT" | ||
|
||
[project.urls] | ||
homepage = "https://welch-lab.github.io" | ||
repository = "https://github.com/welch-lab/pyliger" | ||
[project.urls] | ||
homepage = "https://welch-lab.github.io" | ||
repository = "https://github.com/welch-lab/pyliger" |