Commit

add benchmark harness
theAeon committed Feb 14, 2023
1 parent 1b8e71e commit f6aeafc
Showing 4 changed files with 253 additions and 30 deletions.
69 changes: 69 additions & 0 deletions contrib/bench_python.py
@@ -0,0 +1,69 @@
import itertools
import pathlib
from os import PathLike
from typing import TypeGuard, Union
import numpy as np
from scipy import io, sparse, optimize as opt
from pyliger.factorization._utilities import nla, nnlsm_blockpivot
from nnlsm_activeset import nnlsm_activeset
import sys

def parse_glob(p: pathlib.Path) -> list[pathlib.Path]:
    if p.is_dir():
        return list(p.glob('*.mtx'))
    else:
        return [p]


def safe_mmread(mt: PathLike) -> Union[sparse.csr_array, None]:
    try:
        return sparse.csr_array(io.mmread(mt).T)
    except ValueError:
        # Files that cannot be parsed as a sparse matrix are skipped;
        # the None entries are filtered out below.
        return None


def is_csr_array(k: sparse.csr_array | None) -> TypeGuard[sparse.csr_array]:
    return isinstance(k, sparse.csr_array)


def _test_nnlsm(A_in, B_in):
    ### 1. Initialization (W, V_i, H_i)
    A = A_in.toarray()
    B = B_in.toarray()
    X_org = A.T.dot(B)
    # B = np.random.rand(m,k)
    # A = np.random.rand(m,n/2)
    # A = np.concatenate((A,A),axis=1)
    # A = A + np.random.rand(m,n)*0.01
    # B = np.random.rand(m,k)
    import time
    start = time.time()
    C1, info = nnlsm_blockpivot(A, B)
    elapsed2 = time.time() - start
    rel_norm2 = nla.norm(C1 - X_org) / nla.norm(X_org)
    print('nnlsm_blockpivot: ', 'OK ' if info[0] else 'Fail',
          'elapsed:{0:.4f} error:{1:.4e}'.format(elapsed2, rel_norm2))
    start = time.time()
    C2, info = nnlsm_activeset(A, B)
    elapsed1 = time.time() - start
    rel_norm1 = nla.norm(C2 - X_org) / nla.norm(X_org)
    print('nnlsm_activeset: ', 'OK ' if info[0] else 'Fail',
          'elapsed:{0:.4f} error:{1:.4e}'.format(elapsed1, rel_norm1))
    start = time.time()
    C3 = np.zeros(X_org.shape)
    for i in range(X_org.shape[1]):
        res = opt.nnls(A, B[:, i])
        C3[:, i] = res[0]
    elapsed3 = time.time() - start
    rel_norm3 = nla.norm(C3 - X_org) / nla.norm(X_org)
    print('scipy.optimize.nnls: ', 'OK ',
          'elapsed:{0:.4f} error:{1:.4e}'.format(elapsed3, rel_norm3))


benchlist = []
assert len(sys.argv) == 3
for i in sys.argv[1:]:
    benchlist.append([path.resolve() for path in parse_glob(pathlib.Path(i))])
benchlist = [safe_mmread(m) for m in itertools.chain.from_iterable(benchlist)]
benchlist = list(filter(is_csr_array, benchlist))
_test_nnlsm(benchlist[0], benchlist[1].T)
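A possible invocation of the harness (illustrative only, not part of the commit; the paths are placeholders): each of the two arguments may be a single Matrix Market file or a directory of *.mtx files, and the first two matrices read are handed to _test_nnlsm, e.g.

python contrib/bench_python.py scaled_sample_A.mtx scaled_sample_B.mtx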
26 changes: 26 additions & 0 deletions contrib/initialize_mtx.py
@@ -0,0 +1,26 @@
from pyliger import create_liger, read_10X_h5, normalize, scale_not_center, select_genes
from pyliger.factorization._online_iNMF import _get_scale_data
from scipy import io, sparse
import numpy as np
from os import listdir

titlelist = []
h5list = []
for file in listdir('../../repos/theAeon/pyliger/h5'):
    titlelist.append(file)
    h5list.append(read_10X_h5(sample_dir='../../repos/theAeon/pyliger/h5',
                              sample_name=file.split(".")[0], file_name=file, backed=True))
for i in range(0, len(h5list)//2):
    j = i + len(h5list)//2
    titlepair = (titlelist[i], titlelist[j])
    mouse_cortex = create_liger([h5list[i], h5list[j]])
    normalize(mouse_cortex)
    select_genes(mouse_cortex, var_thresh=0.2, do_plot=False)
    scale_not_center(mouse_cortex)
    outdata = [_get_scale_data(adata)['scale_data'] for adata in mouse_cortex.adata_list for _ in range(2)]
    outdata = [mat.value.toarray() for mat in outdata]
    outdata = [sparse.coo_array(np.hstack((mat, np.zeros(mat.shape))).T) for mat in outdata]
    for idx in range(len(titlepair)):
        io.mmwrite("%s.mtx" % titlepair[idx], outdata[idx])
    densedata = np.random.uniform(0, 2, (10, 2 * len(mouse_cortex.var_genes)))
    io.mmwrite("%s.dense.mtx" % titlepair[0], densedata)
133 changes: 133 additions & 0 deletions contrib/nnlsm_activeset.py
@@ -0,0 +1,133 @@
import numpy as np
from pyliger.factorization._utilities import normal_eq_comb
import scipy.sparse as sps


def nnlsm_activeset(A, B, overwrite=False, is_input_prod=False, init=None):
""" Nonnegativity-constrained least squares with active-set method and column grouping
Solves min ||AX-B||_2^2 s.t. X >= 0 element-wise.
Algorithm of this routine is close to the one presented in the following paper but
is different in organising inner- and outer-loops:
M. H. Van Benthem and M. R. Keenan, J. Chemometrics 2004; 18: 441-450
Parameters
----------
A : numpy.array, shape (m,n)
B : numpy.array or scipy.sparse matrix, shape (m,k)
Optional Parameters
-------------------
is_input_prod : True/False. - If True, the A and B arguments are interpreted as
AtA and AtB, respectively. Default is False.
init: numpy.array, shape (n,k). - If provided, init is used as an initial value for the algorithm.
Default is None.
Returns
-------
X, (success, Y, num_cholesky, num_eq, num_backup)
X : numpy.array, shape (n,k) - solution
success : True/False - True if the solution is found. False if the algorithm did not terminate
due to numerical errors.
Y : numpy.array, shape (n,k) - Y = A.T * A * X - A.T * B
num_cholesky : int - the number of Cholesky factorizations needed
num_eq : int - the number of linear systems of equations needed to be solved
"""
    if is_input_prod:
        AtA = A
        AtB = B
    else:
        AtA = A.T.dot(A)
        if sps.issparse(B):
            AtB = B.T.dot(A)
            AtB = AtB.T
        else:
            AtB = A.T.dot(B)

    (n, k) = AtB.shape
    MAX_ITER = n * 5
    num_cholesky = 0
    num_eq = 0
    not_opt_set = np.ones([k], dtype=bool)

    if overwrite:
        X, num_cholesky, num_eq = normal_eq_comb(AtA, AtB)
        PassSet = X > 0
        not_opt_set = np.any(X < 0, axis=0)
    elif init is not None:
        X = init
        X[X < 0] = 0
        PassSet = X > 0
    else:
        X = np.zeros([n, k])
        PassSet = np.zeros([n, k], dtype=bool)

    Y = np.zeros([n, k])
    opt_cols = (~not_opt_set).nonzero()[0]
    not_opt_cols = not_opt_set.nonzero()[0]

    Y[:, opt_cols] = AtA.dot(X[:, opt_cols]) - AtB[:, opt_cols]

    big_iter = 0
    success = True
    while not_opt_cols.size > 0:
        big_iter += 1
        if MAX_ITER > 0 and big_iter > MAX_ITER:
            success = False
            break

        (Z, temp_cholesky, temp_eq) = normal_eq_comb(
            AtA, AtB[:, not_opt_cols], PassSet[:, not_opt_cols])
        num_cholesky += temp_cholesky
        num_eq += temp_eq

        Z[abs(Z) < 1e-12] = 0

        infea_subset = Z < 0
        temp = np.any(infea_subset, axis=0)
        infea_subcols = temp.nonzero()[0]
        fea_subcols = (~temp).nonzero()[0]

        if infea_subcols.size > 0:
            infea_cols = not_opt_cols[infea_subcols]

            (ix0, ix1_subsub) = infea_subset[:, infea_subcols].nonzero()
            ix1_sub = infea_subcols[ix1_subsub]
            ix1 = not_opt_cols[ix1_sub]

            X_infea = X[(ix0, ix1)]

            alpha = np.zeros([n, len(infea_subcols)])
            alpha[:] = np.inf
            alpha[(ix0, ix1_subsub)] = X_infea / (X_infea - Z[(ix0, ix1_sub)])
            min_ix = np.argmin(alpha, axis=0)
            min_vals = alpha[(min_ix, range(0, alpha.shape[1]))]

            X[:, infea_cols] = X[:, infea_cols] + \
                (Z[:, infea_subcols] - X[:, infea_cols]) * min_vals
            X[(min_ix, infea_cols)] = 0
            PassSet[(min_ix, infea_cols)] = False

        elif fea_subcols.size > 0:
            fea_cols = not_opt_cols[fea_subcols]

            X[:, fea_cols] = Z[:, fea_subcols]
            Y[:, fea_cols] = AtA.dot(X[:, fea_cols]) - AtB[:, fea_cols]

            Y[abs(Y) < 1e-12] = 0

            not_opt_subset = np.logical_and(
                Y[:, fea_cols] < 0, ~PassSet[:, fea_cols])
            new_opt_cols = fea_cols[np.all(~not_opt_subset, axis=0)]
            update_cols = fea_cols[np.any(not_opt_subset, axis=0)]

            if update_cols.size > 0:
                val = Y[:, update_cols] * ~PassSet[:, update_cols]
                min_ix = np.argmin(val, axis=0)
                PassSet[(min_ix, update_cols)] = True

            not_opt_set[new_opt_cols] = False
            not_opt_cols = not_opt_set.nonzero()[0]

    return X, (success, Y, num_cholesky, num_eq)
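A minimal usage sketch of the routine above (illustrative only, not part of the commit): build a small problem with a known nonnegative solution and recover it.

import numpy as np
from nnlsm_activeset import nnlsm_activeset

rng = np.random.default_rng(0)
A = rng.random((100, 10))        # design matrix, shape (m, n)
X_true = rng.random((10, 5))     # nonnegative ground truth, shape (n, k)
B = A @ X_true                   # observations, shape (m, k)

X, (success, Y, num_cholesky, num_eq) = nnlsm_activeset(A, B)
rel_err = np.linalg.norm(X - X_true) / np.linalg.norm(X_true)
print(success, num_cholesky, num_eq, rel_err)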
55 changes: 25 additions & 30 deletions pyproject.toml
@@ -1,29 +1,28 @@
[build-system]
- build-backend = "pdm.pep517.api"
- requires = [
- "pdm-pep517>=1.0.0",
- ]
+ build-backend = "pdm.pep517.api"
+ requires = ["pdm-pep517>=1.0.0"]

[tool.black]
- exclude = "/(\n \\.eggs\n | \\.git\n | \\.hg\n | \\.mypy_cache\n | \\.nox\n | \\.tox\n | \\.venv\n | _build\n | buck-out\n | build\n | dist\n)/\n"
- include = "\\.pyi?$"
+ exclude = "/(\n \\.eggs\n | \\.git\n | \\.hg\n | \\.mypy_cache\n | \\.nox\n | \\.tox\n | \\.venv\n | _build\n | buck-out\n | build\n | dist\n)/\n"
+ include = "\\.pyi?$"

[tool.pdm]
- package-dir = "src"
-
+ package-dir = "src"
+ [tool.pdm.build]
+ excludes = ["contrib"]
[project]
- authors = [
+ authors = [
{ name = "Joshua Welch", email = "[email protected]" },
{ name = "Lu Lu", email = "[email protected]" },
- ]
- classifiers = [
+ ]
+ classifiers = [
"Development Status :: 4 - Beta",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3.8",
- ]
- dependencies = [
+ ]
+ dependencies = [
"adjustText>=0.7.3",
"anndata>=0.8.0",
"annoy",
@@ -44,22 +43,18 @@ dependencies = [
"scikit-learn",
"seaborn>=0.12.2",
"umap-learn",
- ]
- description = "The Python version of LIGER package."
- keywords = [
- "LIGER",
- ]
- maintainers = [
- { name = "Andrew Robbins", email = "[email protected]" },
- ]
- name = "pyliger"
- readme = "README.md"
- requires-python = "<3.11, >=3.8"
- version = "0.1.1"
+ ]
+ description = "The Python version of LIGER package."
+ keywords = ["LIGER"]
+ maintainers = [{ name = "Andrew Robbins", email = "[email protected]" }]
+ name = "pyliger"
+ readme = "README.md"
+ requires-python = "<3.11, >=3.8"
+ version = "0.1.1"

- [project.license]
- text = "MIT"
+ [project.license]
+ text = "MIT"

- [project.urls]
- homepage = "https://welch-lab.github.io"
- repository = "https://github.com/welch-lab/pyliger"
+ [project.urls]
+ homepage = "https://welch-lab.github.io"
+ repository = "https://github.com/welch-lab/pyliger"
