Skip to content

Commit

Permalink
adding changes to pass tests
Browse files Browse the repository at this point in the history
  • Loading branch information
nandsra21 committed Mar 15, 2024
1 parent d155194 commit 5f85e39
Showing 1 changed file with 16 additions and 5 deletions.
21 changes: 16 additions & 5 deletions src/pathogen_embed/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@
from sys import argv
from .pathogen_embed import embed, distance, cluster

class AutoOrFloatAction(argparse.Action):
    """Argparse action that accepts either the literal ``"auto"`` or a float.

    The parsed value is stored on the namespace under ``self.dest`` as the
    string ``"auto"`` when the user passes exactly that, otherwise as the
    ``float`` conversion of the supplied text.

    Register this class with ``action=AutoOrFloatAction``. It is an
    ``argparse.Action`` subclass, not a conversion function, so it is not
    suitable for the ``type=`` keyword of ``add_argument``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` and store the result on ``namespace``.

        Raises:
            argparse.ArgumentError: if ``values`` is neither ``"auto"`` nor
                parseable by ``float``. argparse intercepts this exception
                while parsing and reports it as a usage error.
        """
        if values == "auto":
            setattr(namespace, self.dest, "auto")
            return

        try:
            parsed = float(values)
        except ValueError:
            # ArgumentTypeError is only handled by argparse inside ``type=``
            # converters; raised from an action it would escape parse_args()
            # as a traceback. ArgumentError bound to this action is what
            # argparse turns into a proper "argument --x: ..." error message.
            raise argparse.ArgumentError(
                self,
                f"Invalid value: {values}. Must be a float or 'auto'.",
            ) from None

        setattr(namespace, self.dest, parsed)


def make_parser_embed():
parser = argparse.ArgumentParser(description = "Reduced dimension embeddings for pathogen sequences", formatter_class=argparse.ArgumentDefaultsHelpFormatter)

Expand All @@ -20,21 +31,21 @@ def make_parser_embed():
required=True
)

pca = subparsers.add_parser("pca", description="Principal Component Analysis")
pca = subparsers.add_parser("pca", description="Principal Component Analysis", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
pca.add_argument("--components", default=10, type=int, help="the number of components for PCA")
pca.add_argument("--explained-variance", help="the path for the CSV explained variance for each component")

tsne = subparsers.add_parser("t-sne", description="t-distributed Stochastic Neighborhood Embedding")
tsne = subparsers.add_parser("t-sne", description="t-distributed Stochastic Neighborhood Embedding", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
tsne.add_argument("--components", default=2, type=int, help="the number of components for t-SNE")
tsne.add_argument("--perplexity", default=30.0, type=float, help="The perplexity is related to the number of nearest neighbors. Because of this, the size of the dataset is proportional to the best perplexity value (large dataset -> large perplexity). Values between 5 and 50 work best. The default value is the value consistently the best for pathogen analyses, results from an exhaustive grid search.")
tsne.add_argument("--learning-rate", default=200.0, type=float, help="The learning rate for t-SNE is usually between 10.0 and 1000.0. Values out of these bounds may create innacurate results. The default value is the value consistently the best for pathogen analyses, results from an exhaustive grid search.")
tsne.add_argument("--learning-rate", default="auto", type=AutoOrFloatAction, help="The learning rate for t-SNE is usually between 10.0 and 1000.0. Values out of these bounds may create innacurate results. The default value is the value consistently the best for pathogen analyses, results from an exhaustive grid search.")

umap = subparsers.add_parser("umap", description="Uniform Manifold Approximation and Projection")
umap = subparsers.add_parser("umap", description="Uniform Manifold Approximation and Projection", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
umap.add_argument("--components", default=2, type=int, help="the number of components for UMAP")
umap.add_argument("--nearest-neighbors", default=200, type=int, help="Nearest neighbors controls how UMAP balances local versus global structure in the data (finer detail patterns versus global structure). This value is proportional to the size of the data (large dataset -> large nearest neighbors). The default value is the value consistently the best for pathogen analyses, results from an exhaustive grid search.")
umap.add_argument("--min-dist", default=.5, type=float, help="Minimum Distance controls how tightly packed the UMAP embedding is. While it does not change the structure of the data, it does change the embedding's shape. The default value is the value consistently the best for pathogen analyses, results from an exhaustive grid search.")

mds = subparsers.add_parser("mds", description="Multidimensional Scaling")
mds = subparsers.add_parser("mds", description="Multidimensional Scaling", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
mds.add_argument("--components", default=10, type=int, help="the number of components for MDS")
mds.add_argument("--stress", help="the path for the CSV stress for the embedding")

Expand Down

0 comments on commit 5f85e39

Please sign in to comment.