From 5f85e39134a0a4ff423c7e228615a7a405a8d024 Mon Sep 17 00:00:00 2001
From: Sravani Nanduri
Date: Fri, 15 Mar 2024 13:40:21 -0700
Subject: [PATCH] adding changes to pass tests

---
Review note: --learning-rate now registers AutoOrFloatAction via `action=`
(it was passed as `type=`, which argparse calls as a one-argument converter),
and the action reports bad values with argparse.ArgumentError. The `index`
line below is carried over from the original patch, so its post-image blob id
no longer matches the patched file.

 src/pathogen_embed/__main__.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/pathogen_embed/__main__.py b/src/pathogen_embed/__main__.py
index 41bad60..c29d36f 100644
--- a/src/pathogen_embed/__main__.py
+++ b/src/pathogen_embed/__main__.py
@@ -3,6 +3,25 @@
 from sys import argv
 from .pathogen_embed import embed, distance, cluster
 
+class AutoOrFloatAction(argparse.Action):
+    """Store an option value as the string "auto" or as a float.
+
+    Register with ``add_argument(..., action=AutoOrFloatAction)``. Any value
+    other than "auto" must be accepted by ``float()``.
+    """
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        if values == "auto":
+            setattr(namespace, self.dest, "auto")
+        else:
+            try:
+                setattr(namespace, self.dest, float(values))
+            except ValueError:
+                # ArgumentTypeError is only intercepted for `type=` converters;
+                # ArgumentError is what argparse turns into a usage error here.
+                raise argparse.ArgumentError(self, f"Invalid value: {values}. Must be a float or 'auto'.") from None
+
+
 def make_parser_embed():
     parser = argparse.ArgumentParser(description = "Reduced dimension embeddings for pathogen sequences", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
@@ -20,21 +39,21 @@ def make_parser_embed():
         required=True
     )
 
-    pca = subparsers.add_parser("pca", description="Principal Component Analysis")
+    pca = subparsers.add_parser("pca", description="Principal Component Analysis", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     pca.add_argument("--components", default=10, type=int, help="the number of components for PCA")
     pca.add_argument("--explained-variance", help="the path for the CSV explained variance for each component")
 
-    tsne = subparsers.add_parser("t-sne", description="t-distributed Stochastic Neighborhood Embedding")
+    tsne = subparsers.add_parser("t-sne", description="t-distributed Stochastic Neighborhood Embedding", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     tsne.add_argument("--components", default=2, type=int, help="the number of components for t-SNE")
     tsne.add_argument("--perplexity", default=30.0, type=float, help="The perplexity is related to the number of nearest neighbors. Because of this, the size of the dataset is proportional to the best perplexity value (large dataset -> large perplexity). Values between 5 and 50 work best. The default value is the value consistently the best for pathogen analyses, results from an exhaustive grid search.")
-    tsne.add_argument("--learning-rate", default=200.0, type=float, help="The learning rate for t-SNE is usually between 10.0 and 1000.0. Values out of these bounds may create innacurate results. The default value is the value consistently the best for pathogen analyses, results from an exhaustive grid search.")
+    tsne.add_argument("--learning-rate", default="auto", action=AutoOrFloatAction, help="The learning rate for t-SNE is usually between 10.0 and 1000.0. Values out of these bounds may create innacurate results. The default value is the value consistently the best for pathogen analyses, results from an exhaustive grid search.")
 
-    umap = subparsers.add_parser("umap", description="Uniform Manifold Approximation and Projection")
+    umap = subparsers.add_parser("umap", description="Uniform Manifold Approximation and Projection", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     umap.add_argument("--components", default=2, type=int, help="the number of components for UMAP")
     umap.add_argument("--nearest-neighbors", default=200, type=int, help="Nearest neighbors controls how UMAP balances local versus global structure in the data (finer detail patterns versus global structure). This value is proportional to the size of the data (large dataset -> large nearest neighbors). The default value is the value consistently the best for pathogen analyses, results from an exhaustive grid search.")
     umap.add_argument("--min-dist", default=.5, type=float, help="Minimum Distance controls how tightly packed the UMAP embedding is. While it does not change the structure of the data, it does change the embedding's shape. The default value is the value consistently the best for pathogen analyses, results from an exhaustive grid search.")
 
-    mds = subparsers.add_parser("mds", description="Multidimensional Scaling")
+    mds = subparsers.add_parser("mds", description="Multidimensional Scaling", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     mds.add_argument("--components", default=10, type=int, help="the number of components for MDS")
     mds.add_argument("--stress", help="the path for the CSV stress for the embedding")
 