forked from serrano-pozo-lab/nd-diff-expr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
references.bib
275 lines (258 loc) · 28.1 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
% Wang et al. 2012 (BMC Bioinformatics): confounder adjustment via a random intercept model plus meta-analysis of five MDD microarray studies.
@article{wang_detecting_2012,
  author     = {Wang, Xingbin and Lin, Yan and Song, Chi and Sibille, Etienne and Tseng, George C},
  title      = {Detecting disease-associated genes with confounding variable adjustment and the impact on genomic meta-analysis: {With} application to major depressive disorder},
  shorttitle = {Detecting disease-associated genes with confounding variable adjustment and the impact on genomic meta-analysis},
  journal    = {BMC Bioinformatics},
  volume     = {13},
  pages      = {52},
  year       = {2012},
  month      = mar,
  issn       = {1471-2105},
  doi        = {10.1186/1471-2105-13-52},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3342232/},
  urldate    = {2020-10-12},
  pmid       = {22458711},
  pmcid      = {PMC3342232},
  abstract   = {Background
Detecting candidate markers in transcriptomic studies often encounters difficulties in complex diseases, particularly when overall signals are weak and sample size is small. Covariates including demographic, clinical and technical variables are often confounded with the underlying disease effects, which further hampers accurate biomarker detection. Our motivating example came from an analysis of five microarray studies in major depressive disorder (MDD), a heterogeneous psychiatric illness with mostly uncharacterized genetic mechanisms.
Results
We applied a random intercept model to account for confounding variables and case-control paired design. A variable selection scheme was developed to determine the effective confounders in each gene. Meta-analysis methods were used to integrate information from five studies and post hoc analyses enhanced biological interpretations. Simulations and application results showed that the adjustment for confounding variables and meta-analysis improved detection of biomarkers and associated pathways.
Conclusions
The proposed framework simultaneously considers correction for confounding variables, selection of effective confounders, random effects from paired design and integration by meta-analysis. The approach improved disease-related biomarker and pathway detection, which greatly enhanced understanding of MDD neurobiology. The statistical framework can be applied to similar experimental design encountered in other complex and heterogeneous diseases.},
  file       = {PubMed Central Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\UQSJDRD8\\Wang et al. - 2012 - Detecting disease-associated genes with confoundin.pdf:application/pdf},
}
% Walsh et al. 2015 (Microarrays): review of cross-platform normalization versus meta-analysis for microarray data integration.
@article{walsh_microarray_2015,
  author     = {Walsh, Christopher J. and Hu, Pingzhao and Batt, Jane and dos Santos, Claudia C.},
  title      = {Microarray {Meta}-{Analysis} and {Cross}-{Platform} {Normalization}: {Integrative} {Genomics} for {Robust} {Biomarker} {Discovery}},
  shorttitle = {Microarray {Meta}-{Analysis} and {Cross}-{Platform} {Normalization}},
  journal    = {Microarrays},
  volume     = {4},
  number     = {3},
  pages      = {389--406},
  year       = {2015},
  month      = aug,
  issn       = {2076-3905},
  doi        = {10.3390/microarrays4030389},
  pmid       = {27600230},
  pmcid      = {PMC4996376},
  language   = {eng},
  keywords   = {biomarker, meta-analysis, microarray platform, normalization},
  abstract   = {The diagnostic and prognostic potential of the vast quantity of publicly-available microarray data has driven the development of methods for integrating the data from different microarray platforms. Cross-platform integration, when appropriately implemented, has been shown to improve reproducibility and robustness of gene signature biomarkers. Microarray platform integration can be conceptually divided into approaches that perform early stage integration (cross-platform normalization) versus late stage data integration (meta-analysis). A growing number of statistical methods and associated software for platform integration are available to the user, however an understanding of their comparative performance and potential pitfalls is critical for best implementation. In this review we provide evidence-based, practical guidance to researchers performing cross-platform integration, particularly with an objective to discover biomarkers.},
  file       = {Full Text:C\:\\Users\\ayush\\Zotero\\storage\\MBWBL5G3\\Walsh et al. - 2015 - Microarray Meta-Analysis and Cross-Platform Normal.pdf:application/pdf},
}
% Leek and Storey 2007 (PLOS Genetics): introduces surrogate variable analysis (SVA) for unmodeled heterogeneity in expression studies.
@article{leek_capturing_2007,
  author     = {Leek, Jeffrey T. and Storey, John D.},
  title      = {Capturing {Heterogeneity} in {Gene} {Expression} {Studies} by {Surrogate} {Variable} {Analysis}},
  journal    = {PLOS Genetics},
  publisher  = {Public Library of Science},
  volume     = {3},
  number     = {9},
  pages      = {e161},
  year       = {2007},
  month      = sep,
  issn       = {1553-7404},
  doi        = {10.1371/journal.pgen.0030161},
  url        = {https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.0030161},
  urldate    = {2020-10-12},
  language   = {en},
  keywords   = {Algorithms, Breast cancer, Gene expression, Genetic causes of cancer, Genetic loci, Genomics, Microarrays, Singular value decomposition},
  abstract   = {It has unambiguously been shown that genetic, environmental, demographic, and technical factors may have substantial effects on gene expression levels. In addition to the measured variable(s) of interest, there will tend to be sources of signal due to factors that are unknown, unmeasured, or too complicated to capture through simple models. We show that failing to incorporate these sources of heterogeneity into an analysis can have widespread and detrimental effects on the study. Not only can this reduce power or induce unwanted dependence across genes, but it can also introduce sources of spurious signal to many genes. This phenomenon is true even for well-designed, randomized studies. We introduce ``surrogate variable analysis'' (SVA) to overcome the problems caused by heterogeneity in expression studies. SVA can be applied in conjunction with standard analysis techniques to accurately capture the relationship between expression and any modeled variables of interest. We apply SVA to disease class, time course, and genetics of gene expression studies. We show that SVA increases the biological accuracy and reproducibility of analyses in genome-wide expression studies.},
  file       = {Snapshot:C\:\\Users\\ayush\\Zotero\\storage\\B8HUPWFA\\article.html:text/html;Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\G7LRFYQ4\\Leek and Storey - 2007 - Capturing Heterogeneity in Gene Expression Studies.pdf:application/pdf},
}
% Leek et al. 2012 (Bioinformatics): the sva package paper (batch effects and other unwanted variation).
% The stored abstract was cut off by the exporter; the truncation point is marked with [...].
@article{leek_sva_2012,
  author     = {Leek, Jeffrey T. and Johnson, W. Evan and Parker, Hilary S. and Jaffe, Andrew E. and Storey, John D.},
  title      = {The sva package for removing batch effects and other unwanted variation in high-throughput experiments},
  journal    = {Bioinformatics},
  publisher  = {Oxford Academic},
  volume     = {28},
  number     = {6},
  pages      = {882--883},
  year       = {2012},
  month      = mar,
  issn       = {1367-4803},
  doi        = {10.1093/bioinformatics/bts034},
  url        = {https://academic.oup.com/bioinformatics/article/28/6/882/311263},
  urldate    = {2020-10-12},
  language   = {en},
  abstract   = {Summary: Heterogeneity and latent variables are now widely recognized as major sources of bias and variability in high-throughput experiments. [...]},
  file       = {Snapshot:C\:\\Users\\ayush\\Zotero\\storage\\QEVT4SGB\\311263.html:text/html;Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\UHDME6NG\\Leek et al. - 2012 - The sva package for removing batch effects and oth.pdf:application/pdf},
}
% Kauffmann and Huber 2010 (Genomics): compares outlier-array removal and array weights as array-level quality control.
@article{kauffmann_microarray_2010,
  author     = {Kauffmann, Audrey and Huber, Wolfgang},
  title      = {Microarray data quality control improves the detection of differentially expressed genes},
  journal    = {Genomics},
  volume     = {95},
  number     = {3},
  pages      = {138--142},
  year       = {2010},
  month      = mar,
  issn       = {0888-7543},
  doi        = {10.1016/j.ygeno.2010.01.003},
  url        = {http://www.sciencedirect.com/science/article/pii/S0888754310000042},
  urldate    = {2020-10-12},
  language   = {en},
  keywords   = {Microarray, Outlier, Quality},
  abstract   = {Microarrays have become a routine tool for biomedical research. Data quality assessment is an essential part of the analysis, but it is still not easy to perform objectively or in an automated manner, and as a result it is often neglected. Here, we compared two strategies of array-level quality control using five publicly available microarray experiments: outlier removal and array weights. We also compared them against no outlier removal and random array removal. We find that removing outlier arrays can improve the signal-to-noise ratio and thus strengthen the power of detecting differentially expressed genes. Using array weights is similarly effective, but its applicability is more limited. The quality metrics presented here are implemented in the Bioconductor package arrayQualityMetrics.},
  file       = {ScienceDirect Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\LXE3X2M3\\Kauffmann and Huber - 2010 - Microarray data quality control improves the detec.pdf:application/pdf;ScienceDirect Snapshot:C\:\\Users\\ayush\\Zotero\\storage\\938LR6IT\\S0888754310000042.html:text/html},
}
% Kauffmann et al. 2009 (Bioinformatics): the arrayQualityMetrics Bioconductor package for microarray quality reports.
% NOTE(review): the contact address in the abstract is an obfuscated placeholder from the source page; restore it from the publisher record if needed.
@article{kauffmann_arrayqualitymetrics_2009,
  author     = {Kauffmann, Audrey and Gentleman, Robert and Huber, Wolfgang},
  title      = {{arrayQualityMetrics}---a {Bioconductor} {Package} for {Quality} {Assessment} of {Microarray} {Data}},
  journal    = {Bioinformatics},
  volume     = {25},
  number     = {3},
  pages      = {415--416},
  year       = {2009},
  month      = feb,
  issn       = {1367-4803},
  doi        = {10.1093/bioinformatics/btn647},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2639074/},
  urldate    = {2020-10-12},
  pmid       = {19106121},
  pmcid      = {PMC2639074},
  abstract   = {Summary: The assessment of data quality is a major concern in microarray analysis. arrayQualityMetrics is a Bioconductor package that provides a report with diagnostic plots for one or two colour microarray data. The quality metrics assess reproducibility, identify apparent outlier arrays and compute measures of signal-to-noise ratio. The tool handles most current microarray technologies and is amenable to use in automated analysis pipelines or for automatic report generation, as well as for use by individuals. The diagnosis of quality remains, in principle, a context-dependent judgement, but our tool provides powerful, automated, objective and comprehensive instruments on which to base a decision. Availability: arrayQualityMetrics is a free and open source package, under LGPL license, available from the Bioconductor project at www.bioconductor.org. A users guide and examples are provided with the package. Some examples of HTML reports generated by arrayQualityMetrics can be found at http://www.microarray-quality.org. Contact: [email protected]. Supplementary information: Supplementary data are available at Bioinformatics online.},
  file       = {PubMed Central Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\EUFBDWCZ\\Kauffmann et al. - 2009 - arrayQualityMetrics—a bioconductor package for qua.pdf:application/pdf},
}
% Ritchie et al. 2015 (Nucleic Acids Research): review of the limma package for microarray and RNA-seq differential expression.
@article{ritchie_limma_2015,
  author     = {Ritchie, Matthew E. and Phipson, Belinda and Wu, Di and Hu, Yifang and Law, Charity W. and Shi, Wei and Smyth, Gordon K.},
  title      = {limma powers differential expression analyses for {RNA}-sequencing and microarray studies},
  journal    = {Nucleic Acids Research},
  volume     = {43},
  number     = {7},
  pages      = {e47},
  year       = {2015},
  month      = apr,
  issn       = {0305-1048},
  doi        = {10.1093/nar/gkv007},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4402510/},
  urldate    = {2020-06-03},
  pmid       = {25605792},
  pmcid      = {PMC4402510},
  abstract   = {limma is an R/Bioconductor software package that provides an integrated solution for analysing data from gene expression experiments. It contains rich features for handling complex experimental designs and for information borrowing to overcome the problem of small sample sizes. Over the past decade, limma has been a popular choice for gene discovery through differential expression analyses of microarray and high-throughput PCR data. The package contains particularly strong facilities for reading, normalizing and exploring such data. Recently, the capabilities of limma have been significantly expanded in two important directions. First, the package can now perform both differential expression and differential splicing analyses of RNA sequencing (RNA-seq) data. All the downstream analysis tools previously restricted to microarray data are now available for RNA-seq as well. These capabilities allow users to analyse both RNA-seq and microarray data with very similar pipelines. Second, the package is now able to go past the traditional gene-wise expression analyses in a variety of ways, analysing expression profiles in terms of co-regulated sets of genes or in terms of higher-order expression signatures. This provides enhanced possibilities for biological interpretation of gene expression differences. This article reviews the philosophy and design of the limma package, summarizing both new and historical features, with an emphasis on recent enhancements and features that have not been previously described.},
  file       = {PubMed Central Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\69TZ6DT4\\Ritchie et al. - 2015 - limma powers differential expression analyses for .pdf:application/pdf;Snapshot:C\:\\Users\\ayush\\Zotero\\storage\\2C4WBST2\\2414268.html:text/html;Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\BIY247E8\\Ritchie et al. - 2015 - limma powers differential expression analyses for .pdf:application/pdf;PubMed Central Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\2HWWDB8W\\Ritchie et al. - 2015 - limma powers differential expression analyses for .pdf:application/pdf},
}
% Carvalho and Irizarry 2010 (Bioinformatics): framework for oligonucleotide microarray preprocessing.
% The stored abstract was cut off by the exporter; the truncation point is marked with [...].
@article{carvalho_framework_2010,
  author     = {Carvalho, Benilton S. and Irizarry, Rafael A.},
  title      = {A framework for oligonucleotide microarray preprocessing},
  journal    = {Bioinformatics},
  publisher  = {Oxford Academic},
  volume     = {26},
  number     = {19},
  pages      = {2363--2367},
  year       = {2010},
  month      = oct,
  issn       = {1367-4803},
  doi        = {10.1093/bioinformatics/btq431},
  url        = {https://academic.oup.com/bioinformatics/article/26/19/2363/228760},
  urldate    = {2020-10-12},
  language   = {en},
  abstract   = {Motivation: The availability of flexible open source software for the analysis of gene expression raw level data has greatly facilitated the development [...]},
  file       = {Snapshot:C\:\\Users\\ayush\\Zotero\\storage\\QG8A5M2J\\228760.html:text/html;Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\VA4CZAXJ\\Carvalho and Irizarry - 2010 - A framework for oligonucleotide microarray preproc.pdf:application/pdf},
}
% Irizarry et al. 2003 (Biostatistics): proposes the robust multi-array average (RMA) expression summary.
@article{irizarry_exploration_2003,
  author     = {Irizarry, Rafael A. and Hobbs, Bridget and Collin, Francois and Beazer-Barclay, Yasmin D. and Antonellis, Kristen J. and Scherf, Uwe and Speed, Terence P.},
  title      = {Exploration, normalization, and summaries of high density oligonucleotide array probe level data},
  journal    = {Biostatistics},
  volume     = {4},
  number     = {2},
  pages      = {249--264},
  year       = {2003},
  month      = apr,
  issn       = {1465-4644},
  doi        = {10.1093/biostatistics/4.2.249},
  pmid       = {12925520},
  language   = {eng},
  keywords   = {Algorithms, Animals, Data Interpretation, Statistical, DNA Probes, Gene Expression Profiling, Humans, Linear Models, Mice, Normal Distribution, Oligonucleotide Array Sequence Analysis, Reproducibility of Results, Statistics, Nonparametric},
  abstract   = {In this paper we report exploratory analyses of high-density oligonucleotide array data from the Affymetrix GeneChip system with the objective of improving upon currently used measures of gene expression. Our analyses make use of three data sets: a small experimental study consisting of five MGU74A mouse GeneChip arrays, part of the data from an extensive spike-in study conducted by Gene Logic and Wyeth's Genetics Institute involving 95 HG-U95A human GeneChip arrays; and part of a dilution study conducted by Gene Logic involving 75 HG-U95A GeneChip arrays. We display some familiar features of the perfect match and mismatch probe (PM and MM) values of these data, and examine the variance-mean relationship with probe-level data from probes believed to be defective, and so delivering noise only. We explain why we need to normalize the arrays to one another using probe level intensities. We then examine the behavior of the PM and MM using spike-in data and assess three commonly used summary measures: Affymetrix's (i) average difference (AvDiff) and (ii) MAS 5.0 signal, and (iii) the Li and Wong multiplicative model-based expression index (MBEI). The exploratory data analyses of the probe level data motivate a new summary measure that is a robust multi-array average (RMA) of background-adjusted, normalized, and log-transformed PM values. We evaluate the four expression summary measures using the dilution study data, assessing their behavior in terms of bias, variance and (for MBEI and RMA) model fit. Finally, we evaluate the algorithms in terms of their ability to detect known levels of differential expression using the spike-in data. We conclude that there is no obvious downside to using RMA and attaching a standard error (SE) to this quantity using a linear model which removes probe-specific affinities.},
  file       = {Full Text:C\:\\Users\\ayush\\Zotero\\storage\\ZI3KYYP3\\Irizarry et al. - 2003 - Exploration, normalization, and summaries of high .pdf:application/pdf},
}
% Maglott et al. 2007 (Nucleic Acids Research): describes NCBI's Entrez Gene database.
@article{maglott_entrez_2007,
  author     = {Maglott, Donna and Ostell, Jim and Pruitt, Kim D. and Tatusova, Tatiana},
  title      = {Entrez {Gene}: gene-centered information at {NCBI}},
  shorttitle = {Entrez {Gene}},
  journal    = {Nucleic Acids Research},
  volume     = {35},
  number     = {Database issue},
  pages      = {D26--D31},
  year       = {2007},
  month      = jan,
  issn       = {0305-1048},
  doi        = {10.1093/nar/gkl993},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1761442/},
  urldate    = {2019-10-08},
  pmid       = {17148475},
  pmcid      = {PMC1761442},
  abstract   = {Entrez Gene () is NCBI's database for gene-specific information. Entrez Gene includes records from genomes that have been completely sequenced, that have an active research community to contribute gene-specific information or that are scheduled for intense sequence analysis. The content of Entrez Gene represents the result of both curation and automated integration of data from NCBI's Reference Sequence project (RefSeq), from collaborating model organism databases and from other databases within NCBI. Records in Entrez Gene are assigned unique, stable and tracked integers as identifiers. The content (nomenclature, map location, gene products and their attributes, markers, phenotypes and links to citations, sequences, variation details, maps, expression, homologs, protein domains and external databases) is provided via interactive browsing through NCBI's Entrez system, via NCBI's Entrez programing utilities (E-Utilities), and for bulk transfer by ftp.},
  file       = {PubMed Central Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\9LAP563I\\Maglott et al. - 2007 - Entrez Gene gene-centered information at NCBI.pdf:application/pdf},
}
% Phipson et al. 2016 (The Annals of Applied Statistics): robust empirical Bayes hyperparameter estimation, implemented in limma.
@article{phipson_robust_2016,
  author     = {Phipson, Belinda and Lee, Stanley and Majewski, Ian J. and Alexander, Warren S. and Smyth, Gordon K.},
  title      = {Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression},
  journal    = {The Annals of Applied Statistics},
  volume     = {10},
  number     = {2},
  pages      = {946--963},
  year       = {2016},
  month      = jun,
  issn       = {1932-6157},
  doi        = {10.1214/16-AOAS920},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5373812/},
  urldate    = {2019-10-06},
  pmid       = {28367255},
  pmcid      = {PMC5373812},
  abstract   = {One of the most common analysis tasks in genomic research is to identify genes that are differentially expressed (DE) between experimental conditions. Empirical Bayes (EB) statistical tests using moderated genewise variances have been very effective for this purpose, especially when the number of biological replicate samples is small. The EB procedures can however be heavily influenced by a small number of genes with very large or very small variances. This article improves the differential expression tests by robustifying the hyperparameter estimation procedure. The robust procedure has the effect of decreasing the informativeness of the prior distribution for outlier genes while increasing its informativeness for other genes. This effect has the double benefit of reducing the chance that hypervariable genes will be spuriously identified as DE while increasing statistical power for the main body of genes. The robust EB algorithm is fast and numerically stable. The procedure allows exact small-sample null distributions for the test statistics and reduces exactly to the original EB procedure when no outlier genes are present. Simulations show that the robustified tests have similar performance to the original tests in the absence of outlier genes but have greater power and robustness when outliers are present. The article includes case studies for which the robust method correctly identifies and downweights genes associated with hidden covariates and detects more genes likely to be scientifically relevant to the experimental conditions. The new procedure is implemented in the limma software package freely available from the Bioconductor repository.},
  file       = {PubMed Central Full Text PDF:C\:\\Users\\ayush\\Zotero\\storage\\CUGA658D\\Phipson et al. - 2016 - ROBUST HYPERPARAMETER ESTIMATION PROTECTS AGAINST .pdf:application/pdf},
}
% Blighe 2019: EnhancedVolcano, software for volcano plots (GitHub repository).
@misc{blighe_enhancedvolcano_2019,
  author     = {Blighe, Kevin},
  title      = {{EnhancedVolcano}: {Publication}-ready volcano plots with enhanced colouring and labeling},
  year       = {2019},
  url        = {https://github.com/kevinblighe/EnhancedVolcano},
}
% Wickham 2016 (Springer, Use R! series): the ggplot2 book, second edition.
@book{wickham_ggplot2_2016,
  author     = {Wickham, Hadley},
  title      = {{ggplot2}: {Elegant} {Graphics} for {Data} {Analysis}},
  shorttitle = {ggplot2},
  series     = {Use {R}!},
  edition    = {Second},
  publisher  = {Springer International Publishing},
  address    = {Cham},
  year       = {2016},
  isbn       = {978-3-319-24277-4},
  doi        = {10.1007/978-3-319-24277-4},
  keywords   = {Computer graphics, Computer Graphics, Mathematics, Statistics, Statistics and Computing/Statistics Programs, Visualization},
  abstract   = {This new edition to the classic book by ggplot2 creator Hadley Wickham highlights compatibility with knitr and RStudio. ggplot2 is a data visualization package for R that helps users create data graphics, including those that are multi-layered, with ease. With ggplot2, it's easy to: - produce handsome, publication-quality plots with automatic legends created from the plot specification - superimpose multiple layers (points, lines, maps, tiles, box plots) from different data sources with automatically adjusted common scales - add customizable smoothers that use powerful modeling capabilities of R, such as loess, linear models, generalized additive models, and robust regression - save any ggplot2 plot (or part thereof) for later modification or reuse - create custom themes that capture in-house or journal style requirements and that can easily be applied to multiple plots - approach a graph from a visual perspective, thinking about how each component of the data is represented on the final plot This book will be useful to everyone who has struggled with displaying data in an informative and attractive way. Some basic knowledge of R is necessary (e.g., importing data into R). ggplot2 is a mini-language specifically tailored for producing graphics, and you'll learn everything you need in the book. After reading this book you'll be able to produce graphics customized precisely for your problems, and you'll find it easy to get graphics out of your head and on to the screen or page. New to this edition: - Brings the book up-to-date with ggplot2 1.0, including major updates to the theme system - New scales, stats and geoms added throughout - Additional practice exercises - A revised introduction that focuses on ggplot() instead of qplot() - Updated chapters on data and modeling using tidyr, dplyr and broom},
}
% Noori et al. 2020 (Neurobiology of Disease): systematic review and meta-analysis of human CNS transcriptomics across AD, LBD and ALS-FTD.
@article{noori_systematic_2020,
  author     = {Noori, Ayush and Mezlini, Aziz M. and Hyman, Bradley T. and Serrano-Pozo, Alberto and Das, Sudeshna},
  title      = {Systematic review and meta-analysis of human transcriptomics reveals neuroinflammation, deficient energy metabolism, and proteostasis failure across neurodegeneration},
  journal    = {Neurobiology of Disease},
  volume     = {149},
  pages      = {105225},
  year       = {2020},
  month      = dec,
  issn       = {1095-953X},
  doi        = {10.1016/j.nbd.2020.105225},
  pmid       = {33347974},
  language   = {eng},
  keywords   = {Alzheimer's disease, Amyotrophic lateral sclerosis, Frontotemporal dementia, Lewy body diseases, Meta-analysis, Mitochondrial energy metabolism, Neurodegeneration, Neuroinflammation, Proteostasis, Transcriptomics},
  abstract   = {Neurodegenerative disorders such as Alzheimer's disease (AD), Lewy body diseases (LBD), and the amyotrophic lateral sclerosis and frontotemporal dementia (ALS-FTD) spectrum are defined by the accumulation of specific misfolded protein aggregates. However, the mechanisms by which each proteinopathy leads to neurodegeneration remain elusive. We hypothesized that there is a common "pan-neurodegenerative" gene expression signature driving pathophysiology across these clinically and pathologically diverse proteinopathies. To test this hypothesis, we performed a systematic review of human CNS transcriptomics datasets from AD, LBD, and ALS-FTD patients and age-matched controls in the Gene Expression Omnibus (GEO) and ArrayExpress databases, followed by consistent processing of each dataset, meta-analysis, pathway enrichment, and overlap analyses. After applying pre-specified eligibility criteria and stringent data pre-processing, a total of 2600 samples from 26 AD, 21 LBD, and 13 ALS-FTD datasets were included in the meta-analysis. The pan-neurodegenerative gene signature is characterized by an upregulation of innate immunity, cytoskeleton, and transcription and RNA processing genes, and a downregulation of the mitochondrial electron transport chain. Pathway enrichment analyses also revealed the upregulation of neuroinflammation (including Toll-like receptor, TNF, and NFκB signaling) and phagocytosis, and the downregulation of mitochondrial oxidative phosphorylation, lysosomal acidification, and ubiquitin-proteasome pathways. Our findings suggest that neuroinflammation and a failure in both neuronal energy metabolism and protein degradation systems are consistent features underlying neurodegenerative diseases, despite differences in the extent of neuronal loss and brain regions involved.},
}
% Leek 2011 (Biometrics): asymptotic behaviour of the singular value decomposition under a conditional factor model, with an estimator for the number of factors.
@article{leek_asymptotic_2011,
  author     = {Leek, Jeffrey T.},
  title      = {Asymptotic conditional singular value decomposition for high-dimensional genomic data},
  journal    = {Biometrics},
  volume     = {67},
  number     = {2},
  pages      = {344--352},
  year       = {2011},
  month      = jun,
  issn       = {1541-0420},
  doi        = {10.1111/j.1541-0420.2010.01455.x},
  pmid       = {20560929},
  pmcid      = {PMC3165001},
  language   = {eng},
  keywords   = {Genomics, Models, Statistical, Oligonucleotide Array Sequence Analysis, Sample Size},
  abstract   = {High-dimensional data, such as those obtained from a gene expression microarray or second generation sequencing experiment, consist of a large number of dependent features measured on a small number of samples. One of the key problems in genomics is the identification and estimation of factors that associate with many features simultaneously. Identifying the number of factors is also important for unsupervised statistical analyses such as hierarchical clustering. A conditional factor model is the most common model for many types of genomic data, ranging from gene expression, to single nucleotide polymorphisms, to methylation. Here we show that under a conditional factor model for genomic data with a fixed sample size, the right singular vectors are asymptotically consistent for the unobserved latent factors as the number of features diverges. We also propose a consistent estimator of the dimension of the underlying conditional factor model for a finite fixed sample size and an infinite number of features based on a scaled eigen-decomposition. We propose a practical approach for selection of the number of factors in real data sets, and we illustrate the utility of these results for capturing batch and other unmodeled effects in a microarray experiment using the dependence kernel approach of Leek and Storey (2008, Proceedings of the National Academy of Sciences of the United States of America 105, 18718-18723).},
  file       = {Accepted Version:C\:\\Users\\ayush\\Zotero\\storage\\QYF2AUHF\\Leek - 2011 - Asymptotic conditional singular value decompositio.pdf:application/pdf},
}