% Reference entries: one BibTeX record per work, one field per line.
% Fields a bibliography style does not know (for example file, keywords, pmid,
% mendeley-tags, internal-note) are silently ignored by BibTeX.
% Month values use the predefined three-letter macros (unbraced) so that the
% style expands them.

@manual{R,
  address = {Vienna, Austria},
  annote = {\{ISBN\} 3-900051-07-0},
  author = {{R Development Core Team}},
  organization = {R Foundation for Statistical Computing},
  title = {{R: A Language and Environment for Statistical Computing}},
  url = {http://www.r-project.org},
  year = {2011},
}

@article{Gentleman2004,
  abstract = {The Bioconductor project is an initiative for the collaborative creation of extensible software for computational biology and bioinformatics. The goals of the project include: fostering collaborative development and widespread use of innovative software, reducing barriers to entry into interdisciplinary scientific research, and promoting the achievement of remote reproducibility of research results. We describe details of our aims and methods, identify current challenges, compare Bioconductor to other open bioinformatics projects, and provide working examples.},
  author = {Robert C Gentleman and Vincent J Carey and Douglas M Bates and Ben Bolstad and Marcel Dettling and Sandrine Dudoit and Byron Ellis and Laurent Gautier and Yongchao Ge and Jeff Gentry and Kurt Hornik and Torsten Hothorn and Wolfgang Huber and Stefano Iacus and Rafael Irizarry and Friedrich Leisch and Cheng Li and Martin Maechler and Anthony J Rossini and Gunther Sawitzki and Colin Smith and Gordon Smyth and Luke Tierney and Jean Y H Yang and Jianhua Zhang},
  doi = {10.1186/gb-2004-5-10-r80},
  issn = {1465-6914},
  journal = {Genome biology},
  keywords = {Computational Biology,Computational Biology: instrumentation,Computational Biology: methods,Internet,Reproducibility of Results,Software},
  number = {10},
  pages = {R80},
  pmid = {15461798},
  shortjournal = {Genome Biol},
  title = {{Bioconductor: open software development for computational biology and bioinformatics.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/15461798},
  volume = {5},
  year = {2004},
}

@article{Lee2001,
  author = {D D Lee and H S Seung},
  file = {:home/renaud/Documents/articles/NMF/Algorithms for non-negative matrix factorization\_Lee2000.pdf:pdf},
  journal = {Advances in neural information processing systems},
  title = {{Algorithms for non-negative matrix factorization}},
  url = {http://scholar.google.com/scholar?q=intitle:Algorithms+for+non-negative+matrix+factorization\#0},
  year = {2001},
}

% NOTE(review): journal, number and volume below look like reference-manager
% placeholders rather than real venue data -- confirm against the publication.
@article{Li2001,
  author = {Stan Z Li and Xinwen Hou},
  file = {:home/renaud/Documents/articles/NMF/Learning Spatially Localized, Parts-Based Representation\_Li2001.pdf:pdf},
  journal = {Convergence},
  number = {C},
  pages = {1--6},
  title = {{Learning Spatially Localized, Parts-Based Representation}},
  volume = {00},
  year = {2001},
}

@article{Badea2008,
  abstract = {In this paper we introduce a clustering algorithm capable of simultaneously factorizing two distinct gene expression datasets with the aim of uncovering gene regulatory programs that are common to the two phenotypes. The siNMF algorithm simultaneously searches for two factorizations that share the same gene expression profiles. The two key ingredients of this algorithm are the nonnegativity constraint and the offset variables, which together ensure the sparseness of the factorizations. While cancer is a very heterogeneous disease, there is overwhelming recent evidence that the differences between cancer subtypes implicate entire pathways and biological processes involving large numbers of genes, rather than changes in single genes. We have applied our simultaneous factorization algorithm looking for gene expression profiles that are common between the more homogeneous pancreatic ductal adenocarcinoma (PDAC) and the more heterogeneous colon adenocarcinoma. The fact that the PDAC signature is active in a large fraction of colon adeocarcinoma suggests that the oncogenic mechanisms involved may be similar to those in PDAC, at least in this subset of colon samples. There are many approaches to uncovering common mechanisms involved in different phenotypes, but most are based on comparing gene lists. The approach presented in this paper additionally takes gene expression data into account and can thus be more sensitive.},
  author = {Liviu Badea},
  file = {:home/renaud/Documents/articles/NMF/Extracting Gene Expression Profiles Common to Colon and Pancreatic Adenocarcinoma Using Simultaneous Nonnegative Matrix Factorization\_Badea2008.pdf:pdf},
  issn = {1793-5091},
  journal = {Pacific Symposium on Biocomputing. Pacific Symposium on Biocomputing},
  keywords = {Adenocarcinoma,Adenocarcinoma: genetics,Algorithms,Carcinoma,Colonic Neoplasms,Colonic Neoplasms: genetics,Computational Biology,Data Interpretation,Databases,Gene Expression Profiling,Gene Expression Profiling: statistics \& numerical,Genetic,Humans,Pancreatic Ductal,Pancreatic Ductal: genetics,Pancreatic Neoplasms,Pancreatic Neoplasms: genetics,Statistical},
  month = jan,
  pages = {267--78},
  pmid = {18229692},
  title = {{Extracting gene expression profiles common to colon and pancreatic adenocarcinoma using simultaneous nonnegative matrix factorization.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/18229692},
  volume = {290},
  year = {2008},
}

@article{Zhang2008,
  abstract = {Independent component analysis (ICA) is a widely applicable and effective approach in blind source separation (BSS), with limitations that sources are statistically independent. However, more common situation is blind source separation for nonnegative linear model (NNLM) where the observations are nonnegative linear combinations of nonnegative sources, and the sources may be statistically dependent. We propose a pattern expression nonnegative matrix factorization (PE-NMF) approach from the view point of using basis vectors most effectively to express patterns. Two regularization or penalty terms are introduced to be added to the original loss function of a standard nonnegative matrix factorization (NMF) for effective expression of patterns with basis vectors in the PE-NMF. Learning algorithm is presented, and the convergence of the algorithm is proved theoretically. Three illustrative examples on blind source separation including heterogeneity correction for gene microarray data indicate that the sources can be successfully recovered with the proposed PE-NMF when the two parameters can be suitably chosen from prior knowledge of the problem.},
  author = {Junying Zhang and Le Wei and Xuerong Feng and Zhen Ma and Yue Wang},
  doi = {10.1155/2008/168769},
  file = {:home/renaud/Documents/articles/NMF/Pattern Expression Nonnegative Matrix Factorization$\backslash$: Algorithm and Applications to Blind Source Separation\_Zhang2008.pdf:pdf},
  issn = {1687-5265},
  journal = {Computational intelligence and neuroscience},
  pages = {168769},
  pmid = {18566689},
  shortjournal = {Comput Intell Neurosci},
  title = {{Pattern expression nonnegative matrix factorization: algorithm and applications to blind source separation.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/18566689},
  volume = {2008},
  year = {2008},
}

@article{KimH2007,
  abstract = {MOTIVATION: Many practical pattern recognition problems require non-negativity constraints. For example, pixels in digital images and chemical concentrations in bioinformatics are non-negative. Sparse non-negative matrix factorizations (NMFs) are useful when the degree of sparseness in the non-negative basis matrix or the non-negative coefficient matrix in an NMF needs to be controlled in approximating high-dimensional data in a lower dimensional space. RESULTS: In this article, we introduce a novel formulation of sparse NMF and show how the new formulation leads to a convergent sparse NMF algorithm via alternating non-negativity-constrained least squares. We apply our sparse NMF algorithm to cancer-class discovery and gene expression data analysis and offer biological analysis of the results obtained. Our experimental results illustrate that the proposed sparse NMF algorithm often achieves better clustering performance with shorter computing time compared to other existing NMF algorithms. AVAILABILITY: The software is available as supplementary material.},
  author = {Hyunsoo Kim and Haesun Park},
  doi = {10.1093/bioinformatics/btm134},
  file = {:home/renaud/Documents/articles/NMF/Sparse non-negative matrix factorizations via alternating non-negativity-constrained least squares for microarray data analysis Kim2007.pdf:pdf},
  issn = {1460-2059},
  journal = {Bioinformatics (Oxford, England)},
  keywords = {Algorithms,Automated,Automated: methods,Cluster Analysis,Computational Biology,Computational Biology: methods,Data Interpretation,Databases,Entropy,Factor Analysis,Gene Expression,Genetic,Humans,Least-Squares Analysis,Microarray Analysis,Neoplasms,Neoplasms: classification,Neoplasms: genetics,Neoplasms: metabolism,Pattern Recognition,Statistical},
  number = {12},
  pages = {1495--502},
  pmid = {17483501},
  shortjournal = {Bioinformatics},
  title = {{Sparse non-negative matrix factorizations via alternating non-negativity-constrained least squares for microarray data analysis.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/17483501},
  volume = {23},
  year = {2007},
}

% NOTE(review): the report identifier and institution below were taken from the
% text that previously sat in the institution field; values displaced from the
% number and url fields are kept in internal-note. Confirm against the report.
@techreport{Albright2006,
  author = {Russell Albright and James Cox and David Duling and Amy N. Langville and C. Meyer},
  booktitle = {Matrix},
  file = {:home/renaud/Documents/articles/NMF/Algorithms, Initializations, and Convergence for the Nonnegative Matrix Factorization\_Langville2006.pdf:pdf},
  institution = {North Carolina State University},
  internal-note = {Record previously carried number 919. Alternate download: http://meyer.math.ncsu.edu/Meyer/PS\_Files/NMFInitAlgConv.pdf ; publication index: http://meyer.math.ncsu.edu/Meyer/Abstracts/Publications.html},
  keywords = {60j22,65b99,65c40,65f10,65f15,65f50,alternating least squares,ams subject classifications,clustering,convergence criterion,image processing,initializations,nonnegative matrix factorization,text mining},
  number = {Math 81706},
  title = {{Algorithms, initializations, and convergence for the nonnegative matrix factorization}},
  url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.89.2161\&rep=rep1\&type=pdf},
  year = {2006},
}

@article{Boutsidis2008,
  author = {C Boutsidis and E Gallopoulos},
  doi = {10.1016/j.patcog.2007.09.010},
  file = {:home/renaud/Documents/articles/NMF/SVD based initialization$\backslash$: A head start for nonnegative matrix factorization\_Boutsidis2008.pdf:pdf},
  issn = {00313203},
  journal = {Pattern Recognition},
  month = apr,
  number = {4},
  pages = {1350--1362},
  title = {{SVD based initialization: A head start for nonnegative matrix factorization}},
  url = {http://linkinghub.elsevier.com/retrieve/pii/S0031320307004359},
  volume = {41},
  year = {2008},
}

@article{Lecuyer2002,
  author = {Pierre L'Ecuyer and Richard Simard and E. J. Chen},
  file = {:home/renaud/Documents/articles/stats/An Object-Oriented Random-Number Package with Many Long Streams and Substreams\_Lecuyer2002.pdf:pdf},
  journal = {Operations Research},
  number = {6},
  pages = {1073--1075},
  publisher = {JSTOR},
  title = {{An object-oriented random-number package with many long streams and substreams}},
  url = {http://www.jstor.org/stable/3088626},
  volume = {50},
  year = {2002},
}

@article{Hutchins2008,
  abstract = {MOTIVATION: Cis-acting regulatory elements are frequently constrained by both sequence content and positioning relative to a functional site, such as a splice or polyadenylation site. We describe an approach to regulatory motif analysis based on non-negative matrix factorization (NMF). Whereas existing pattern recognition algorithms commonly focus primarily on sequence content, our method simultaneously characterizes both positioning and sequence content of putative motifs. RESULTS: Tests on artificially generated sequences show that NMF can faithfully reproduce both positioning and content of test motifs. We show how the variation of the residual sum of squares can be used to give a robust estimate of the number of motifs or patterns in a sequence set. Our analysis distinguishes multiple motifs with significant overlap in sequence content and/or positioning. Finally, we demonstrate the use of the NMF approach through characterization of biologically interesting datasets. Specifically, an analysis of mRNA 3'-processing (cleavage and polyadenylation) sites from a broad range of higher eukaryotes reveals a conserved core pattern of three elements.},
  author = {Lucie N Hutchins and Sean M Murphy and Priyam Singh and Joel H Graber},
  doi = {10.1093/bioinformatics/btn526},
  file = {:home/renaud/Documents/articles/NMF/Position-dependent motif characterization using non-negative matrix factorization\_Hutchins2008.pdf:pdf},
  issn = {1367-4811},
  journal = {Bioinformatics (Oxford, England)},
  keywords = {Algorithms,Computational Biology,Computational Biology: methods,Messenger,Messenger: genetics,Messenger: metabolism,RNA,Regulatory Sequences,Ribonucleic Acid,Sequence Analysis},
  month = dec,
  number = {23},
  pages = {2684--90},
  pmid = {18852176},
  title = {{Position-dependent motif characterization using non-negative matrix factorization.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/18852176},
  volume = {24},
  year = {2008},
}

@article{Frigyesi2008,
  abstract = {Non-negative matrix factorization (NMF) is a relatively new approach to analyze gene expression data that models data by additive combinations of non-negative basis vectors (metagenes). The non-negativity constraint makes sense biologically as genes may either be expressed or not, but never show negative expression. We applied NMF to five different microarray data sets. We estimated the appropriate number metagens by comparing the residual error of NMF reconstruction of data to that of NMF reconstruction of permutated data, thus finding when a given solution contained more information than noise. This analysis also revealed that NMF could not factorize one of the data sets in a meaningful way. We used GO categories and pre defined gene sets to evaluate the biological significance of the obtained metagenes. By analyses of metagenes specific for the same GO-categories we could show that individual metagenes activated different aspects of the same biological processes. Several of the obtained metagenes correlated with tumor subtypes and tumors with characteristic chromosomal translocations, indicating that metagenes may correspond to specific disease entities. Hence, NMF extracts biological relevant structures of microarray expression data and may thus contribute to a deeper understanding of tumor behavior.},
  author = {Attila Frigyesi and Mattias H{\"o}glund},
  file = {:home/renaud/Documents/articles/NMF/Non-Negative Matrix Factorization for the Analysis of Complex Gene Expression Data$\backslash$: Identification of Clinically Relevant Tumor Subtypes\_Frigyesi2008.pdf:pdf},
  issn = {1176-9351},
  journal = {Cancer informatics},
  keywords = {gene expression,metagenes,nmf,tumor classification},
  month = jan,
  number = {2003},
  pages = {275--92},
  pmid = {19259414},
  title = {{Non-negative matrix factorization for the analysis of complex gene expression data: identification of clinically relevant tumor subtypes.}},
  url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2623306/},
  volume = {6},
  year = {2008},
}

@article{Brunet2004,
  abstract = {We describe here the use of nonnegative matrix factorization (NMF), an algorithm based on decomposition by parts that can reduce the dimension of expression data from thousands of genes to a handful of metagenes. Coupled with a model selection mechanism, adapted to work for any stochastic clustering algorithm, NMF is an efficient method for identification of distinct molecular patterns and provides a powerful method for class discovery. We demonstrate the ability of NMF to recover meaningful biological information from cancer-related microarray data. NMF appears to have advantages over other methods such as hierarchical clustering or self-organizing maps. We found it less sensitive to a priori selection of genes or initial conditions and able to detect alternative or context-dependent patterns of gene expression in complex biological systems. This ability, similar to semantic polysemy in text, provides a general method for robust molecular pattern discovery.},
  author = {Jean-Philippe Brunet and Pablo Tamayo and Todd R Golub and Jill P Mesirov},
  doi = {10.1073/pnas.0308531101},
  file = {:home/renaud/Documents/articles/NMF/Metagenes and Molecular pattern discovery using matrix factorization Brunet2004.pdf:pdf},
  issn = {0027-8424},
  journal = {Proceedings of the National Academy of Sciences of the United States of America},
  keywords = {Algorithms,Central Nervous System Neoplasms,Central Nervous System Neoplasms: classification,Central Nervous System Neoplasms: genetics,Computational Biology,Data Interpretation,Genetic,Leukemia,Leukemia: classification,Leukemia: genetics,Medulloblastoma,Medulloblastoma: genetics,Models,Neoplasms,Neoplasms: classification,Neoplasms: genetics,Statistical},
  number = {12},
  pages = {4164--9},
  pmid = {15016911},
  title = {{Metagenes and molecular pattern discovery using matrix factorization.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/15016911},
  volume = {101},
  year = {2004},
}

@article{Pascual-Montano2006,
  author = {Alberto Pascual-Montano and Jose Maria Carazo and K Kochi and D Lehmann and R D Pascual-Marqui},
  file = {:home/renaud/Documents/articles/NMF/Nonsmooth nonnegative matrix factorization (nsNMF)\_Paascual-Montano2006.pdf:pdf},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell},
  pages = {403--415},
  title = {{Nonsmooth nonnegative matrix factorization (nsNMF)}},
  volume = {28},
  year = {2006},
}

@article{Lee1999,
  abstract = {Is perception of the whole based on perception of its parts? There is psychological and physiological evidence for parts-based representations in the brain, and certain computational theories of object recognition rely on such representations. But little is known about how brains or computers might learn the parts of objects. Here we demonstrate an algorithm for non-negative matrix factorization that is able to learn parts of faces and semantic features of text. This is in contrast to other methods, such as principal components analysis and vector quantization, that learn holistic, not parts-based, representations. Non-negative matrix factorization is distinguished from the other methods by its use of non-negativity constraints. These constraints lead to a parts-based representation because they allow only additive, not subtractive, combinations. When non-negative matrix factorization is implemented as a neural network, parts-based representations emerge by virtue of two properties: the firing rates of neurons are never negative and synaptic strengths do not change sign.},
  author = {D D Lee and H S Seung},
  doi = {10.1038/44565},
  file = {:home/renaud/Documents/articles/NMF/Learning the parts of objects by non-negative matrix factorization\_Lee1999.pdf:pdf},
  issn = {0028-0836},
  journal = {Nature},
  keywords = {Algorithms,Face,Humans,Learning,Models,Neurological,Perception,Perception: physiology,Semantics},
  month = oct,
  number = {6755},
  pages = {788--91},
  pmid = {10548103},
  title = {{Learning the parts of objects by non-negative matrix factorization.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/10548103},
  volume = {401},
  year = {1999},
}

@article{Paatero1994,
  abstract = {A new variant `PMF' of factor analysis is described. It is assumed that X is a matrix of observed data and $\sigma$ is the known matrix of standard deviations of elements of X. Both X and $\sigma$ are of dimensions $n \times m$. The method solves the bilinear matrix problem X = GF + E where G is the unknown left hand factor matrix (scores) of dimensions $n \times p$, F is the unknown right hand factor matrix (loadings) of dimensions $p \times m$, and E is the matrix of residuals. The problem is solved in the weighted least squares sense: G and F are determined so that the Frobenius norm of E divided (element-by-element) by $\sigma$ is minimized. Furthermore, the solution is constrained so that all the elements of G and F are required to be non-negative. It is shown that the solutions by PMF are usually different from any solutions produced by the customary factor analysis (FA, i.e. principal component analysis (PCA) followed by rotations). Usually PMF produces a better fit to the data than FA. Also, the result of PF is guaranteed to be non-negative, while the result of FA often cannot be rotated so that all negative entries would be eliminated. Different possible application areas of the new method are briefly discussed. In environmental data, the error estimates of data can be widely varying and non-negativity is often an essential feature of the underlying models. Thus it is concluded that PMF is better suited than FA or PCA in many environmental applications. Examples of successful applications of PMF are shown in companion papers.},
  author = {Pentti Paatero and Unto Tapper},
  doi = {10.1002/env.3170050203},
  journal = {Environmetrics},
  keywords = {algorithm,nmf},
  mendeley-tags = {algorithm,nmf},
  number = {2},
  pages = {111--126},
  title = {{Positive matrix factorization: A non-negative factor model with optimal utilization of error estimates of data values}},
  type = {Journal article},
  url = {http://www3.interscience.wiley.com/cgi-bin/abstract/113468839/ABSTRACT},
  volume = {5},
  year = {1994},
}

@article{Hoyer2004,
  author = {P O Hoyer},
  file = {:home/renaud/Documents/articles/NMF/Non-negative Matrix Factorization with Sparseness Constraints\_Hoyer2004.pdf:pdf},
  journal = {The Journal of Machine Learning Research},
  pages = {1457--1469},
  title = {{Non-negative matrix factorization with sparseness constraints}},
  url = {http://portal.acm.org/citation.cfm?id=1044709},
  volume = {5},
  year = {2004},
}

@article{Carmona-Saez2006,
  abstract = {BACKGROUND: The extended use of microarray technologies has enabled the generation and accumulation of gene expression datasets that contain expression levels of thousands of genes across tens or hundreds of different experimental conditions. One of the major challenges in the analysis of such datasets is to discover local structures composed by sets of genes that show coherent expression patterns across subsets of experimental conditions. These patterns may provide clues about the main biological processes associated to different physiological states. RESULTS: In this work we present a methodology able to cluster genes and conditions highly related in sub-portions of the data. Our approach is based on a new data mining technique, Non-smooth Non-Negative Matrix Factorization (nsNMF), able to identify localized patterns in large datasets. We assessed the potential of this methodology analyzing several synthetic datasets as well as two large and heterogeneous sets of gene expression profiles. In all cases the method was able to identify localized features related to sets of genes that show consistent expression patterns across subsets of experimental conditions. The uncovered structures showed a clear biological meaning in terms of relationships among functional annotations of genes and the phenotypes or physiological states of the associated conditions. CONCLUSION: The proposed approach can be a useful tool to analyze large and heterogeneous gene expression datasets. The method is able to identify complex relationships among genes and conditions that are difficult to identify by standard clustering algorithms.},
  author = {Pedro Carmona-Saez and Roberto D Pascual-Marqui and Francisco Tirado and Jose Maria Carazo and Alberto Pascual-Montano},
  doi = {10.1186/1471-2105-7-78},
  file = {:home/renaud/Documents/articles/NMF/Biclustering of gene expression data by non-smooth non-negative matrix factorization\_Carmona-Saez2006.pdf:pdf},
  issn = {1471-2105},
  journal = {BMC bioinformatics},
  keywords = {Algorithms,Artificial Intelligence,Automated,Automated: methods,Cluster Analysis,Factor Analysis,Gene Expression Profiling,Gene Expression Profiling: methods,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Pattern Recognition,Statistical},
  pages = {78},
  pmid = {16503973},
  title = {{Biclustering of gene expression data by Non-smooth Non-negative Matrix Factorization.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/16503973},
  volume = {7},
  year = {2006},
}

@article{Wang2006,
  abstract = {BACKGROUND: Non-negative matrix factorisation (NMF), a machine learning algorithm, has been applied to the analysis of microarray data. A key feature of NMF is the ability to identify patterns that together explain the data as a linear combination of expression signatures. Microarray data generally includes individual estimates of uncertainty for each gene in each condition, however NMF does not exploit this information. Previous work has shown that such uncertainties can be extremely valuable for pattern recognition. RESULTS: We have created a new algorithm, least squares non-negative matrix factorization, LS-NMF, which integrates uncertainty measurements of gene expression data into NMF updating rules. While the LS-NMF algorithm maintains the advantages of original NMF algorithm, such as easy implementation and a guaranteed locally optimal solution, the performance in terms of linking functionally related genes has been improved. LS-NMF exceeds NMF significantly in terms of identifying functionally related genes as determined from annotations in the MIPS database. CONCLUSION: Uncertainty measurements on gene expression data provide valuable information for data analysis, and use of this information in the LS-NMF algorithm significantly improves the power of the NMF technique.},
  author = {Guoli Wang and Andrew V Kossenkov and Michael F Ochs},
  doi = {10.1186/1471-2105-7-175},
  file = {:home/renaud/Documents/articles/NMF/LS-NMF A modified non-negative matrix factorization algorithm utilizing uncertainty estimates\_Wang2006.pdf:pdf},
  issn = {1471-2105},
  journal = {BMC bioinformatics},
  keywords = {Algorithms,Automated,Automated: methods,Databases,Genetic,Messenger,Messenger: genetics,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Oligonucleotide Array Sequence Analysis: statistic,Pattern Recognition,RNA,Uncertainty},
  month = jan,
  pages = {175},
  pmid = {16569230},
  title = {{LS-NMF: a modified non-negative matrix factorization algorithm utilizing uncertainty estimates.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/16569230},
  volume = {7},
  year = {2006},
}
% Reference entries: one BibTeX record per work, one field per line.
% Month values use the predefined three-letter macros (unbraced); special
% characters in url fields are backslash-escaped.

@article{VanBenthem2004,
  author = {Mark H. {Van Benthem} and Michael R. Keenan},
  doi = {10.1002/cem.889},
  file = {:home/renaud/Documents/articles/NMF/Fast algorithm for the solution of large-scale non-negativity-constrained least squares problems\_Benthem2005.pdf:pdf},
  issn = {0886-9383},
  journal = {Journal of Chemometrics},
  keywords = {als,mcr,nnls,non-negativity},
  month = oct,
  number = {10},
  pages = {441--450},
  title = {{Fast algorithm for the solution of large-scale non-negativity-constrained least squares problems}},
  url = {http://doi.wiley.com/10.1002/cem.889},
  volume = {18},
  year = {2004},
}

% The middle initial of Caligiuri is upper-case on purpose: in "First von Last"
% form BibTeX reads a lower-case word before the surname as a von particle.
@article{Golub1999,
  abstract = {Although cancer classification has improved over the past 30 years, there has been no general approach for identifying new cancer classes (class discovery) or for assigning tumors to known classes (class prediction). Here, a generic approach to cancer classification based on gene expression monitoring by DNA microarrays is described and applied to human acute leukemias as a test case. A class discovery procedure automatically discovered the distinction between acute myeloid leukemia (AML) and acute lymphoblastic leukemia (ALL) without previous knowledge of these classes. An automatically derived class predictor was able to determine the class of new leukemia cases. The results demonstrate the feasibility of cancer classification based solely on gene expression monitoring and suggest a general strategy for discovering and predicting cancer classes for other types of cancer, independent of previous biological knowledge.},
  author = {T R Golub and D K Slonim and P Tamayo and C Huard and M Gaasenbeek and J P Mesirov and H Coller and M L Loh and J R Downing and M A Caligiuri and C D Bloomfield and E S Lander},
  file = {:home/renaud/Documents/articles/microarray/Molecular Classification of Cancer$\backslash$: Class Discovery and Class Prediction by Gene Expression\_Golub1999.pdf:pdf},
  issn = {0036-8075},
  journal = {Science (New York, N.Y.)},
  keywords = {Acute Disease,Antineoplastic Combined Chemotherapy Protocols,Antineoplastic Combined Chemotherapy Protocols: th,Cell Adhesion,Cell Adhesion: genetics,Cell Cycle,Cell Cycle: genetics,Gene Expression Profiling,Homeodomain Proteins,Homeodomain Proteins: genetics,Humans,Leukemia, Myeloid,Leukemia, Myeloid: classification,Leukemia, Myeloid: drug therapy,Leukemia, Myeloid: genetics,Neoplasm Proteins,Neoplasm Proteins: genetics,Neoplasms,Neoplasms: classification,Neoplasms: genetics,Oligonucleotide Array Sequence Analysis,Oncogenes,Precursor Cell Lymphoblastic Leukemia-Lymphoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma: cl,Precursor Cell Lymphoblastic Leukemia-Lymphoma: dr,Precursor Cell Lymphoblastic Leukemia-Lymphoma: ge,Predictive Value of Tests,Reproducibility of Results,Treatment Outcome},
  month = oct,
  number = {5439},
  pages = {531--7},
  pmid = {10521349},
  title = {{Molecular classification of cancer: class discovery and class prediction by gene expression monitoring.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/10521349},
  volume = {286},
  year = {1999},
}

@article{Cichocki2008,
  author = {Andrzej Cichocki and Rafal Zdunek and Shun-ichi Amari},
  file = {:home/renaud/Documents/articles/NMF/Nonnegative Matrix and Tensor Factorization\_Cichocki2008.pdf:pdf},
  journal = {IEEE Signal Processing Magazine},
  pages = {142--145},
  title = {{Nonnegative matrix and tensor factorization}},
  volume = {25},
  year = {2008},
}

@article{Berry2007,
  author = {Berry, M. W. and Browne, M and Langville, Amy N. and Pauca, V. P. and Plemmons, R. J.},
  file = {:home/renaud/Documents/articles/NMF/Algorithms and Applications for Approximate Nonnegative Matrix Factorization\_Berry2006.pdf:pdf},
  journal = {Computational Statistics \& Data Analysis},
  number = {1},
  pages = {155--173},
  publisher = {Elsevier},
  title = {{Algorithms and applications for approximate nonnegative matrix factorization}},
  url = {http://www.sciencedirect.com/science/article/pii/S0167947306004191},
  volume = {52},
  year = {2007},
}

% NOTE(review): the pages value below looks like an export artifact and the
% entry has no volume -- verify against the publisher record before citing.
@article{Chu2004,
  author = {Chu, M and Diele, F and Plemmons, R and Ragni, S},
  file = {:home/renaud/Documents/articles/NMF/Optimality, computation, and interpretations of nonnegative matrix factorizations\_Chu2004.pdf:pdf},
  journal = {SIAM Journal on Matrix Analysis},
  keywords = {ellipsoid method,gradient method,kuhn-,least squares,linear model,mass balance,newton method,nonnegative matrix factorization,quadratic programming,reduced quadratic model,tucker condition},
  pages = {4--8030},
  publisher = {Citeseer},
  title = {{Optimality, computation, and interpretation of nonnegative matrix factorizations}},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.61.5758},
  year = {2004},
}

@article{Gao2005,
  abstract = {MOTIVATION: Identifying different cancer classes or subclasses with similar morphological appearances presents a challenging problem and has important implication in cancer diagnosis and treatment. Clustering based on gene-expression data has been shown to be a powerful method in cancer class discovery. Non-negative matrix factorization is one such method and was shown to be advantageous over other clustering techniques, such as hierarchical clustering or self-organizing maps. In this paper, we investigate the benefit of explicitly enforcing sparseness in the factorization process. RESULTS: We report an improved unsupervised method for cancer classification by the use of gene-expression profile via sparse non-negative matrix factorization. We demonstrate the improvement by direct comparison with classic non-negative matrix factorization on the three well-studied datasets. In addition, we illustrate how to identify a small subset of co-expressed genes that may be directly involved in cancer.},
  author = {Gao, Yuan and Church, George},
  doi = {10.1093/bioinformatics/bti653},
  file = {:home/renaud/Documents/articles/NMF/Improving molecular cancer class discovery through sparse non-negative matrix factorization\_Gao2005.pdf:pdf},
  issn = {1367-4803},
  journal = {Bioinformatics (Oxford, England)},
  keywords = {Algorithms,Biological,Biological: classification,Biological: metabolism,Computer-Assisted,Computer-Assisted: methods,Diagnosis,Factor Analysis,Gene Expression Profiling,Gene Expression Profiling: methods,Humans,Neoplasm Proteins,Neoplasm Proteins: classification,Neoplasm Proteins: metabolism,Neoplasms,Neoplasms: classification,Neoplasms: diagnosis,Neoplasms: metabolism,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Reproducibility of Results,Sensitivity and Specificity,Statistical,Tumor Markers},
  month = nov,
  number = {21},
  pages = {3970--5},
  pmid = {16244221},
  title = {{Improving molecular cancer class discovery through sparse non-negative matrix factorization.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/16244221},
  volume = {21},
  year = {2005},
}

% The first author's surname is the two-word "Le Roux" (as in the attached
% file name), so it is written in "Last, First" form to keep both words together.
@article{Roux2008,
  author = {Le Roux, Jonathan and de Cheveign{\'e}, Alain},
  file = {:home/renaud/Documents/articles/NMF/Adaptive Template Matching with Shift-Invariant Semi-NMF\_Le Roux2008.pdf:pdf},
  journal = {Science And Technology},
  title = {{Adaptive template matching with shift-invariant semi-NMF}},
  url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.143.6846\&rep=rep1\&type=pdf},
  year = {2008},
}

@article{Ding2010,
  abstract = {We present several new variations on the theme of nonnegative matrix factorization (NMF). Considering factorizations of the form X=FG(T), we focus on algorithms in which G is restricted to containing nonnegative entries, but allowing the data matrix X to have mixed signs, thus extending the applicable range of NMF methods. We also consider algorithms in which the basis vectors of F are constrained to be convex combinations of the data points. This is used for a kernel extension of NMF. We provide algorithms for computing these new factorizations and we provide supporting theoretical analysis. We also analyze the relationships between our algorithms and clustering algorithms, and consider the implications for sparseness of solutions. Finally, we present experimental results that explore the properties of these new methods.},
  author = {Ding, Chris and Li, Tao and Jordan, Michael I},
  doi = {10.1109/TPAMI.2008.277},
  file = {:home/renaud/Documents/articles/NMF/Convex and Semi-Nonnegative Matrix Factorization\_Ding2009.pdf:pdf},
  issn = {1939-3539},
  journal = {IEEE transactions on pattern analysis and machine intelligence},
  month = jan,
  number = {1},
  pages = {45--55},
  pmid = {19926898},
  title = {{Convex and semi-nonnegative matrix factorizations.}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/19926898},
  volume = {32},
  year = {2010},
}