# Source code for PyWGCNA.geneExp

import numpy as np
import pandas as pd
import os
import anndata as ad


# Silence NumPy's floating-point warnings for division by zero and for invalid
# operations (e.g. 0/0 producing NaN).
# NOTE: this runs at import time and changes NumPy's error-handling state for
# code executed afterwards, not only for the functions defined in this module.
np.seterr(divide='ignore', invalid='ignore')


class GeneExp:
    """
    A class used to create a gene expression anndata along with data traits,
    including both gene and sample information.

    Exactly one expression source is used. They are checked in this order:
    ``geneExpPath``, then ``geneExp``, then ``anndata``. When ``anndata`` is
    used it is stored as-is and ``geneInfo`` / ``sampleInfo`` are ignored.

    :param species: species of the data you use i.e mouse, human
    :type species: str
    :param level: which type of data you use including gene, transcript (default: gene)
    :type level: str
    :param anndata: if the expression data is in anndata format you should pass it through this parameter.
        X should be expression matrix. var is a gene information and obs is a sample information.
    :type anndata: anndata
    :param geneExp: expression matrix in which samples are the rows (index) and
        genes/transcripts are the columns
    :type geneExp: pandas dataframe
    :param geneExpPath: path of expression matrix (read with the first column as index)
    :type geneExpPath: str
    :param sep: separation symbol to use for reading data in geneExpPath properly
    :type sep: str
    :param geneInfo: dataframe that contains genes information it should have a same index
        as gene expression column names (gene/transcript ID)
    :type geneInfo: pandas dataframe
    :param sampleInfo: dataframe that contains samples information it should have a same index
        as gene expression index (sample ID)
    :type sampleInfo: pandas dataframe

    :raises ValueError: if ``geneExpPath`` is not an existing file, if ``geneExp`` is not a
        pandas dataframe, if ``anndata`` is not an AnnData object, or if no input is given
    """

    def __init__(self, species=None, level='gene', anndata=None, geneExp=None,
                 geneExpPath=None, sep=',', geneInfo=None, sampleInfo=None):
        self.species = species
        self.level = level

        if geneExpPath is not None:
            if not os.path.isfile(geneExpPath):
                raise ValueError("file does not exist!")
            expressionList = pd.read_csv(geneExpPath, sep=sep, index_col=0)
        elif geneExp is not None:
            if not isinstance(geneExp, pd.DataFrame):
                raise ValueError("geneExp is not data frame!")
            expressionList = geneExp
        elif anndata is not None:
            if not isinstance(anndata, ad.AnnData):
                # Previously this reported "geneExp is not data frame!", which
                # pointed the user at the wrong argument.
                raise ValueError("anndata is not an AnnData object!")
            # An AnnData object already carries obs/var, so nothing is rebuilt.
            self.geneExpr = anndata
            return
        else:
            raise ValueError("all type of input can not be empty at the same time!")

        # Default annotation tables are empty frames aligned with the matrix:
        # genes come from the columns, samples from the index.
        if geneInfo is None:
            geneInfo = pd.DataFrame(index=expressionList.columns)
        if sampleInfo is None:
            sampleInfo = pd.DataFrame(index=expressionList.index)

        self.geneExpr = ad.AnnData(X=expressionList, obs=sampleInfo, var=geneInfo)

    @staticmethod
    def _merge_info(current, incoming):
        """
        Merge ``incoming`` columns into the ``current`` annotation table.

        Columns present in both tables are taken from ``incoming``. The result
        keeps exactly the rows (and row order) of ``current``; rows of
        ``current`` that are missing from ``incoming`` get NaN in the new columns.
        ``current`` itself is not modified, so a failure during the merge does
        not leave the caller's table with columns already dropped.

        :param current: existing annotation table
        :type current: pandas dataframe
        :param incoming: table holding the columns to add or overwrite
        :type incoming: pandas dataframe

        :return: merged annotation table indexed like ``current``
        :rtype: pandas dataframe
        """
        overlap = current.columns.intersection(incoming.columns)
        kept = current.drop(columns=overlap)
        return pd.concat([kept, incoming], axis=1).loc[current.index, :]

    @staticmethod
    def updateGeneInfo(geneExpr, geneInfo=None, path=None, sep=','):
        """
        add/update genes info in expr anndata

        ``path`` takes precedence over ``geneInfo`` when both are given.

        :param geneExpr: gene expression data along with sample and genes/transcript information
        :type geneExpr: anndata
        :param geneInfo: gene information table you want to add to your data
        :type geneInfo: pandas dataframe
        :param path: path of geneInfo
        :type path: str
        :param sep: separation symbol to use for reading data in path properly (default: ',')
        :type sep: str

        :return: updated gene expression data along with sample and genes/transcript information
            (the same object that was passed in, with ``var`` replaced)
        :rtype: anndata

        :raises ValueError: if ``path`` is not an existing file, if ``geneInfo`` is not a
            pandas dataframe, or if neither is given
        """
        if path is not None:
            if not os.path.isfile(path):
                raise ValueError("path does not exist!")
            geneInfo = pd.read_csv(path, sep=sep, index_col=0)
        elif geneInfo is not None:
            if not isinstance(geneInfo, pd.DataFrame):
                raise ValueError("geneInfo is not pandas dataframe!")
        else:
            raise ValueError("path and geneInfo can not be empty at the same time!")

        geneExpr.var = GeneExp._merge_info(geneExpr.var, geneInfo)

        return geneExpr

    @staticmethod
    def updateSampleInfo(geneExpr, sampleInfo=None, path=None, sep=','):
        """
        add/update metadata in expr anndata

        ``path`` takes precedence over ``sampleInfo`` when both are given.

        :param geneExpr: gene expression data along with sample and genes/transcript information
        :type geneExpr: anndata
        :param sampleInfo: Sample information table you want to add to your data
        :type sampleInfo: pandas dataframe
        :param path: path of metaData
        :type path: str
        :param sep: separation symbol to use for reading data in path properly (default: ',')
        :type sep: str

        :return: updated gene expression data along with sample and genes/transcript information
            (the same object that was passed in, with ``obs`` replaced)
        :rtype: anndata

        :raises ValueError: if ``path`` is not an existing file, if ``sampleInfo`` is not a
            pandas dataframe, or if neither is given
        """
        if path is not None:
            if not os.path.isfile(path):
                raise ValueError("path does not exist!")
            sampleInfo = pd.read_csv(path, sep=sep, index_col=0)
        elif sampleInfo is not None:
            if not isinstance(sampleInfo, pd.DataFrame):
                raise ValueError("meta data is not pandas dataframe!")
        else:
            raise ValueError("path and metaData can not be empty at the same time!")

        geneExpr.obs = GeneExp._merge_info(geneExpr.obs, sampleInfo)

        return geneExpr