# Source code for PyWGCNA.geneExp
import numpy as np
import pandas as pd
import os
import anndata as ad
# Silence NumPy's floating-point warnings for division by zero ('divide') and
# for invalid operations ('invalid'). This is a module-level statement, so it
# takes effect as soon as this module is imported.
np.seterr(divide='ignore', invalid='ignore')
class GeneExp:
    """
    A class used to create gene expression anndata along with data traits, including both gene and sample information.

    The expression data is taken from the first of ``geneExpPath``, ``geneExp`` and ``anndata`` that is not None
    (checked in that order). When ``anndata`` is the source it is stored as-is and ``geneInfo`` / ``sampleInfo``
    are not applied.

    :param species: species of the data you use i.e mouse, human
    :type species: str
    :param level: which type of data you use including gene, transcript (default: gene)
    :type level: str
    :param anndata: if the expression data is in anndata format you should pass it through this parameter. X should be expression matrix. var is a gene information and obs is a sample information.
    :type anndata: anndata
    :param geneExp: expression matrix; its index (rows) is used as sample IDs and its columns as gene/transcript IDs
    :type geneExp: pandas dataframe
    :param geneExpPath: path of expression matrix (read with the first column as index)
    :type geneExpPath: str
    :param sep: separation symbol to use for reading data in geneExpPath properly
    :type sep: str
    :param geneInfo: dataframe that contains genes information it should have a same index as gene expression column names (gene/transcript ID)
    :type geneInfo: pandas dataframe
    :param sampleInfo: dataframe that contains samples information it should have a same index as gene expression index (sample ID)
    :type sampleInfo: pandas dataframe

    :raises ValueError: if geneExpPath is not an existing file, if geneExp is not a pandas dataframe,
        if anndata is not an AnnData object, or if no expression input is given at all
    """

    def __init__(self,
                 species=None,
                 level='gene',
                 anndata=None,
                 geneExp=None,
                 geneExpPath=None,
                 sep=',',
                 geneInfo=None,
                 sampleInfo=None):
        self.species = species
        self.level = level

        if geneExpPath is not None:
            if not os.path.isfile(geneExpPath):
                raise ValueError("file does not exist!")
            expressionList = pd.read_csv(geneExpPath, sep=sep, index_col=0)
        elif geneExp is not None:
            if not isinstance(geneExp, pd.DataFrame):
                raise ValueError("geneExp is not data frame!")
            expressionList = geneExp
        elif anndata is not None:
            if not isinstance(anndata, ad.AnnData):
                raise ValueError("anndata is not an AnnData object!")
            # A ready-made AnnData already carries obs/var, so it is stored untouched.
            self.geneExpr = anndata
            return
        else:
            raise ValueError("all type of input can not be empty at the same time!")

        # Without explicit annotations, fall back to empty tables that only carry the IDs.
        if geneInfo is None:
            geneInfo = pd.DataFrame(index=expressionList.columns)
        if sampleInfo is None:
            sampleInfo = pd.DataFrame(index=expressionList.index)

        self.geneExpr = ad.AnnData(X=expressionList, obs=sampleInfo, var=geneInfo)

    @staticmethod
    def _merge_info(current, incoming):
        """
        Build an annotation table from ``current`` updated with the columns of ``incoming``.

        Columns present in both tables are taken from ``incoming``. The result keeps exactly the rows
        (and row order) of ``current``. Neither input is modified, so a failure while combining the
        tables leaves the caller's data untouched.

        :param current: existing annotation table
        :type current: pandas dataframe
        :param incoming: table whose columns should be added/overwritten
        :type incoming: pandas dataframe

        :return: merged annotation table indexed like ``current``
        :rtype: pandas dataframe
        """
        shared = current.columns.intersection(incoming.columns)
        kept = current.drop(columns=shared)
        return pd.concat([kept, incoming], axis=1).loc[kept.index, :]

    @staticmethod
    def updateGeneInfo(geneExpr, geneInfo=None, path=None, sep=','):
        """
        add/update genes info in expr anndata

        If ``path`` is given it takes precedence over ``geneInfo``. Columns that already exist in
        ``geneExpr.var`` are replaced by the new ones; rows are matched on the index.

        :param geneExpr: gene expression data along with sample and genes/transcript information
        :type geneExpr: anndata
        :param geneInfo: gene information table you want to add to your data
        :type geneInfo: pandas dataframe
        :param path: path of geneInfo
        :type path: str
        :param sep: separation symbol to use for reading data in path properly (default: ',')
        :type sep: str

        :return: updated gene expression data along with sample and genes/transcript information
        :rtype: anndata

        :raises ValueError: if path is not an existing file, if geneInfo is not a pandas dataframe,
            or if both path and geneInfo are None
        """
        if path is not None:
            if not os.path.isfile(path):
                raise ValueError("path does not exist!")
            geneInfo = pd.read_csv(path, sep=sep, index_col=0)
        elif geneInfo is not None:
            if not isinstance(geneInfo, pd.DataFrame):
                raise ValueError("geneInfo is not pandas dataframe!")
        else:
            raise ValueError("path and geneInfo can not be empty at the same time!")

        geneExpr.var = GeneExp._merge_info(geneExpr.var, geneInfo)
        return geneExpr

    @staticmethod
    def updateSampleInfo(geneExpr, sampleInfo=None, path=None, sep=','):
        """
        add/update metadata in expr anndata

        If ``path`` is given it takes precedence over ``sampleInfo``. Columns that already exist in
        ``geneExpr.obs`` are replaced by the new ones; rows are matched on the index.

        :param geneExpr: gene expression data along with sample and genes/transcript information
        :type geneExpr: anndata
        :param sampleInfo: Sample information table you want to add to your data
        :type sampleInfo: pandas dataframe
        :param path: path of metaData
        :type path: str
        :param sep: separation symbol to use for reading data in path properly (default: ',')
        :type sep: str

        :return: updated gene expression data along with sample and genes/transcript information
        :rtype: anndata

        :raises ValueError: if path is not an existing file, if sampleInfo is not a pandas dataframe,
            or if both path and sampleInfo are None
        """
        if path is not None:
            if not os.path.isfile(path):
                raise ValueError("path does not exist!")
            sampleInfo = pd.read_csv(path, sep=sep, index_col=0)
        elif sampleInfo is not None:
            if not isinstance(sampleInfo, pd.DataFrame):
                raise ValueError("meta data is not pandas dataframe!")
        else:
            raise ValueError("path and metaData can not be empty at the same time!")

        geneExpr.obs = GeneExp._merge_info(geneExpr.obs, sampleInfo)
        return geneExpr