Source code for PyWGCNA.utils

import pickle
import os
import biomart
import pandas as pd
import requests
import matplotlib.pyplot as plt
import networkx as nx

from PyWGCNA.comparison import *

# bcolors
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKCYAN = '\033[96m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'


# read WGCNA obj
[docs]def readWGCNA(file): """ Read a WGCNA from a saved pickle file. :param file: Name / path of WGCNA object :type file: str :return: PyWGCNA object :rtype: PyWGCNA class """ if not os.path.isfile(file): raise ValueError('WGCNA object not found at given path!') picklefile = open(file, 'rb') wgcna = pickle.load(picklefile) print(f"{BOLD}{OKBLUE}Reading {wgcna.name} WGCNA done!{ENDC}") return wgcna
# compare serveral networks
[docs]def compareNetworks(PyWGCNAs): """ Compare serveral PyWGCNA objects :param PyWGCNAs: list of PyWGCNA objects :type PyWGCNAs: list of PyWGCNA class :return: compare object :rtype: Compare class """ geneModules = {} for PyWGCNA in PyWGCNAs: geneModules[PyWGCNA.name] = PyWGCNA.datExpr.var compare = Comparison(geneModules=geneModules) compare.compareNetworks() return compare
# compare WGCNA to single cell
[docs]def compareSingleCell(PyWGCNAs, sc): """ Compare WGCNA and gene marker from single cell experiment :param PyWGCNAs: WGCNA object :type PyWGCNAs: PyWGCNA class :param sc: gene marker table which has .... :type sc: pandas dataframe :return: compare object :rtype: Compare class """ geneModules = {} for PyWGCNA in PyWGCNAs: geneModules[PyWGCNA.name] = PyWGCNA.datExpr.var geneModules["single_cell"] = sc compare = Comparison(geneModules=geneModules) compare.compareNetworks() return compare
[docs]def getGeneList(dataset='mmusculus_gene_ensembl', attributes=['ensembl_gene_id', 'external_gene_name', 'gene_biotype'], maps=['gene_id', 'gene_name', 'go_id'], server_domain="http://ensembl.org/biomart"): """ get table that map gene ensembl id to gene name from biomart :param dataset: name of the dataset we used from biomart; mouse: mmusculus_gene_ensembl and human: hsapiens_gene_ensembl you can find more information here: https://bioconductor.riken.jp/packages/3.4/bioc/vignettes/biomaRt/inst/doc/biomaRt.html#selecting-a-biomart-database-and-dataset :type dataset: string :param attributes: List the types of data we want :type attributes: list :param maps: mapping between attributes and column names of gene information you want to show :type maps: list :param server_domain: URL of ensembl biomart server that you want to use to pull out the information (options: [‘’, ‘uswest’, ‘asia’]) :type server_domain: string :return: table extracted from biomart related to the datasets including information from attributes :rtype: pandas dataframe """ r = requests.get(f"{server_domain}/martview") if r.status_code != 200: print("The biomart server you requested is currently unavailable! please use other biomart server or try later") return r.close() server = biomart.BiomartServer(server_domain) mart = server.datasets[dataset] # Get the mapping between the attributes response = mart.search({'attributes': attributes}) data = response.raw.data.decode('ascii') geneInfo = pd.DataFrame(columns=attributes) # Store the data in a dict for line in data.splitlines(): line = line.split('\t') tmp = pd.DataFrame(line, index=attributes).T dict = {} for i in range(len(attributes)): dict[attributes[i]] = line[i] geneInfo = pd.concat([geneInfo, tmp], ignore_index=True) geneInfo.index = geneInfo[attributes[0]] geneInfo.drop(attributes[0], axis=1, inplace=True) if maps is not None: geneInfo.columns = maps[1:] return geneInfo
[docs]def getGeneListGOid(dataset='mmusculus_gene_ensembl', attributes=['ensembl_gene_id', 'external_gene_name', 'go_id'], Goid='GO:0003700', server_domain="http://ensembl.org/biomart"): """ get table that find gene id and gene name to specific Go term from biomart :param dataset: name of the dataset we used from biomart; mouse: mmusculus_gene_ensembl and human: hsapiens_gene_ensembl you can find more information here: https://bioconductor.riken.jp/packages/3.4/bioc/vignettes/biomaRt/inst/doc/biomaRt.html#selecting-a-biomart-database-and-dataset :type dataset: string :param attributes: List the types of data we want :type attributes: list :param Goid: GO term id you would like to get genes from them :type Goid: list or str :param server_domain: URL of ensembl biomart server that you want to use to pull out the inforamtion :type server_domain: string :return: table extracted from biomart related to the datasets including information from attributes with filtering :rtype: pandas dataframe """ r = requests.get(f"{server_domain}/martview") if r.status_code != 200: print("The biomart server you requested is currently unavailable! please use other biomart server or try later") return r.close() server = biomart.BiomartServer(server_domain) mart = server.datasets[dataset] # mart.show_attributes() # mart.show_filters() response = mart.search({ 'filters': { 'go': [Goid] }, 'attributes': attributes }) data = response.raw.data.decode('ascii') geneInfo = pd.DataFrame(columns=attributes) # Store the data in a dict for line in data.splitlines(): line = line.split('\t') dict = {} for i in range(len(attributes)): dict[attributes[i]] = line[i] geneInfo = geneInfo.append(dict, ignore_index=True) return geneInfo
# read comparison obj
[docs]def readComparison(file): """ Read a comparison from a saved pickle file. :param file: Name / path of comparison object :type file: string :return: comparison object :rtype: comparison class """ if not os.path.isfile(file): raise ValueError('Comparison object not found at given path!') picklefile = open(file, 'rb') comparison = pickle.load(picklefile) print(f"{BOLD}{OKBLUE}Reading comparison done!{ENDC}") return comparison