Augustin Luna
27 June, 2016
Research Fellow
Department of Biostatistics and Computational Biology
Dana-Farber Cancer Institute
Database | Interaction Count |
---|---|
Reactome | 11924 |
NCI PID | 16017 |
PhosphoSitePlus | 13642 |
HumanCyc | 7024 |
HPRD | 40618 |
PantherDB | 5282 |
DIP | 7102 |
BioGRID | 244843 |
Database | Interaction Count |
---|---|
IntAct | 98347 |
BIND | 35566 |
TRANSFAC | 261624 |
miRTarBase | 51214 |
DrugBank | 19159 |
Recon X | 10910 |
CTD | 313174 |
KEGG | 4472 |
Gene Set | Description | Gene 1 | Gene 2 | Gene 3 | … |
---|---|---|---|---|---|
KEGG_GLYCOLYSIS_GLUCONEOGENESIS | KEGG | GCK | PGK2 | PGK1 | … |
REACTOME_SIGNALING_BY_EGFR_IN_CANCER | Reactome | AKT3 | ADAM10 | SPRY1 | … |
library(paxtoolsr)

# Called without arguments
downloadPc2()

# Single databases: Reactome gene sets (GMT) and the KEGG extended binary SIF
geneSets <- downloadPc2(
  "PathwayCommons.8.Reactome.GSEA.hgnc.gmt.gz",
  version = "8"
)
sif <- downloadPc2(
  "PathwayCommons.8.kegg.EXTENDED_BINARY_SIF.hgnc.txt.gz",
  version = "8"
)

# All databases: gene sets from the combined file
geneSets <- downloadPc2(
  "PathwayCommons.8.All.GSEA.hgnc.gmt.gz",
  version = "8"
)

# Filter the edge table of the SIF object on the identifier "GPI",
# then report how many rows remain
sif <- filterSif(sif$edges, ids = "GPI")
nrow(sif)
[1] 26
# List the column names of the filtered SIF table
colnames(sif)
[1] "PARTICIPANT_A" "INTERACTION_TYPE"
[3] "PARTICIPANT_B" "INTERACTION_DATA_SOURCE"
[5] "INTERACTION_PUBMED_ID" "PATHWAY_NAMES"
[7] "MEDIATOR_IDS"
# First two rows of the first three columns (with = FALSE: select columns by position)
head(sif[, 1:3, with = FALSE], n = 2)
PARTICIPANT_A INTERACTION_TYPE PARTICIPANT_B
1: GPI catalysis-precedes ADPGK
2: GPI controls-production-of CHEBI:17665
library(igraph)
library(data.table) # SIF files read as data.table for speed

setDF(sif) # Convert data.table to data.frame

# graph_from_edgelist() requires a two-column matrix; columns 1 and 3 hold the
# two participants of each interaction. It replaces the deprecated
# graph.edgelist() alias.
g <- graph_from_edgelist(as.matrix(sif[, c(1, 3)]), directed = FALSE)

# layout_with_fr is the current name of the Fruchterman-Reingold layout
# (formerly layout.fruchterman.reingold)
plot(g, layout = layout_with_fr)
library(webchem)

# Convert a chemical name to its ChEBI identifier(s)
cts_convert("16-hydroxypalmitate", from = "Chemical Name", to = "ChEBI")
$`16-hydroxypalmitate`
[1] "CHEBI:55328" "CHEBI:55329"
# Tab-separated table with a header row; its `chebi` column is used below
metab <- read.table(
  "example_chebi.txt",
  sep = "\t",
  header = TRUE,
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)

# KEGG
sifKegg <- downloadPc2(
  "PathwayCommons.8.kegg.EXTENDED_BINARY_SIF.hgnc.txt.gz",
  version = "8"
)
sif <- sifKegg

# Distinct pathway names, then those that mention "purine" in any letter case
paths <- unique(unlist(sif$edges$PATHWAY_NAMES))
purineIdx <- grepl("purine", paths, ignore.case = TRUE)
purinePaths <- paths[purineIdx]

# Filter the edges on the ChEBI identifiers from the input table
metabFilteredSif <- filterSif(sif$edges, ids = metab$chebi)

# Search the per-row pathway names for the purine pathways and flatten the
# result into one de-duplicated index vector (purineIdx is reused for it)
tmp <- searchListOfVectors(purinePaths, metabFilteredSif$PATHWAY_NAMES)
purineIdx <- unique(unlist(tmp))

# Subset with the collected indices and convert to a plain data.frame
purineOnlySif <- metabFilteredSif[purineIdx]
setDF(purineOnlySif)

# Show rows 1-2, columns 1-6
purineOnlySif[seq_len(2), seq_len(6)]
PARTICIPANT_A INTERACTION_TYPE PARTICIPANT_B
1 CHEBI:15422 consumption-controlled-by ADCY3
2 CHEBI:15422 used-to-produce CHEBI:15996
INTERACTION_DATA_SOURCE INTERACTION_PUBMED_ID
1 KEGG NA
2 KEGG NA
PATHWAY_NAMES
1 Purine metabolism
2 Metabolic pathways, Purine metabolism
# Pool columns 1 and 3 (the two participant columns) into one vector
tmp <- c(purineOnlySif[[1]], purineOnlySif[[3]])

# Positions of the entries that do not start with the "CHEBI:" prefix
idx <- which(!grepl("^CHEBI:", tmp))

# Occurrences of each remaining entry, sorted in increasing order
resKegg <- sort(table(tmp[idx]))

# Number of distinct entries
length(resKegg)
[1] 93
# Load libraries
library(paxtoolsr)
library(rcellminer)

# Load data: Reactome gene sets and the "mut" feature matrix
geneSets <- downloadPc2("PathwayCommons.8.Reactome.GSEA.hgnc.gmt.gz", version = "8")
mutData <- getAllFeatureData(rcellminerData::molData)[["mut"]]

# Top 25 rows by row sum. The VALUES are the sums; the NAMES are the row
# identifiers of mutData.
hiMutGenes <- head(sort(rowSums(mutData), decreasing = TRUE), 25)

# Membership has to be tested on the identifiers, not on the numeric sums:
# `hiMutGenes %in% set` compared sums against gene set entries, never matched,
# and made every p-value equal to 1.
# NOTE(review): results recorded before this fix should be regenerated.
sampleGenes <- names(hiMutGenes)
popGenes <- rownames(mutData)
sampleSize <- length(sampleGenes) # size drawn

# One hypergeometric over-enrichment p-value per gene set
pvals <- vapply(
  geneSets,
  function(set) {
    hitInSample <- sum(sampleGenes %in% set) # black drawn
    hitInPop <- sum(popGenes %in% set) # all black
    failInPop <- nrow(mutData) - hitInPop # number of red
    # P(X >= hitInSample): hence hitInSample - 1 with the upper tail
    phyper(hitInSample - 1, hitInPop, failInPop, sampleSize, lower.tail = FALSE)
  },
  numeric(1),
  USE.NAMES = FALSE
)

# Adjust p-values
pvals <- p.adjust(pvals, method = "fdr")

# Number of gene sets significant at FDR < 0.05
length(pvals[pvals < 0.05])
[1] 0
inst/examples/shinyPCViz