Title: | Epilepsy Ontologies' Similarities |
---|---|
Description: | Analysis and visualization of similarities between epilepsy ontologies based on text mining results by comparing ranked lists of co-occurring drug terms in the BioASQ corpus. The ranked result lists of neurological drug terms co-occurring with terms from the epilepsy ontologies EpSO, ESSO, EPILONT, EPISEM and FENICS undergo further analysis. The source data to create the ranked lists of drug names is produced using the text mining workflows described in Mueller, Bernd and Hagelstein, Alexandra (2016) <doi:10.4126/FRL01-006408558>, Mueller, Bernd et al. (2017) <doi:10.1007/978-3-319-58694-6_22>, Mueller, Bernd and Rebholz-Schuhmann, Dietrich (2020) <doi:10.1007/978-3-030-43887-6_52>, and Mueller, Bernd et al. (2022) <doi:10.1186/s13326-021-00258-w>. |
Authors: | Bernd Mueller |
Maintainer: | Bernd Mueller <[email protected]> |
License: | LGPL (>= 3) |
Version: | 1.1 |
Built: | 2025-02-12 04:57:12 UTC |
Source: | https://github.com/bernd-mueller/epos |
Calculate the cosine similarity metric for two lists a and b
calcCosine(a, b)
calcCosine(a, b)
a |
list with elements that should be of same type as in list b |
b |
list with elements |
co list with length of set b containing the cosine similarity coefficient at each position
calcCosine(c(1,2), c(2,3))
calcCosine(c(1,2), c(2,3))
Calculate the dice similarity metric for two lists a and b
calcDice(a, b)
calcDice(a, b)
a |
list with elements that should be of same type as in list b |
b |
list with elements |
di list with length of set b containing the dice similarity coefficient at each list element
calcDice(c(1,2), c(2,3))
calcDice(c(1,2), c(2,3))
Calculate dsea scores of one list in comparison to reference list
calcDSEA(alist, N)
calcDSEA(alist, N)
alist |
list of drug names to be used for calculating dsea |
N |
numeric value with maximum length of lists for dsea calculation |
list with dsea scores
calcDSEA(c("Valproic acid", "Lamotrigine", "Ketamin"), 3)
calcDSEA(c("Valproic acid", "Lamotrigine", "Ketamin"), 3)
Calculate enrichment of one list in comparison to reference list
calcEnrichment(alist)
calcEnrichment(alist)
alist |
the list to compare |
list with calculated enrichment used for plotting
a <- calcEnrichment(c("Clobazam","Oxcarbazepine"))
a <- calcEnrichment(c("Clobazam","Oxcarbazepine"))
Calculate the jaccard coefficient for two lists a and b
calcJaccard(a, b)
calcJaccard(a, b)
a |
list with elements that should be of same type as in list b |
b |
list with elements |
ja list with length of set b containing the jaccard similarity coefficient for each list element
calcJaccard(c(1,2), c(2,3))
calcJaccard(c(1,2), c(2,3))
Calculate cosine similarity metric
cosine(ainterb, lengtha, lengthb)
cosine(ainterb, lengtha, lengthb)
ainterb |
integer value with number of intersecting elements between set a and b |
lengtha |
integer value with the number of items in set a |
lengthb |
integer value with the number of items in set b |
cosine double value with the cosine similarity coefficient
cosine(1,3,4)
cosine(1,3,4)
Main function to call everything and produce the results
createBaseTable(coocepso, coocesso, coocepi, coocepisem, coocfenics)
createBaseTable(coocepso, coocesso, coocepi, coocepisem, coocfenics)
coocepso |
list of drug names sorted by frequency co-occurring with EpSO |
coocesso |
list of drug names sorted by frequency co-occurring with ESSO |
coocepi |
list of drug names sorted by frequency co-occurring with EPILONT |
coocepisem |
list of drug names sorted by frequency co-occurring with EPISEM |
coocfenics |
list of drug names sorted by frequency co-occurring with FENICS |
result table containing the aggregated list of drug terms and their associations
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") createBaseTable(coocepso = rawDrugNamesCoOcEpSO[1:150], coocesso=rawDrugNamesCoOcESSO[1:150], coocepi=rawDrugNamesCoOcEPILONT[1:150], coocepisem=rawDrugNamesCoOcEPISEM[1:150], coocfenics=rawDrugNamesCoOcFENICS[1:150])
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") createBaseTable(coocepso = rawDrugNamesCoOcEpSO[1:150], coocesso=rawDrugNamesCoOcESSO[1:150], coocepi=rawDrugNamesCoOcEPILONT[1:150], coocepisem=rawDrugNamesCoOcEPISEM[1:150], coocfenics=rawDrugNamesCoOcFENICS[1:150])
Creates a vector with an X at each position where a drug from the druglist matches the ATC class list slatc
createDashVectorForATC(druglist, atchashda, atchashsec, slatc)
createDashVectorForATC(druglist, atchashda, atchashsec, slatc)
druglist |
list of drug names |
atchashda |
hash retrieved from readAtcMapIntoHashMapDrugNamesAtcCodes |
atchashsec |
hash retrieved from readSecondLevelATC |
slatc |
list of ATC classes |
list with crosses if the drug in druglist matches at the position of the ATC class in slatc
## Not run: createDashVectorForATC(druglist, atchashda, atchashsec, slatc) ## End(Not run)
## Not run: createDashVectorForATC(druglist, atchashda, atchashsec, slatc) ## End(Not run)
Creates the plot for all jaccard coefficients amongst the three epilepsy ontologies
createJaccardPlotDBMeSH(jmeshepso, jmeshesso, jmeshepi)
createJaccardPlotDBMeSH(jmeshepso, jmeshesso, jmeshepi)
jmeshepso |
list containing jaccard coefficients between mesh and epso for increasing k |
jmeshesso |
list containing jaccard coefficients between mesh and esso for increasing k |
jmeshepi |
list containing jaccard coefficients between mesh and epi for increasing k |
jaccardepilepsyplot the ggplot object
## Not run: jaccardepilepsyplot <- createJaccardPlotAll(jaccardepso, jaccardesso) ## End(Not run)
## Not run: jaccardepilepsyplot <- createJaccardPlotAll(jaccardepso, jaccardesso) ## End(Not run)
Creates the plot for all jaccard coefficients amongst the three epilepsy ontologies
createJaccardPlotMeSHFive( jmeshepso, jmeshesso, jmeshepi, jmeshepilepsyand, jmeshepilepsyor )
createJaccardPlotMeSHFive( jmeshepso, jmeshesso, jmeshepi, jmeshepilepsyand, jmeshepilepsyor )
jmeshepso |
list of jaccard coefficients between mesh and epso for increasing k |
jmeshesso |
list of jaccard coefficients between mesh and esso for increasing k |
jmeshepi |
list of jaccard coefficients between mesh and epi for increasing k |
jmeshepilepsyand |
list of jaccard coefficients between mesh and the intersection of epso, esso, and epi for increasing k |
jmeshepilepsyor |
list of jaccard coefficients between mesh and the union of epso, esso, and epi for increasing k |
jaccardepilepsyplot the ggplot object
## Not run: jaccardepilepsyplot <- createJaccardPlotAll(jaccardepso, jaccardesso) ## End(Not run)
## Not run: jaccardepilepsyplot <- createJaccardPlotAll(jaccardepso, jaccardesso) ## End(Not run)
Create the final resulting data frame
createNeuroTable(atchashda, atchashsec, dneuromaxk)
createNeuroTable(atchashda, atchashsec, dneuromaxk)
atchashda |
hashmap retrieved from readAtcMapIntoHashMapDrugNamesAtcCodes |
atchashsec |
hashmap retrieved from readSecondLevelATC |
dneuromaxk |
data frame containing columns for each intersection, ATC class, and reference list |
data frame containing drug names with additional columns listing association to ATC classes
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashaa <- readAtcMapIntoHashMapAtcCodesAtcNames( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashsec <- readSecondLevelATC( system.file("extdata", "atc-secondlevel.map", package = "epos"), "\t") epso <- rawDrugNamesCoOcEpSO neuroepso <- filterNeuroDrugs(epso, atchashda) esso <- rawDrugNamesCoOcESSO neuroesso <- filterNeuroDrugs(esso, atchashda) epi <- rawDrugNamesCoOcEPILONT neuroepi <- filterNeuroDrugs(epi, atchashda) episem <- rawDrugNamesCoOcEPISEM neuroepisem <- filterNeuroDrugs(episem, atchashda) fenics <- rawDrugNamesCoOcFENICS neurofenics <- filterNeuroDrugs(fenics, atchashda) mx <- max( c(length(neuroepso), length(neuroesso), length(neuroepi), length(neuroepisem), length(neurofenics))) dneuro <- data.frame(EpSO = c(neuroepso, rep(1, (mx-length(neuroepso)))), ESSO = c(neuroesso, rep(1, (mx-length(neuroesso)))), EPILONT = c(neuroepi, rep(1, (mx-length(neuroepi)))), EPISEM = c(neuroepisem, rep(1, (mx-length(neuroepisem)))), FENICS = c(neurofenics, rep(1, (mx-length(neurofenics))))) dneuromaxk <- TopKLists::calculate.maxK(dneuro, L=5, d=5, v=10) neurotable <- createNeuroTable(atchashda, atchashsec, dneuromaxk)
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashaa <- readAtcMapIntoHashMapAtcCodesAtcNames( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashsec <- readSecondLevelATC( system.file("extdata", "atc-secondlevel.map", package = "epos"), "\t") epso <- rawDrugNamesCoOcEpSO neuroepso <- filterNeuroDrugs(epso, atchashda) esso <- rawDrugNamesCoOcESSO neuroesso <- filterNeuroDrugs(esso, atchashda) epi <- rawDrugNamesCoOcEPILONT neuroepi <- filterNeuroDrugs(epi, atchashda) episem <- rawDrugNamesCoOcEPISEM neuroepisem <- filterNeuroDrugs(episem, atchashda) fenics <- rawDrugNamesCoOcFENICS neurofenics <- filterNeuroDrugs(fenics, atchashda) mx <- max( c(length(neuroepso), length(neuroesso), length(neuroepi), length(neuroepisem), length(neurofenics))) dneuro <- data.frame(EpSO = c(neuroepso, rep(1, (mx-length(neuroepso)))), ESSO = c(neuroesso, rep(1, (mx-length(neuroesso)))), EPILONT = c(neuroepi, rep(1, (mx-length(neuroepi)))), EPISEM = c(neuroepisem, rep(1, (mx-length(neuroepisem)))), FENICS = c(neurofenics, rep(1, (mx-length(neurofenics))))) dneuromaxk <- TopKLists::calculate.maxK(dneuro, L=5, d=5, v=10) neurotable <- createNeuroTable(atchashda, atchashsec, dneuromaxk)
Creates the plot for all jaccard coefficients amongst the three epilepsy ontologies
createTanimotoBaseline(neuroepso, neuroesso, neuroepi, dneuromaxk)
createTanimotoBaseline(neuroepso, neuroesso, neuroepi, dneuromaxk)
neuroepso |
list of neuro drug names co-occurring with epso |
neuroesso |
list of neuro drug names co-occurring with esso |
neuroepi |
list of neuro drug names co-occurring with epi |
dneuromaxk |
object returned from TopKLists::calculate.maxK |
jaccardepilepsyplot the ggplot object
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashaa <- readAtcMapIntoHashMapAtcCodesAtcNames( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashsec <- readSecondLevelATC( system.file("extdata", "atc-secondlevel.map", package = "epos"), "\t") tepso <- rawDrugNamesCoOcEpSO tesso <- rawDrugNamesCoOcESSO tepi <- rawDrugNamesCoOcEPILONT tepisem <- rawDrugNamesCoOcEPISEM tfenics <- rawDrugNamesCoOcFENICS neuroepso <- filterNeuroDrugs(tepso, atchashda) neuroesso <- filterNeuroDrugs(tesso, atchashda) neuroepi <- filterNeuroDrugs(tepi, atchashda) neuroepisem <- filterNeuroDrugs(tepisem, atchashda) neurofenics <- filterNeuroDrugs(tfenics, atchashda) dneuro <- data.frame(EpSO = neuroepso[1:210], ESSO = neuroesso[1:210], EPILONT = neuroepi[1:210], EPISEM = neuroepisem[1:210], FENICS = neurofenics[1:210]) dneuromaxk <- TopKLists::calculate.maxK(dneuro, 5, 5, 5) tanimotobaseline <- createTanimotoBaseline(neuroepso, neuroesso, neuroepi, dneuromaxk)
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashaa <- readAtcMapIntoHashMapAtcCodesAtcNames( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashsec <- readSecondLevelATC( system.file("extdata", "atc-secondlevel.map", package = "epos"), "\t") tepso <- rawDrugNamesCoOcEpSO tesso <- rawDrugNamesCoOcESSO tepi <- rawDrugNamesCoOcEPILONT tepisem <- rawDrugNamesCoOcEPISEM tfenics <- rawDrugNamesCoOcFENICS neuroepso <- filterNeuroDrugs(tepso, atchashda) neuroesso <- filterNeuroDrugs(tesso, atchashda) neuroepi <- filterNeuroDrugs(tepi, atchashda) neuroepisem <- filterNeuroDrugs(tepisem, atchashda) neurofenics <- filterNeuroDrugs(tfenics, atchashda) dneuro <- data.frame(EpSO = neuroepso[1:210], ESSO = neuroesso[1:210], EPILONT = neuroepi[1:210], EPISEM = neuroepisem[1:210], FENICS = neurofenics[1:210]) dneuromaxk <- TopKLists::calculate.maxK(dneuro, 5, 5, 5) tanimotobaseline <- createTanimotoBaseline(neuroepso, neuroesso, neuroepi, dneuromaxk)
Calculate dice similarity metric
dice(ainterb, lengtha, lengthb)
dice(ainterb, lengtha, lengthb)
ainterb |
integer value with number of intersecting elements between set a and b |
lengtha |
integer value with the number of items in set a |
lengthb |
integer value with the number of items in set b |
dice double value with the dice similarity coefficient
dice(1, 3, 4)
dice(1, 3, 4)
Does the full plot on one page
doFullPlot( cosinemeshplot, cosinedrugbankplot, cosineepilepsyplot, dicemeshplot, dicedrugbankplot, diceepilepsyplot, jaccardmeshplot, jaccarddrugbankplot, jaccardepilepsyplot )
doFullPlot( cosinemeshplot, cosinedrugbankplot, cosineepilepsyplot, dicemeshplot, dicedrugbankplot, diceepilepsyplot, jaccardmeshplot, jaccarddrugbankplot, jaccardepilepsyplot )
cosinemeshplot |
plot with cosine coefficients against MeSH |
cosinedrugbankplot |
plot with cosine coefficients against DrugBank |
cosineepilepsyplot |
plot with cosine coefficients of Epilepsy Ontologies |
dicemeshplot |
plot with dice coefficients against MeSH |
dicedrugbankplot |
plot with dice coefficients against DrugBank |
diceepilepsyplot |
plot with dice coefficients of Epilepsy Ontologies |
jaccardmeshplot |
plot with jaccard coefficients against MeSH |
jaccarddrugbankplot |
plot with jaccard coefficients against DrugBank |
jaccardepilepsyplot |
plot with jaccard coefficients of Epilepsy Ontologies |
full
## Not run: full <- doFullPlot (cosinemeshplot, cosinedrugbankplot, cosineepilepsyplot, dicemeshplot, dicedrugbankplot, diceepilepsyplot, jaccardmeshplot, jaccarddrugbankplot, jaccardepilepsyplot) ## End(Not run)
## Not run: full <- doFullPlot (cosinemeshplot, cosinedrugbankplot, cosineepilepsyplot, dicemeshplot, dicedrugbankplot, diceepilepsyplot, jaccardmeshplot, jaccarddrugbankplot, jaccardepilepsyplot) ## End(Not run)
Create quad Venn Diagram for overlapping concepts between EpSO, ESSO, EPILONT and EPISEM
drawVenn4()
drawVenn4()
plot object
## Not run: ggplot2::ggsave("venn4.png", plot = drawVenn4(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
## Not run: ggplot2::ggsave("venn4.png", plot = drawVenn4(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
Create quad Venn Diagram for shared documents with co-occurrences of drug names between EpSO, ESSO, EPILONT and EPISEM
drawVenn4Doc()
drawVenn4Doc()
plot object
## Not run: ggplot2::ggsave("venn4doc.png", plot = drawVenn4Doc(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
## Not run: ggplot2::ggsave("venn4doc.png", plot = drawVenn4Doc(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
Create quad Venn Diagram for shared documents with co-occurrences of drug names between EpSO, ESSO, EPILONT and EPISEM
drawVenn4DrugDoc()
drawVenn4DrugDoc()
plot object
## Not run: ggplot2::ggsave("venn4drugdoc.png", plot = drawVenn4DrugDoc(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
## Not run: ggplot2::ggsave("venn4drugdoc.png", plot = drawVenn4DrugDoc(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
Create quad Venn Diagram for shared synonyms between EpSO, ESSO, EPILONT and EPISEM
drawVenn4Syn()
drawVenn4Syn()
plot object
## Not run: ggplot2::ggsave("venn4syn.png", plot = drawVenn4Syn(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
## Not run: ggplot2::ggsave("venn4syn.png", plot = drawVenn4Syn(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
Create quintuple Venn Diagram for overlapping concepts between EpSO, ESSO, EPILONT, EPISEM and FENICS
drawVenn5()
drawVenn5()
plot object
## Not run: ggplot2::ggsave("venn5.png", plot = drawVenn5(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
## Not run: ggplot2::ggsave("venn5.png", plot = drawVenn5(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
Create quintuple Venn Diagram for shared documents between EpSO, ESSO, EPILONT, EPISEM and FENICS
drawVenn5Doc()
drawVenn5Doc()
plot object
## Not run: ggplot2::ggsave("venn5doc.png", plot = drawVenn5Doc(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
## Not run: ggplot2::ggsave("venn5doc.png", plot = drawVenn5Doc(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
Create quintuple Venn Diagram for shared documents with co-occurrences of drug names between EpSO, ESSO, EPILONT, EPISEM and FENICS
drawVenn5DrugDoc()
drawVenn5DrugDoc()
plot object
## Not run: ggplot2::ggsave("venn5drugdoc.png", plot = drawVenn5DrugDoc(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
## Not run: ggplot2::ggsave("venn5drugdoc.png", plot = drawVenn5DrugDoc(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
Create quintuple Venn Diagram for shared synonyms between EpSO, ESSO, EPILONT, EPISEM and FENICS
drawVenn5Syn()
drawVenn5Syn()
plot object
## Not run: ggplot2::ggsave("venn5syn.png", plot = drawVenn5Syn(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
## Not run: ggplot2::ggsave("venn5syn.png", plot = drawVenn5Syn(), width=240, height=160, units = "mm", dpi = 300) ## End(Not run)
Create plot_grid from multiple plots
drawVennGrid()
drawVennGrid()
plot object
## Not run: cowplot::plot_grid(drawVenn4 (), drawVenn4Syn(), drawVenn5Doc (), drawVenn5DrugDoc ()) ggplot2::ggsave("vennAB.png", plot = cowplot::plot_grid(drawVenn4 (), drawVenn4Syn(), labels = c('A', 'B'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennAB.png", plot = cowplot::plot_grid(drawVenn4 (), drawVenn4Syn(), labels = c('Concepts:', 'Synonyms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennCD.png", plot = cowplot::plot_grid(drawVenn5Doc (), drawVenn5DrugDoc(), labels = c('Documents with B-Terms:', 'Documents with B- and C-Terms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennCD.png", plot = cowplot::plot_grid(drawVenn5Doc (), drawVenn5DrugDoc(), labels = c('Documents with B-Terms:', 'Documents with B- and C-Terms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennCD.png", plot = cowplot::plot_grid(drawVenn4Doc (), drawVenn4DrugDoc(), labels = c('Documents with B-Terms:', 'Documents with B- and C-Terms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennCD.png", plot = cowplot::plot_grid(drawVenn4Doc (), drawVenn4DrugDoc(), labels = c('Documents\nwith B-Terms: ', 'Documents\nwith B- and C-Terms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennAB.png", plot = cowplot::plot_grid(drawVenn4 (), drawVenn4Syn(), labels = c('i) Concepts:', 'ii) Synonyms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennCD.png", plot = cowplot::plot_grid(NULL, drawVenn4Doc (), drawVenn4DrugDoc(), labels = c('iii) Documents with B-Terms:', 'iv) Documents with B- and C-Terms:'), ncol = 1, label_x = c(-0.105, -0.14), label_fontfamily = "Arial Nova Light", label_fontface = "bold"), width=240, height=320, units = "mm", dpi = 300) ## End(Not run)
## Not run: cowplot::plot_grid(drawVenn4 (), drawVenn4Syn(), drawVenn5Doc (), drawVenn5DrugDoc ()) ggplot2::ggsave("vennAB.png", plot = cowplot::plot_grid(drawVenn4 (), drawVenn4Syn(), labels = c('A', 'B'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennAB.png", plot = cowplot::plot_grid(drawVenn4 (), drawVenn4Syn(), labels = c('Concepts:', 'Synonyms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennCD.png", plot = cowplot::plot_grid(drawVenn5Doc (), drawVenn5DrugDoc(), labels = c('Documents with B-Terms:', 'Documents with B- and C-Terms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennCD.png", plot = cowplot::plot_grid(drawVenn5Doc (), drawVenn5DrugDoc(), labels = c('Documents with B-Terms:', 'Documents with B- and C-Terms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennCD.png", plot = cowplot::plot_grid(drawVenn4Doc (), drawVenn4DrugDoc(), labels = c('Documents with B-Terms:', 'Documents with B- and C-Terms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennCD.png", plot = cowplot::plot_grid(drawVenn4Doc (), drawVenn4DrugDoc(), labels = c('Documents\nwith B-Terms: ', 'Documents\nwith B- and C-Terms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennAB.png", plot = cowplot::plot_grid(drawVenn4 (), drawVenn4Syn(), labels = c('i) Concepts:', 'ii) Synonyms:'), ncol = 1), width=240, height=320, units = "mm", dpi = 300) ggplot2::ggsave("vennCD.png", plot = cowplot::plot_grid(NULL, drawVenn4Doc (), drawVenn4DrugDoc(), labels = c('iii) Documents with B-Terms:', 'iv) Documents with B- and C-Terms:'), ncol = 1, label_x = c(-0.105, -0.14), label_fontfamily = "Arial Nova Light", label_fontface = "bold"), width=240, height=320, units = "mm", dpi = 300) ## End(Not run)
Filter a given list of drug names for having an ATC code, if not they are dropped
filterApprovedDrugs(druglist, atchashda)
filterApprovedDrugs(druglist, atchashda)
druglist |
a list of drug names |
atchashda |
a hash containing the drug names as keys |
approveddrugs a hash filtered for having an ATC code
utils::data(rawDrugNamesCoOcEpSO, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") tepso <- genDictListFromRawFreq(rawDrugNamesCoOcEpSO) filterApprovedDrugs(tepso, atchashda)
utils::data(rawDrugNamesCoOcEpSO, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") tepso <- genDictListFromRawFreq(rawDrugNamesCoOcEpSO) filterApprovedDrugs(tepso, atchashda)
Filter a given list of drug names for having an ATC code starting with N indicating to be a drug for the Nervous System
filterNeuroDrugs(druglist, atchashda)
filterNeuroDrugs(druglist, atchashda)
druglist |
a list of drug names |
atchashda |
a hash containing the drug names as keys |
neurodrugs a hash filtered for having an ATC code starting with N
utils::data(rawDrugNamesCoOcEpSO, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") tepso <- genDictListFromRawFreq(rawDrugNamesCoOcEpSO) nepso <- filterNeuroDrugs(tepso, atchashda)
utils::data(rawDrugNamesCoOcEpSO, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") tepso <- genDictListFromRawFreq(rawDrugNamesCoOcEpSO) nepso <- filterNeuroDrugs(tepso, atchashda)
Clears object that was loaded from hard drive into a list of terms sorted by frequency
Clears object that was loaded from hard drive into a list of terms sorted by frequency
genDictListFromRawFreq(topfreqdictraw) genDictListFromRawFreq(topfreqdictraw)
genDictListFromRawFreq(topfreqdictraw) genDictListFromRawFreq(topfreqdictraw)
topfreqdictraw |
list with terms from a dictionary sorted by frequency |
a sorted list of terms
a sorted list of terms
## Not run: genDictListFromRawFreq(epi) ## End(Not run) utils::data(rawDrugNamesCoOcEpSO, package="epos") genDictListFromRawFreq(rawDrugNamesCoOcEpSO)
## Not run: genDictListFromRawFreq(epi) ## End(Not run) utils::data(rawDrugNamesCoOcEpSO, package="epos") genDictListFromRawFreq(rawDrugNamesCoOcEpSO)
Retrieve the list of drugs from the union of all reference lists
getRefAll()
getRefAll()
list of drugs from all reference lists
d <- getRefAll()
d <- getRefAll()
Receives a sorted hashmap with found entities from a dictionary
getTermMatrix(dictionary, database, collection)
getTermMatrix(dictionary, database, collection)
dictionary |
Character vector that is the name of a dictionary having pre-calculated stats. This can be MeSH, DrugBank, Agrovoc, EpSO, ESSO, or EPILONT |
database |
the name of the MongoDB database to be used |
collection |
the name of the MongoDB collection to be used |
a sorted hashmap containing all found entities from the respective dictionaries with frequencies
## Not run: mesh <- getTermMatrix("MeSH") ## End(Not run)
## Not run: mesh <- getTermMatrix("MeSH") ## End(Not run)
Calculate jaccard similarity metric for two sets a and b
jaccard(ainterb, aunionb, lengtha, lengthb)
jaccard(ainterb, aunionb, lengtha, lengthb)
ainterb |
integer value with number of intersecting elements between set a and b |
aunionb |
integer value with number of union elements between set a and b |
lengtha |
length of set a |
lengthb |
length of set b |
jac double value with the jaccard similarity coefficient
jaccard(1,3, 2, 3)
jaccard(1,3, 2, 3)
Plotting functions for DSEA lists
plotDSEA(dsepso, dsesso, dsepi, dsepisem, dsfenics, dsspace, k)
plotDSEA(dsepso, dsesso, dsepi, dsepisem, dsfenics, dsspace, k)
dsepso |
list with enrichment for EpSO |
dsesso |
list with enrichment for ESSO |
dsepi |
list with enrichment for EPILONT |
dsepisem |
list with enrichment for EPISEM |
dsfenics |
list with enrichment for FENICS |
dsspace |
list with enrichment for the combined ranked list |
k |
numeric value for the length to be plotted |
the plot object
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") epso <- rawDrugNamesCoOcEpSO neuroepso <- filterNeuroDrugs(epso, atchashda) esso <- rawDrugNamesCoOcESSO neuroesso <- filterNeuroDrugs(esso, atchashda) epi <- rawDrugNamesCoOcEPILONT neuroepi <- filterNeuroDrugs(epi, atchashda) episem <- rawDrugNamesCoOcEPISEM neuroepisem <- filterNeuroDrugs(episem, atchashda) fenics <- rawDrugNamesCoOcFENICS neurofenics <- filterNeuroDrugs(fenics, atchashda) mx <- max( c(length(neuroepso), length(neuroesso), length(neuroepi), length(neuroepisem), length(neurofenics))) dneuro <- data.frame(EpSO = c(neuroepso, rep("", (mx-length(neuroepso)))), ESSO = c(neuroesso, rep("", (mx-length(neuroesso)))), EPILONT = c(neuroepi, rep("", (mx-length(neuroepi)))), EPISEM = c(neuroepisem, rep("", (mx-length(neuroepisem)))), FENICS = c(neurofenics, rep("", (mx-length(neurofenics))))) dneuromaxk <- TopKLists::calculate.maxK(dneuro, L=5, d=5, v=5) neurospace <- as.character(dneuromaxk$topkspace) dsepso <- calcDSEA(neuroepso, mx) dsesso <- calcDSEA(neuroesso, mx) dsepi <- calcDSEA(neuroepi, mx) dsepisem <- calcDSEA(neuroepisem, mx) dsfenics <- calcDSEA(neurofenics, mx) dsspace <- calcDSEA (neurospace, mx) p <- plotDSEA(dsepso, dsesso, dsepi, dsepisem, dsfenics, dsspace, dneuromaxk$maxK) ## Not run: ggplot2::ggsave("dsea.png", p <- plotDSEA(dsepso, dsesso, dsepi, dsepisem, dsfenics, dsspace, dneuromaxk$maxK), width=480, height=320, units = "mm", dpi = 300) ## End(Not run)
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") epso <- rawDrugNamesCoOcEpSO neuroepso <- filterNeuroDrugs(epso, atchashda) esso <- rawDrugNamesCoOcESSO neuroesso <- filterNeuroDrugs(esso, atchashda) epi <- rawDrugNamesCoOcEPILONT neuroepi <- filterNeuroDrugs(epi, atchashda) episem <- rawDrugNamesCoOcEPISEM neuroepisem <- filterNeuroDrugs(episem, atchashda) fenics <- rawDrugNamesCoOcFENICS neurofenics <- filterNeuroDrugs(fenics, atchashda) mx <- max( c(length(neuroepso), length(neuroesso), length(neuroepi), length(neuroepisem), length(neurofenics))) dneuro <- data.frame(EpSO = c(neuroepso, rep("", (mx-length(neuroepso)))), ESSO = c(neuroesso, rep("", (mx-length(neuroesso)))), EPILONT = c(neuroepi, rep("", (mx-length(neuroepi)))), EPISEM = c(neuroepisem, rep("", (mx-length(neuroepisem)))), FENICS = c(neurofenics, rep("", (mx-length(neurofenics))))) dneuromaxk <- TopKLists::calculate.maxK(dneuro, L=5, d=5, v=5) neurospace <- as.character(dneuromaxk$topkspace) dsepso <- calcDSEA(neuroepso, mx) dsesso <- calcDSEA(neuroesso, mx) dsepi <- calcDSEA(neuroepi, mx) dsepisem <- calcDSEA(neuroepisem, mx) dsfenics <- calcDSEA(neurofenics, mx) dsspace <- calcDSEA (neurospace, mx) p <- plotDSEA(dsepso, dsesso, dsepi, dsepisem, dsfenics, dsspace, dneuromaxk$maxK) ## Not run: ggplot2::ggsave("dsea.png", p <- plotDSEA(dsepso, dsesso, dsepi, dsepisem, dsfenics, dsspace, dneuromaxk$maxK), width=480, height=320, units = "mm", dpi = 300) ## End(Not run)
Plotting functions for enrichment lists
plotEnrichment(enepso, enesso, enepi, enepisem, enfenics, enspace, k)
plotEnrichment(enepso, enesso, enepi, enepisem, enfenics, enspace, k)
enepso |
list with enrichment for EpSO |
enesso |
list with enrichment for ESSO |
enepi |
list with enrichment for EPILONT |
enepisem |
list with enrichment for EPISEM |
enfenics |
list with enrichment for FENICS |
enspace |
list with enrichment for the combined ranked list |
k |
numeric value for the length to be plotted |
the plot object
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") epso <- rawDrugNamesCoOcEpSO neuroepso <- filterNeuroDrugs(epso, atchashda) esso <- rawDrugNamesCoOcESSO neuroesso <- filterNeuroDrugs(esso, atchashda) epi <- rawDrugNamesCoOcEPILONT neuroepi <- filterNeuroDrugs(epi, atchashda) episem <- rawDrugNamesCoOcEPISEM neuroepisem <- filterNeuroDrugs(episem, atchashda) fenics <- rawDrugNamesCoOcFENICS neurofenics <- filterNeuroDrugs(fenics, atchashda) mx <- max( c(length(neuroepso), length(neuroesso), length(neuroepi), length(neuroepisem), length(neurofenics))) dneuro <- data.frame(EpSO = c(neuroepso, rep("", (mx-length(neuroepso)))), ESSO = c(neuroesso, rep("", (mx-length(neuroesso)))), EPILONT = c(neuroepi, rep("", (mx-length(neuroepi)))), EPISEM = c(neuroepisem, rep("", (mx-length(neuroepisem)))), FENICS = c(neurofenics, rep("", (mx-length(neurofenics))))) dneuromaxk <- TopKLists::calculate.maxK(dneuro, L=5, d=5, v=5) neurospace <- as.character(dneuromaxk$topkspace) enepso <- calcEnrichment(neuroepso) enesso <- calcEnrichment(neuroesso) enepi <- calcEnrichment(neuroepi) enepisem <- calcEnrichment(neuroepisem) enfenics <- calcEnrichment(neurofenics) enspace <- calcEnrichment (neurospace) p <- plotEnrichment(enepso, enesso, enepi, enepisem, enfenics, enspace, dneuromaxk$maxK)
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") epso <- rawDrugNamesCoOcEpSO neuroepso <- filterNeuroDrugs(epso, atchashda) esso <- rawDrugNamesCoOcESSO neuroesso <- filterNeuroDrugs(esso, atchashda) epi <- rawDrugNamesCoOcEPILONT neuroepi <- filterNeuroDrugs(epi, atchashda) episem <- rawDrugNamesCoOcEPISEM neuroepisem <- filterNeuroDrugs(episem, atchashda) fenics <- rawDrugNamesCoOcFENICS neurofenics <- filterNeuroDrugs(fenics, atchashda) mx <- max( c(length(neuroepso), length(neuroesso), length(neuroepi), length(neuroepisem), length(neurofenics))) dneuro <- data.frame(EpSO = c(neuroepso, rep("", (mx-length(neuroepso)))), ESSO = c(neuroesso, rep("", (mx-length(neuroesso)))), EPILONT = c(neuroepi, rep("", (mx-length(neuroepi)))), EPISEM = c(neuroepisem, rep("", (mx-length(neuroepisem)))), FENICS = c(neurofenics, rep("", (mx-length(neurofenics))))) dneuromaxk <- TopKLists::calculate.maxK(dneuro, L=5, d=5, v=5) neurospace <- as.character(dneuromaxk$topkspace) enepso <- calcEnrichment(neuroepso) enesso <- calcEnrichment(neuroesso) enepi <- calcEnrichment(neuroepi) enepisem <- calcEnrichment(neuroepisem) enfenics <- calcEnrichment(neurofenics) enspace <- calcEnrichment (neurospace) p <- plotEnrichment(enepso, enesso, enepi, enepisem, enfenics, enspace, dneuromaxk$maxK)
Print Top 10 Drugs
printTop10Drugs(neuroepso, neuroesso, neuroepi, neuroepisem, neurofenics)
printTop10Drugs(neuroepso, neuroesso, neuroepi, neuroepisem, neurofenics)
neuroepso |
Ranked list of drug names co-occurring with EpSO |
neuroesso |
Ranked list of drug names co-occurring with ESSO |
neuroepi |
Ranked list of drug names co-occurring with EPILONT |
neuroepisem |
Ranked list of drug names co-occurring with EPISEM |
neurofenics |
Ranked list of drug names co-occurring with FENICS |
data frame with top 10 drugs for each ontology
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashaa <- readAtcMapIntoHashMapAtcCodesAtcNames( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashsec <- readSecondLevelATC( system.file("extdata", "atc-secondlevel.map", package = "epos"), "\t") epso <- rawDrugNamesCoOcEpSO neuroepso <- filterNeuroDrugs(epso, atchashda) esso <- rawDrugNamesCoOcESSO neuroesso <- filterNeuroDrugs(esso, atchashda) epi <- rawDrugNamesCoOcEPILONT neuroepi <- filterNeuroDrugs(epi, atchashda) episem <- rawDrugNamesCoOcEPISEM neuroepisem <- filterNeuroDrugs(episem, atchashda) fenics <- rawDrugNamesCoOcFENICS neurofenics <- filterNeuroDrugs(fenics, atchashda) top10table <- printTop10Drugs(neuroepso, neuroesso, neuroepi, neuroepisem, neurofenics) ## Not run: print(xtable::xtable(top10table, type = "latex"), file = "top10table.tex") ## End(Not run)
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashaa <- readAtcMapIntoHashMapAtcCodesAtcNames( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashsec <- readSecondLevelATC( system.file("extdata", "atc-secondlevel.map", package = "epos"), "\t") epso <- rawDrugNamesCoOcEpSO neuroepso <- filterNeuroDrugs(epso, atchashda) esso <- rawDrugNamesCoOcESSO neuroesso <- filterNeuroDrugs(esso, atchashda) epi <- rawDrugNamesCoOcEPILONT neuroepi <- filterNeuroDrugs(epi, atchashda) episem <- rawDrugNamesCoOcEPISEM neuroepisem <- filterNeuroDrugs(episem, atchashda) fenics <- rawDrugNamesCoOcFENICS neurofenics <- filterNeuroDrugs(fenics, atchashda) top10table <- printTop10Drugs(neuroepso, neuroesso, neuroepi, neuroepisem, neurofenics) ## Not run: print(xtable::xtable(top10table, type = "latex"), file = "top10table.tex") ## End(Not run)
List of drug terms, with their frequencies, co-occurring with terms from the EPILONT ontology in publications since 2015 from the BioASQ 2020 corpus.
rawDrugNamesCoOcEPILONT
rawDrugNamesCoOcEPILONT
A named list of drug term frequencies
The text mining workflows for data generation are described in Mueller, Bernd and Hagelstein, Alexandra (2016) <doi:10.4126/FRL01-006408558>, Mueller, Bernd et al. (2017) <doi:10.1007/978-3-319-58694-6_22>, and Mueller, Bernd and Rebholz-Schuhmann, Dietrich (2020) <doi:10.1007/978-3-030-43887-6_52>. The source data set for generating the data co-occurrence lists is the BioASQ 2020 corpus. The source ontology for the creation of the dictionary is the Epilepsy Ontology (EPILONT) from https://bioportal.bioontology.org/ontologies/EPILONT
utils::data(rawDrugNamesCoOcEPILONT, package="epos")
utils::data(rawDrugNamesCoOcEPILONT, package="epos")
List of drug terms, with their frequencies, co-occurring with terms from the EPISEM ontology in publications since 2015 from the BioASQ 2020 corpus.
rawDrugNamesCoOcEPISEM
rawDrugNamesCoOcEPISEM
A named list of drug term frequencies
The text mining workflows for data generation are described in Mueller, Bernd and Hagelstein, Alexandra (2016) <doi:10.4126/FRL01-006408558>, Mueller, Bernd et al. (2017) <doi:10.1007/978-3-319-58694-6_22>, and Mueller, Bernd and Rebholz-Schuhmann, Dietrich (2020) <doi:10.1007/978-3-030-43887-6_52>. The source data set for generating the data co-occurrence lists is the BioASQ 2020 corpus. The source ontology for the creation of the dictionary is the Epilepsy Semiology Ontology (EPISEM) from https://bioportal.bioontology.org/ontologies/EPISEM
utils::data(rawDrugNamesCoOcEPISEM, package="epos")
utils::data(rawDrugNamesCoOcEPISEM, package="epos")
List of drug terms, with their frequencies, co-occurring with terms from the EpSO ontology in publications since 2015 from the BioASQ 2020 corpus.
rawDrugNamesCoOcEpSO
rawDrugNamesCoOcEpSO
A named list of drug term frequencies
The text mining workflows for data generation are described in Mueller, Bernd and Hagelstein, Alexandra (2016) <doi:10.4126/FRL01-006408558>, Mueller, Bernd et al. (2017) <doi:10.1007/978-3-319-58694-6_22>, and Mueller, Bernd and Rebholz-Schuhmann, Dietrich (2020) <doi:10.1007/978-3-030-43887-6_52>. The source data set for generating the data co-occurrence lists is the BioASQ 2020 corpus. The source ontology for the creation of the dictionary is the Epilepsy and Seizure Ontology (EpSO) from https://bioportal.bioontology.org/ontologies/EPSO
utils::data(rawDrugNamesCoOcEpSO, package="epos")
utils::data(rawDrugNamesCoOcEpSO, package="epos")
List of drug terms, with their frequencies, co-occurring with terms from the ESSO ontology in publications since 2015 from the BioASQ 2020 corpus.
rawDrugNamesCoOcESSO
rawDrugNamesCoOcESSO
An object of class character
of length 8620.
The text mining workflows for data generation are described in Mueller, Bernd and Hagelstein, Alexandra (2016) <doi:10.4126/FRL01-006408558>, Mueller, Bernd et al. (2017) <doi:10.1007/978-3-319-58694-6_22>, and Mueller, Bernd and Rebholz-Schuhmann, Dietrich (2020) <doi:10.1007/978-3-030-43887-6_52>. The source data set for generating the data co-occurrence lists is the BioASQ 2020 corpus. The source ontology for the creation of the dictionary is the Epilepsy Syndrome Seizure Ontology (ESSO) from https://bioportal.bioontology.org/ontologies/ESSO
utils::data(rawDrugNamesCoOcESSO, package="epos")
utils::data(rawDrugNamesCoOcESSO, package="epos")
List of drug terms, with their frequencies, co-occurring with terms from the FENICS ontology in publications from the BioASQ 2020 corpus.
rawDrugNamesCoOcFENICS
rawDrugNamesCoOcFENICS
A named list of drug term frequencies
The text mining workflows for data generation are described in Mueller, Bernd and Hagelstein, Alexandra (2016) <doi:10.4126/FRL01-006408558>, Mueller, Bernd et al. (2017) <doi:10.1007/978-3-319-58694-6_22>, and Mueller, Bernd and Rebholz-Schuhmann, Dietrich (2020) <doi:10.1007/978-3-030-43887-6_52>. The source data set for generating the data co-occurrence lists is the BioASQ 2020 corpus. The source ontology for the creation of the dictionary is the Functional Epilepsy Nomenclature for Ion Channels (FENICS) from https://bioportal.bioontology.org/ontologies/FENICS
utils::data(rawDrugNamesCoOcFENICS, package="epos")
utils::data(rawDrugNamesCoOcFENICS, package="epos")
Processes the input file db-atc.map to form a HashMap containing the ATC codes with their ATC names
readAtcMapIntoHashMapAtcCodesAtcNames(filename, seperator)
readAtcMapIntoHashMapAtcCodesAtcNames(filename, seperator)
filename |
character vector with the file name of the file db-atc.map |
seperator |
character vector with the separator used within the map-file |
atchashaa hash with atc codes as keys and atc names as values
atchashaa <- readAtcMapIntoHashMapAtcCodesAtcNames( system.file("extdata", "db-atc.map", package = "epos"), "\t")
atchashaa <- readAtcMapIntoHashMapAtcCodesAtcNames( system.file("extdata", "db-atc.map", package = "epos"), "\t")
Processes the input file db-atc.map to form a HashMap containing the drug names with ATC codes
readAtcMapIntoHashMapDrugNamesAtcCodes(filename, seperator)
readAtcMapIntoHashMapDrugNamesAtcCodes(filename, seperator)
filename |
character vector with the file name of the file db-atc.map |
seperator |
character vector with the separator used within the map-file |
atchashda hash with drug names as keys and atc codes as values
atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t")
atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t")
Read the second level ATC classes from the file atc-secondlevel.map
readSecondLevelATC(filename, seperator)
readSecondLevelATC(filename, seperator)
filename |
the file name that is supposed to be atc-secondlevel.map |
seperator |
the csv file delimiter |
atchashsec a hash with second level ATC classes as keys and their names as values
atchashsec <- readSecondLevelATC( system.file("extdata", "atc-secondlevel.map", package = "epos"), "\t")
atchashsec <- readSecondLevelATC( system.file("extdata", "atc-secondlevel.map", package = "epos"), "\t")
Sort table by scoring for each row
sortTableByRefMatches(dntk)
sortTableByRefMatches(dntk)
dntk |
the table returned from createNeuroTable |
the sorted table
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashaa <- readAtcMapIntoHashMapAtcCodesAtcNames( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashsec <- readSecondLevelATC( system.file("extdata", "atc-secondlevel.map", package = "epos"), "\t") epso <- rawDrugNamesCoOcEpSO neuroepso <- filterNeuroDrugs(epso, atchashda) esso <- rawDrugNamesCoOcESSO neuroesso <- filterNeuroDrugs(esso, atchashda) epi <- rawDrugNamesCoOcEPILONT neuroepi <- filterNeuroDrugs(epi, atchashda) episem <- rawDrugNamesCoOcEPISEM neuroepisem <- filterNeuroDrugs(episem, atchashda) fenics <- rawDrugNamesCoOcFENICS neurofenics <- filterNeuroDrugs(fenics, atchashda) mx <- max( c(length(neuroepso), length(neuroesso), length(neuroepi), length(neuroepisem), length(neurofenics))) dneuro <- data.frame(EpSO = c(neuroepso, rep("", (mx-length(neuroepso)))), ESSO = c(neuroesso, rep("", (mx-length(neuroesso)))), EPILONT = c(neuroepi, rep("", (mx-length(neuroepi)))), EPISEM = c(neuroepisem, rep("", (mx-length(neuroepisem)))), FENICS = c(neurofenics, rep("", (mx-length(neurofenics))))) suppressWarnings(dneuromaxk <- TopKLists::calculate.maxK(dneuro, L=5, d=5, v=5)) neurotable <- createNeuroTable(atchashda, atchashsec, dneuromaxk) sortedNeuroTable <- sortTableByRefMatches(neurotable) print(xtable::xtable(sortedNeuroTable, type = "latex"), file = "sortedNeuroTable.tex", include.rownames=FALSE)
utils::data(rawDrugNamesCoOcEpSO, package="epos") utils::data(rawDrugNamesCoOcESSO, package="epos") utils::data(rawDrugNamesCoOcEPILONT, package="epos") utils::data(rawDrugNamesCoOcEPISEM, package="epos") utils::data(rawDrugNamesCoOcFENICS, package="epos") atchashda <- readAtcMapIntoHashMapDrugNamesAtcCodes( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashaa <- readAtcMapIntoHashMapAtcCodesAtcNames( system.file("extdata", "db-atc.map", package = "epos"), "\t") atchashsec <- readSecondLevelATC( system.file("extdata", "atc-secondlevel.map", package = "epos"), "\t") epso <- rawDrugNamesCoOcEpSO neuroepso <- filterNeuroDrugs(epso, atchashda) esso <- rawDrugNamesCoOcESSO neuroesso <- filterNeuroDrugs(esso, atchashda) epi <- rawDrugNamesCoOcEPILONT neuroepi <- filterNeuroDrugs(epi, atchashda) episem <- rawDrugNamesCoOcEPISEM neuroepisem <- filterNeuroDrugs(episem, atchashda) fenics <- rawDrugNamesCoOcFENICS neurofenics <- filterNeuroDrugs(fenics, atchashda) mx <- max( c(length(neuroepso), length(neuroesso), length(neuroepi), length(neuroepisem), length(neurofenics))) dneuro <- data.frame(EpSO = c(neuroepso, rep("", (mx-length(neuroepso)))), ESSO = c(neuroesso, rep("", (mx-length(neuroesso)))), EPILONT = c(neuroepi, rep("", (mx-length(neuroepi)))), EPISEM = c(neuroepisem, rep("", (mx-length(neuroepisem)))), FENICS = c(neurofenics, rep("", (mx-length(neurofenics))))) suppressWarnings(dneuromaxk <- TopKLists::calculate.maxK(dneuro, L=5, d=5, v=5)) neurotable <- createNeuroTable(atchashda, atchashsec, dneuromaxk) sortedNeuroTable <- sortTableByRefMatches(neurotable) print(xtable::xtable(sortedNeuroTable, type = "latex"), file = "sortedNeuroTable.tex", include.rownames=FALSE)