Skip to contents
library(PubmedPlot)
library(jsonlite)
library(dplyr)
library(XML)


# Search PubMed for articles whose title contains either spelling of
# "Mendelian randomisation" and collect the matching PMIDs in `pmids`.
term <- '"Mendelian randomisation" [Title] OR "Mendelian randomization" [Title]'

search_url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
search_params <- list(
    db = "pubmed",
    term = term,
    retmode = "json",
    usehistory = "y",
    retmax = 20000
)

search_response <- httr::GET(url = search_url, query = search_params)
# Stop on an HTTP error (e.g. rate limiting) instead of handing an error body
# to the JSON parser below.
httr::stop_for_status(search_response, task = "search PubMed (ESearch)")
# State the encoding explicitly so httr does not have to guess it.
search_content <- httr::content(search_response, "text", encoding = "UTF-8")
search_result <- jsonlite::fromJSON(search_content)

pmids <- search_result$esearchresult$idlist

# NCBI documents a per-query limit on how many PubMed IDs ESearch returns, so
# `retmax = 20000` may be truncated. Compare the reported match count with the
# number of IDs actually received and make any shortfall visible.
search_match_count <- suppressWarnings(
    as.integer(search_result$esearchresult$count)
)
if (length(search_match_count) == 1 && !is.na(search_match_count) &&
    search_match_count > length(pmids)) {
    warning(
        "PubMed reports ", search_match_count, " matches but only ",
        length(pmids), " PMIDs were returned; results are truncated. ",
        "Split the query (e.g. by publication date) to retrieve all records.",
        call. = FALSE
    )
}
# length(pmids)

# Download the PubMed records for every PMID in `pmids` as XML and parse them
# into `doc` (the document) and `xmltop` (its root node).
efetch_url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

# An empty ID list cannot produce a useful EFetch response; fail with a clear
# message instead of an obscure HTTP or XML parsing error further down.
if (length(pmids) == 0) {
  stop("No PMIDs to fetch: the PubMed search returned no IDs.", call. = FALSE)
}

# Prepare the body of the POST request for XML output
efetch_params <- list(
  db = "pubmed",
  id = paste(pmids, collapse = ","),
  rettype = "abstract",
  retmode = "xml"
)

# Make the POST request to fetch abstracts (POST rather than GET because the
# comma-separated ID list is sent in the request body).
efetch_response <- httr::POST(url = efetch_url, body = efetch_params, encode = "form")
# Stop on an HTTP error rather than trying to parse an error page as XML.
httr::stop_for_status(efetch_response, task = "fetch PubMed records (EFetch)")
efetch_content <- httr::content(efetch_response, "text", encoding = "UTF-8")

# Parse the XML content. `asText = TRUE` states that the argument is the XML
# itself, not a file name, so the parser does not have to guess.
doc <- XML::xmlParse(efetch_content, asText = TRUE)
xmltop <- XML::xmlRoot(doc)
# xmlSize(xmltop)
# xmlName(xmltop[[1]][[1]][[1]])
# xmlValue(xmltop[[1]][[]][["PMID"]])

# Build one row per <PubmedArticle> in `doc` (PMID, abstract, date, title,
# journal and first-author affiliation), stack the rows into a single tibble
# and write it to "pubmed.json".
pub_dates <- xpathApply(doc, '//PubmedArticle', \(x) {
    # The first child of <PubmedArticle> holds the citation data that the
    # fields below are read from.
    citation <- x[[1]]
    article <- citation[["Article"]]
    journal <- article[["Journal"]]

    # <History> can hold several <PubMedPubDate> entries with different
    # PubStatus values, and which one comes first varies between records.
    # Prefer the "pubmed" entry so the date means the same thing for every
    # article; fall back to the first entry when there is no "pubmed" one.
    history <- x[["PubmedData"]][["History"]]
    date_node <- NULL
    if (!is.null(history)) {
        history_nodes <- XML::xmlChildren(history)
        is_pubmed <- vapply(
            history_nodes,
            \(node) identical(XML::xmlGetAttr(node, "PubStatus"), "pubmed"),
            logical(1)
        )
        date_node <- if (any(is_pubmed)) {
            history_nodes[is_pubmed][[1]]
        } else {
            history[["PubMedPubDate"]]
        }
    }

    date_parts <- vapply(
        c("Year", "Month", "Day"),
        \(field) {
            if (is.null(date_node)) {
                NA_character_
            } else {
                xmlValue(date_node[[field]])
            }
        },
        character(1)
    )
    # A missing component cannot be parsed; return a missing Date directly
    # rather than asking lubridate to parse the string "NA NA NA".
    pub_date <- if (anyNA(date_parts)) {
        as.Date(NA)
    } else {
        lubridate::ymd(paste(date_parts, collapse = " "))
    }

    dplyr::tibble(
        pmid = xmlValue(citation[["PMID"]]),
        ab = xmlValue(article[["Abstract"]]),
        pub_date = pub_date,
        title = xmlValue(article[["ArticleTitle"]]),
        journal_issn = xmlValue(journal[["ISSN"]]),
        journal = xmlValue(journal[["Title"]]),
        # Only the first affiliation of the first listed author is kept.
        author_affil = xmlValue(article[["AuthorList"]][[1]][["AffiliationInfo"]])
    )
}) %>% bind_rows()

jsonlite::write_json(pub_dates, path="pubmed.json", pretty = TRUE)

Extract additional articles that match in the title or abstract (tiab)

# Find articles that mention either spelling of "Mendelian randomisation" in
# the title or abstract ([tiab]) and keep only the PMIDs that are not already
# in `pmids` (the result of the earlier title search).
search_url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

# Run one PubMed ESearch query and return the matching PMIDs as a character
# vector.
#   term   : PubMed query string.
#   url    : ESearch endpoint.
#   retmax : maximum number of IDs requested.
# Stops on an HTTP error and warns when PubMed reports more matches than the
# number of IDs it actually returned.
search_pmids <- function(term, url = search_url, retmax = 20000) {
    response <- httr::GET(
        url = url,
        query = list(
            db = "pubmed",
            term = term,
            retmode = "json",
            usehistory = "y",
            retmax = retmax
        )
    )
    httr::stop_for_status(response, task = "search PubMed (ESearch)")
    body <- httr::content(response, "text", encoding = "UTF-8")
    result <- jsonlite::fromJSON(body)$esearchresult

    # Normalise to a character vector so that an empty result is
    # `character(0)` whatever shape the JSON parser gives an empty ID list.
    ids <- as.character(unlist(result$idlist))

    # NCBI documents a per-query limit on how many PubMed IDs ESearch
    # returns, so make any truncation visible instead of silent.
    match_count <- suppressWarnings(as.integer(result$count))
    if (length(match_count) == 1 && !is.na(match_count) &&
        match_count > length(ids)) {
        warning(
            "PubMed reports ", match_count, " matches for ", term,
            " but only ", length(ids), " PMIDs were returned; ",
            "results are truncated.",
            call. = FALSE
        )
    }
    ids
}

term <- '"Mendelian randomisation" [tiab]'
pmids2 <- search_pmids(term)

term <- '"Mendelian randomization" [tiab]'
pmids3 <- search_pmids(term)
length(pmids3)

# Union of both spellings, minus anything the title search already returned.
pmidstiab <- unique(c(pmids2, pmids3))
pmids_new <- pmidstiab[!pmidstiab %in% pmids]
length(pmids_new)


# Download the PubMed records for the PMIDs in `pmids_new` as XML and parse
# them into `doc` (the document) and `xmltop` (its root node).
efetch_url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

# `pmids_new` is empty when every title/abstract match was already returned by
# the title search; an empty ID list cannot produce a useful EFetch response,
# so fail with a clear message instead of an obscure HTTP or XML error.
if (length(pmids_new) == 0) {
  stop("No new PMIDs to fetch: `pmids_new` is empty.", call. = FALSE)
}

# Prepare the body of the POST request for XML output
efetch_params <- list(
  db = "pubmed",
  id = paste(pmids_new, collapse = ","),
  rettype = "abstract",
  retmode = "xml"
)

# Make the POST request to fetch abstracts (POST rather than GET because the
# comma-separated ID list is sent in the request body).
efetch_response <- httr::POST(url = efetch_url, body = efetch_params, encode = "form")
# Stop on an HTTP error rather than trying to parse an error page as XML.
httr::stop_for_status(efetch_response, task = "fetch PubMed records (EFetch)")
efetch_content <- httr::content(efetch_response, "text", encoding = "UTF-8")

# Parse the XML content. `asText = TRUE` states that the argument is the XML
# itself, not a file name, so the parser does not have to guess.
doc <- XML::xmlParse(efetch_content, asText = TRUE)
xmltop <- XML::xmlRoot(doc)
# xmlSize(xmltop)
# xmlName(xmltop[[1]][[1]][[1]])
# xmlValue(xmltop[[1]][[]][["PMID"]])

# Build one row per <PubmedArticle> in `doc` (PMID, abstract, date, title,
# journal and first-author affiliation), stack the rows into a single tibble
# and write it to "pubmed_new.json".
pub_dates <- xpathApply(doc, '//PubmedArticle', \(x) {
    # The first child of <PubmedArticle> holds the citation data that the
    # fields below are read from.
    citation <- x[[1]]
    article <- citation[["Article"]]
    journal <- article[["Journal"]]

    # <History> can hold several <PubMedPubDate> entries with different
    # PubStatus values, and which one comes first varies between records.
    # Prefer the "pubmed" entry so the date means the same thing for every
    # article; fall back to the first entry when there is no "pubmed" one.
    history <- x[["PubmedData"]][["History"]]
    date_node <- NULL
    if (!is.null(history)) {
        history_nodes <- XML::xmlChildren(history)
        is_pubmed <- vapply(
            history_nodes,
            \(node) identical(XML::xmlGetAttr(node, "PubStatus"), "pubmed"),
            logical(1)
        )
        date_node <- if (any(is_pubmed)) {
            history_nodes[is_pubmed][[1]]
        } else {
            history[["PubMedPubDate"]]
        }
    }

    date_parts <- vapply(
        c("Year", "Month", "Day"),
        \(field) {
            if (is.null(date_node)) {
                NA_character_
            } else {
                xmlValue(date_node[[field]])
            }
        },
        character(1)
    )
    # A missing component cannot be parsed; return a missing Date directly
    # rather than asking lubridate to parse the string "NA NA NA".
    pub_date <- if (anyNA(date_parts)) {
        as.Date(NA)
    } else {
        lubridate::ymd(paste(date_parts, collapse = " "))
    }

    dplyr::tibble(
        pmid = xmlValue(citation[["PMID"]]),
        ab = xmlValue(article[["Abstract"]]),
        pub_date = pub_date,
        title = xmlValue(article[["ArticleTitle"]]),
        journal_issn = xmlValue(journal[["ISSN"]]),
        journal = xmlValue(journal[["Title"]]),
        # Only the first affiliation of the first listed author is kept.
        author_affil = xmlValue(article[["AuthorList"]][[1]][["AffiliationInfo"]])
    )
}) %>% bind_rows()

jsonlite::write_json(pub_dates, path="pubmed_new.json", pretty = TRUE)