# Search PubMed titles/abstracts ([tiab]) for the British spelling
# "Mendelian randomisation" and collect the matching PMIDs in `pmids2`.
term <- '"Mendelian randomisation" [tiab]'
search_url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
search_params <- list(
  db = "pubmed",
  term = term,
  retmode = "json",
  usehistory = "y",
  retmax = 20000
)
search_response <- httr::GET(url = search_url, query = search_params)
# Fail here on an HTTP error rather than feeding an error page to the JSON
# parser below.
httr::stop_for_status(search_response)
# Explicit encoding avoids httr's "No encoding supplied" guess.
search_content <- httr::content(search_response, "text", encoding = "UTF-8")
search_result <- jsonlite::fromJSON(search_content)
pmids2 <- search_result$esearchresult$idlist
# NCBI documents a 10,000-record ceiling for PubMed ESearch output, so the
# requested retmax of 20000 may not be honoured. Compare the reported match
# count with the number of IDs actually received and warn on truncation.
if (isTRUE(as.numeric(search_result$esearchresult$count) > length(pmids2))) {
  warning(
    "ESearch matched ", search_result$esearchresult$count,
    " records for ", term, " but returned only ", length(pmids2), " IDs",
    call. = FALSE
  )
}
# Search PubMed titles/abstracts ([tiab]) for the American spelling
# "Mendelian randomization" and collect the matching PMIDs in `pmids3`.
term <- '"Mendelian randomization" [tiab]'
search_url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
search_params <- list(
  db = "pubmed",
  term = term,
  retmode = "json",
  usehistory = "y",
  retmax = 20000
)
search_response <- httr::GET(url = search_url, query = search_params)
# Fail here on an HTTP error rather than feeding an error page to the JSON
# parser below.
httr::stop_for_status(search_response)
# Explicit encoding avoids httr's "No encoding supplied" guess.
search_content <- httr::content(search_response, "text", encoding = "UTF-8")
search_result <- jsonlite::fromJSON(search_content)
pmids3 <- search_result$esearchresult$idlist
# NCBI documents a 10,000-record ceiling for PubMed ESearch output, so the
# requested retmax of 20000 may not be honoured. Compare the reported match
# count with the number of IDs actually received and warn on truncation.
if (isTRUE(as.numeric(search_result$esearchresult$count) > length(pmids3))) {
  warning(
    "ESearch matched ", search_result$esearchresult$count,
    " records for ", term, " but returned only ", length(pmids3), " IDs",
    call. = FALSE
  )
}
# Number of IDs returned for this spelling (auto-printed when run at top level).
length(pmids3)
# Pool the IDs from both spellings; unique() drops PMIDs matched by both.
pmidstiab <- unique(c(pmids2, pmids3))
# Keep only the IDs with no match in `pmids` (defined outside this section).
pmids_new <- pmidstiab[is.na(match(pmidstiab, pmids))]
# Number of IDs that still need fetching.
length(pmids_new)
# Fetch the PubMed records for the new PMIDs as XML via EFetch.
efetch_url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
# Form fields for the request: every new PMID goes into a single
# comma-separated `id` value.
efetch_params <- list(
  db = "pubmed",
  id = paste(pmids_new, collapse = ","),
  rettype = "abstract",
  retmode = "xml"
)
# Send the fields as a form-encoded POST body so the (potentially long) ID
# list is not placed in the URL.
efetch_response <- httr::POST(
  url = efetch_url,
  body = efetch_params,
  encode = "form"
)
# Stop on an HTTP error (e.g. a rejected or empty ID list) instead of passing
# an error payload on to the XML parsing step.
httr::stop_for_status(efetch_response)
efetch_content <- httr::content(efetch_response, "text", encoding = "UTF-8")
# Parse the EFetch response text into an XML document. `asText = TRUE` states
# explicitly that the argument is XML content, not a file name, so the parser
# does not have to guess.
doc <- XML::xmlParse(efetch_content, asText = TRUE)
xmltop <- XML::xmlRoot(doc)
# Leftover interactive exploration of the parsed tree, kept for reference:
# xmlSize(xmltop)
# xmlName(xmltop[[1]][[1]][[1]])
# xmlValue(xmltop[[1]][[]][["PMID"]])
# Build a tibble with one row per <PubmedArticle> node in `doc`, holding the
# PMID, abstract text, a date, title, journal ISSN/title and an affiliation.
# Calls are namespace-qualified so the block does not depend on XML, dplyr or
# magrittr being attached.
pub_dates <- XML::xpathApply(doc, "//PubmedArticle", \(x) {
  # Look up the shared parent nodes once. A missing node yields NULL, and
  # further `[[` lookups on NULL stay NULL rather than erroring.
  citation <- x[["MedlineCitation"]]
  article <- citation[["Article"]]
  # NOTE(review): this takes the FIRST <PubMedPubDate> under <History>,
  # whatever its status is -- confirm that is the date `pub_date` should hold.
  first_stamp <- x[["PubmedData"]][["History"]][["PubMedPubDate"]]
  dplyr::tibble(
    pmid = XML::xmlValue(citation[["PMID"]]),
    # Text content of the whole <Abstract> element.
    ab = XML::xmlValue(article[["Abstract"]]),
    # Year, month and day are joined with spaces and parsed by lubridate.
    pub_date = lubridate::ymd(
      paste(
        XML::xmlValue(first_stamp[["Year"]]),
        XML::xmlValue(first_stamp[["Month"]]),
        XML::xmlValue(first_stamp[["Day"]])
      )
    ),
    title = XML::xmlValue(article[["ArticleTitle"]]),
    journal_issn = XML::xmlValue(article[["Journal"]][["ISSN"]]),
    journal = XML::xmlValue(article[["Journal"]][["Title"]]),
    # First <AffiliationInfo> of the first entry in <AuthorList> only.
    author_affil = XML::xmlValue(
      article[["AuthorList"]][[1]][["AffiliationInfo"]]
    )
  )
}) |>
  dplyr::bind_rows()
# Save the article table as pretty-printed JSON at the relative path
# "pubmed_new.json".
jsonlite::write_json(
  x = pub_dates,
  path = "pubmed_new.json",
  pretty = TRUE
)