Commit fad2de6e authored by Cossin Sebastien's avatar Cossin Sebastien
Browse files

script to extract clinical drug ingredient with their ING and PIN, their quantity, amount, unit WIP

parent 099e1022
library(SPARQL)
# Load the settings through the drugsmapping helper; the SPARQL endpoint URL
# is read from its SPARQL_ENDPOINT element below.
ENV_VARIABLES <- drugsmapping::getENVvariables()
# Query text supplied by drugsmapping (presumably selects clinical drug
# components with their ingredient labels -- confirm against the package).
query_cdc <- drugsmapping::get_sparql_query_clinical_drug_comp()
# Run the query against the endpoint and keep the `results` element of the
# response as the working table.
res <- SPARQL::SPARQL(url = ENV_VARIABLES$SPARQL_ENDPOINT, query = query_cdc)
athena <- res$results
# save(athena, file = "cdc.rdata")
# Build the `substance` column: use the precise ingredient label when it is
# present, otherwise fall back to the ingredient label.
no_precise_ing <- is.na(athena$precise_ingredient_label)
athena$substance <- ifelse(no_precise_ing, athena$ingredient_label, athena$precise_ingredient_label)
# Extract the quantity part of a clinical drug component label.
#
# The label is expected to be the substance name followed by its quantity,
# e.g. "4-(Isopropylamino)Phenazone 240 MG/ML" -> "240 MG/ML".
#
# Args:
#   substance: character vector of substance names (factors are converted).
#   cdc: character vector of clinical drug component labels, same length as
#     `substance` (or length one).
#
# Returns:
#   A character vector with the text that follows the substance name,
#   trimmed of surrounding whitespace. `NA` when either input is `NA` or
#   when the label does not start with the substance name.
extract_quantity <- function(substance, cdc) {
  substance <- as.character(substance)
  cdc <- as.character(cdc)

  # Everything after the substance name is the quantity.
  quantity <- substr(x = cdc, start = nchar(substance) + 1L, stop = nchar(cdc))
  quantity <- trimws(quantity)

  # Cutting by length alone yields garbage when the label is not actually
  # prefixed by the substance name, so flag those cases as missing instead.
  # The comparison ignores case because only the position matters.
  is_prefix <- startsWith(tolower(cdc), tolower(substance))
  quantity[is.na(is_prefix) | !is_prefix] <- NA_character_

  quantity
}
# Sanity check on a known example: stop the script if the extraction no longer
# gives the expected result (a bare comparison would only print its value).
substance <- "4-(Isopropylamino)Phenazone"
cdc <- "4-(Isopropylamino)Phenazone 240 MG/ML"
stopifnot(extract_quantity(substance, cdc) == "240 MG/ML")

# extract_quantity() is built from vectorised string functions, so it is
# applied to the whole columns at once. This avoids a row-by-row mapply(),
# which would also name every result after its substance string.
athena$quantity <- extract_quantity(
  substance = athena$substance,
  cdc = athena$clinical_drug_comp_label
)
### extract the amount:
# Pattern matching the run of digits and dots at the very start of a string.
regex_amount <- "^[0-9.]+"
# Sample quantity reused by the sanity checks further down.
quantity <- "0.938 MG/ML"
# Pull the leading numeric text out of a quantity string.
#
# Args:
#   quantity: character vector such as "0.938 MG/ML".
#
# Returns:
#   The part of each string matched by the global `regex_amount` pattern
#   (e.g. "0.938"), as text; `NA` where nothing matches.
extract_amount <- function(quantity) {
  stringr::str_extract(quantity, regex_amount)
}
# Sanity check on the sample quantity: stop the script if the extraction no
# longer gives the expected result.
amount <- "0.938"
stopifnot(extract_amount(quantity) == amount)

# stringr::str_extract() is vectorised, so the whole column is processed in
# one call; sapply() is not needed and has an input-dependent return type.
athena$amount <- extract_amount(athena$quantity)
# Extract the unit that follows the amount in a quantity string.
#
# Args:
#   quantity: character vector such as "0.938 MG/ML".
#   amount: the amount found at the start of `quantity` (e.g. "0.938");
#     it is converted to text and only its length is used.
#
# Returns:
#   The remainder of `quantity` after the amount, trimmed of surrounding
#   whitespace (e.g. "MG/ML"). `NA` when `amount` or `quantity` is `NA`.
extract_unit <- function(quantity, amount) {
  amount_chars <- nchar(as.character(amount))
  # Skip as many characters as the amount occupies; the rest is the unit.
  remainder <- substr(quantity, amount_chars + 1, nchar(quantity))
  trimws(remainder)
}
# Sanity check on the sample quantity/amount: stop the script if the
# extraction no longer gives the expected result.
stopifnot(extract_unit(quantity, amount) == "MG/ML")

# extract_unit() is built from vectorised string functions, so it is applied
# to the whole columns at once instead of row by row through mapply().
athena$unit <- extract_unit(
  quantity = athena$quantity,
  amount = athena$amount
)

# The unit extraction needs the amount as text (it uses its length), so the
# conversion to numeric happens only afterwards.
athena$amount <- as.numeric(athena$amount)

# Overview of the extracted units: frequency of each unit (ascending) and the
# number of distinct units.
tab <- table(athena$unit)
sort(tab)
length(tab)
### add the dose_form:
# Fetch a second query from drugsmapping and run it against the same
# endpoint; `res` is overwritten with the new response.
query_cdc_with_quantified_drug <- drugsmapping::get_sparql_query_cdc_with_quantified_drug()
res <- SPARQL::SPARQL(url = ENV_VARIABLES$SPARQL_ENDPOINT,
query = query_cdc_with_quantified_drug)
# Result table of the second query (presumably links clinical drug components
# to quantified drugs and their dose forms -- TODO confirm against the query).
cdc_dose_form <- res$results
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment