Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Cossin Sebastien
ehden-bordeaux-mappings
Commits
11b547fd
Commit
11b547fd
authored
Nov 08, 2021
by
Cossin Sebastien
Browse files
WIP building CDC
parent
29a2d4d7
Changes
1
Hide whitespace changes
Inline
Side-by-side
drugs/HDH/packaging.R
View file @
11b547fd
...
...
@@ -56,6 +56,7 @@ forms$galenique <- stringr::str_extract(string = forms$packaging_norm2, pattern
### Special scenario: packages dispensed per unit ("délivrance à l'unité").
# Rows whose package is a "plaquette" but for which no galenic description
# could be extracted so far. The !is.na() guard keeps the mask free of NA.
plaquette_failed <- with(
  forms,
  !is.na(packages_name) & packages_name == "plaquette" & is.na(galenique)
)
# Rule 2: among those rows, keep the ones whose packaging text mentions
# per-unit dispensing and whose normalized dosage form is a known form.
rule_2 <- plaquette_failed &
  forms$dosageform_norm %in% known_forms &
  grepl("délivrance à l'unité", forms$packaging)
# Number of rows caught by the rule (printed for inspection).
sum(rule_2)
# Describe these rows as one unit of their normalized dosage form.
forms$galenique[rule_2] <- paste("1", forms$dosageform_norm[rule_2])
## special scenario for "par 28" (meaning par 28 comprimés)
...
...
@@ -82,7 +83,8 @@ forms$galenique_quantity <- gsub(",",".",forms$galenique_quantity, fixed = T)
# The quantity was extracted as text; convert it to a number.
forms[["galenique_quantity"]] <- as.numeric(forms[["galenique_quantity"]])

# The galenic name is the first run of characters that are neither digits,
# commas nor dots, with surrounding whitespace removed.
forms[["galenique_name"]] <- trimws(
  stringr::str_extract(forms[["galenique"]], "[^0-9,.]+")
)
# Frequency of each name before punctuation is stripped (printed).
sort(table(forms[["galenique_name"]]))

# Drop every punctuation character from the name.
forms[["galenique_name"]] <- gsub(
  pattern = "[[:punct:]]",
  replacement = "",
  x = forms[["galenique_name"]]
)
# Frequency of each name after punctuation is stripped (printed).
sort(table(forms[["galenique_name"]]))

# either a form or a quantity
## debugging purpose:
# TRUE when the galenic name equals the normalized dosage form, or when it is
# one of the plain volume/mass unit labels listed below.
volume_or_mass_units <- c("l", "ml", "g", "mg")
matches_dose_form <- !is.na(forms[["galenique_name"]]) &
  (forms[["galenique_name"]] == forms[["dosageform_norm"]])
is_same_as_dose_form <- matches_dose_form |
  forms[["galenique_name"]] %in% volume_or_mass_units
...
...
@@ -93,35 +95,140 @@ voir <- subset(forms, !is_same_as_dose_form)
# Normalize the raw reference strength text with the project helper.
forms$refstrength_norm <- RomediETL::normalize_ref_strength(forms$refstrength)

# step1: extract dosage:
# First match of `regex_dosage` (defined outside this section) in the
# normalized reference strength; NA when nothing matches.
forms$ref_dosage <- stringr::str_extract(
  string = forms$refstrength_norm,
  pattern = regex_dosage
)
table(forms$ref_dosage)
undetected <- is.na(forms$ref_dosage)
sum(undetected)

# step2/ packaging or form:
# Build one regex from the de-duplicated list of known forms and packagings.
packaging_and_forms <- c(
  RomediETL::forms_get_list(),
  RomediETL::packaging_get_list()
)
packaging_and_forms <- unique(packaging_and_forms)
regex_packaging_and_forms <- RomediETL::create_regex_from_word_vector(
  packaging_and_forms
)
undetected <- is.na(forms$ref_dosage)
sum(undetected)

# TODO (translated from French): why was this handled separately here?
# NOTE(review): an earlier revision ran the extraction only on the rows where
# no dosage was detected; it is now applied to every row. The leftover,
# unterminated statement from that earlier revision has been dropped.
forms$ref_pack_forms <- stringr::str_extract(
  string = forms$refstrength_norm,
  pattern = regex_packaging_and_forms
)

# Rows with a non-empty reference strength in which neither a dosage nor a
# packaging/form could be recognised.
undetected <- is.na(forms$ref_dosage) &
  is.na(forms$ref_pack_forms) &
  forms$refstrength != ""
sum(undetected)
# Inspection helpers: the undetected rows and the first normalized strength.
voir <- subset(forms, undetected)
voir$refstrength_norm[1]

# Fall back to the normalized dosage form for the undetected rows.
forms$ref_pack_forms[undetected] <- forms$dosageform_norm[undetected]
##### Normalizing the quantity:
# A quantity is the first run of digits, commas and dots.
quantity_regex <- "[0-9,.]+"

# Reference strength: numeric part. Commas are turned into dots before the
# conversion; as.numeric() yields NA for text that is still not a number.
forms$refstrength_quantity <- stringr::str_extract(
  string = forms$refstrength_norm,
  pattern = quantity_regex
)
forms$refstrength_quantity <- gsub(
  ",", ".", forms$refstrength_quantity,
  fixed = TRUE
)
forms$refstrength_quantity <- as.numeric(forms$refstrength_quantity)

# Reference strength: textual part (first run of non-digit, non-comma,
# non-dot characters), trimmed.
forms$refstrength_name <- stringr::str_extract(
  string = forms$refstrength_norm,
  pattern = "[^0-9,.]+"
)
forms$refstrength_name <- trimws(forms$refstrength_name)
sort(table(forms$refstrength_name))

# Detected dosage: numeric part, parsed the same way as above.
forms$ref_dosage_quantity <- stringr::str_extract(
  string = forms$ref_dosage,
  pattern = quantity_regex
)
forms$ref_dosage_quantity <- gsub(
  ",", ".", forms$ref_dosage_quantity,
  fixed = TRUE
)
forms$ref_dosage_quantity <- as.numeric(forms$ref_dosage_quantity)

# Detected dosage: textual part, trimmed and stripped of punctuation.
forms$ref_dosage_name <- stringr::str_extract(
  string = forms$ref_dosage,
  pattern = "[^0-9,.]+"
)
forms$ref_dosage_name <- trimws(forms$ref_dosage_name)
forms$ref_dosage_name <- gsub("[[:punct:]]", "", forms$ref_dosage_name)

## debugging purpose:
# For rows flagged by `undetected` (computed earlier in the script), use the
# normalized dosage form as the name and 1 as the quantity.
forms$refstrength_name[undetected] <- forms$dosageform_norm[undetected]
forms$refstrength_quantity[undetected] <- 1
# Inspection: rows where both a dosage name and a packaging/form were found.
bool <- !is.na(forms$ref_dosage_name) & !is.na(forms$ref_pack_forms)
voir <- subset(forms, bool)

# Inspection: rows whose (non-missing) reference packaging/form also occurs as
# a package name or as a galenic name somewhere in the table.
# The parentheses matter: `&` binds tighter than `|`, so without them the
# !is.na() guard would not apply to the galenique_name test, and a missing
# ref_pack_forms would count as a match as soon as galenique_name contains NA.
bool <- !is.na(forms$ref_pack_forms) &
  (forms$ref_pack_forms %in% forms$packages_name |
    forms$ref_pack_forms %in% forms$galenique_name)
sum(bool)
sum(!bool)
voir <- subset(forms, !bool)
### export
## normalizing the reference:
sort(table(forms$ref_dosage_name))

# Unit conversion table, written row by row as
#   label, label_standard, conversion
# where `conversion` is the factor applied to a quantity expressed in `label`
# to express it in `label_standard` (e.g. "gramme" -> "mg" with factor 1000).
units <- c(
  "mole",        "mole", "1",
  "microlitres", "ml",   "0.001",
  "gramme",      "mg",   "1000",
  "g",           "mg",   "1000",
  "litre",       "ml",   "1000",
  "mg",          "mg",   "1",
  "ml",          "ml",   "1",
  "ug",          "mg",   "0.001"
)
units <- matrix(data = units, ncol = 3, byrow = TRUE)
units <- data.frame(units, stringsAsFactors = FALSE)
colnames(units) <- c("label", "label_standard", "conversion")
# The factors were typed as text so that they fit in the character matrix.
units$conversion <- as.numeric(units$conversion)

# Sanity check (printed): is every unit label present among the dosage names?
all(units$label %in% forms$ref_dosage_name)
## put galenique name and quantity when ref_strength is NA (not detected)
# Rows with no dosage name whose galenic name is itself a known unit label.
bool <- is.na(forms$ref_dosage_name) & forms$galenique_name %in% units$label
sum(bool)
forms$ref_dosage_name[bool] <- forms$galenique_name[bool]
forms$ref_dosage_quantity[bool] <- forms$galenique_quantity[bool]

# Attach the standard unit and conversion factor to every row; all.x = TRUE
# keeps the rows whose dosage name is not in the unit table (NA factor).
forms2 <- merge(
  forms, units,
  by.x = "ref_dosage_name", by.y = "label",
  all.x = TRUE
)
# Quantity expressed in the standard unit.
forms2$ref_dosage_quantity_norm <- forms2$ref_dosage_quantity *
  forms2$conversion

# Rename the standard-unit column brought in by the merge.
col_label_dosage_form_norm <- which(colnames(forms2) == "label_standard")
colnames(forms2)[col_label_dosage_form_norm] <- "ref_dosage_name_norm"
## Drop the rows whose reference strength is empty.
is_empty_ref_strength <- forms2[["refstrength"]] == ""
sum(is_empty_ref_strength)
# which() discards NA comparisons, so only rows that are definitely non-empty
# are kept.
forms3 <- forms2[which(!is_empty_ref_strength), , drop = FALSE]
# Number of forms2 rows whose cis no longer appears in forms3 (printed).
sum(!(forms2[["cis"]] %in% forms3[["cis"]]))
# Author's note: only 14 have no reference strength at all.

# Columns exported for the normalized packaging table.
packaging_columns <- c(
  "cis", "drug", "drugform", "dosageform",
  "packages_quantity", "packages_name",
  "galenique_quantity", "galenique_name",
  "ref_dosage_quantity_norm", "ref_dosage_name_norm"
)
# Keep those columns and remove duplicated rows.
packaging_normalized <- unique(forms3[, packaging_columns, drop = FALSE])
save(packaging_normalized, file = "packaging_normalized.rdata")
# rm(list=ls())
# load("packaging_normalized.rdata")

# Reference dosage per (cis, dosageform), without duplicated rows.
reference_columns <- c(
  "cis", "dosageform",
  "ref_dosage_quantity_norm", "ref_dosage_name_norm"
)
ref_normalized <- unique(
  packaging_normalized[, reference_columns, drop = FALSE]
)
#### Retrieve the quantities
# Both subsets are read through the connection `conn`, which is created
# outside this section.
sa_with_ft_subset <- RomediETL::get_sa_with_ft_subset(conn)
sa_without_ft_subset <- RomediETL::get_sa_without_ft_subset(conn)

# Combine the two subsets with the project helper.
compo_new <- RomediETL::rbind_with_without_ft(
  sa_with_ft_subset,
  sa_without_ft_subset
)
# Column names of the combined table (printed for inspection).
colnames(compo_new)
# Remove the `id` column.
compo_new[["id"]] <- NULL
library(dplyr)
colnames(ref_normalized)

# Number the rows inside each (cis, dosageform) pair; n > 1 means the pair has
# more than one normalized reference dosage.
ref_normalized2 <- ref_normalized %>%
  group_by(cis, dosageform) %>%
  mutate(n = row_number())
multiple_norm <- subset(ref_normalized2, n != 1)

# remove if one is NA
# For every cis that has several candidate rows, drop the candidates whose
# normalized quantity is missing.
bool <- ref_normalized2$cis %in% multiple_norm$cis &
  is.na(ref_normalized2$ref_dosage_quantity_norm)
ref_normalized3 <- subset(ref_normalized2, !bool)

# Renumber the remaining rows and keep the first one of each pair.
# The pipeline must start from the filtered `ref_normalized3`: restarting from
# `ref_normalized2` would silently discard the NA filter applied just above.
# NOTE(review): a pair whose candidates all have a missing quantity, under a
# cis that has several candidate rows, now disappears entirely - confirm that
# this is the intended outcome.
ref_normalized3 <- ref_normalized3 %>%
  group_by(cis, dosageform) %>%
  mutate(n = row_number()) %>%
  ungroup()
ref_normalized3 <- subset(ref_normalized3, n == 1)
## TODO: find a better way

# Drop the helper counter; removing `id` is a no-op when no such column exists.
ref_normalized3$n <- NULL
ref_normalized3$id <- NULL
# Attach the normalized reference dosage to every composition row
# (inner join on cis and dosageform).
compo_new_2 <- merge(compo_new, ref_normalized3, by = c("cis", "dosageform"))
## Row duplication is expected here.
## need to convert the strength:
# NOTE(review): the table below inspects `saucumunit` while the merge that
# follows is keyed on `ftucumunit` - confirm that this is intentional.
sort(table(compo_new_2$saucumunit), decreasing = TRUE)
colnames(units)

# Look up the standard unit and conversion factor of each amount unit;
# all.x = TRUE keeps the rows whose unit is not in the conversion table.
compo_new_3 <- merge(
  compo_new_2, units,
  by.x = "ftucumunit", by.y = "label",
  all.x = TRUE
)
# Rename the two columns brought in from `units` by name rather than by
# position, so the rename keeps working if the number of columns changes.
names(compo_new_3)[names(compo_new_3) == "label_standard"] <-
  "ftucumunit_standard"
names(compo_new_3)[names(compo_new_3) == "conversion"] <- "ft_conversion"

# Amount expressed in the standard unit.
compo_new_3$ftamount_standard <- compo_new_3$ftamount *
  compo_new_3$ft_conversion

# Rows for which both the reference quantity and the standard amount are known.
is_quantify <- !is.na(compo_new_3$ref_dosage_quantity_norm) &
  !is.na(compo_new_3$ftamount_standard)
sum(is_quantify)

# Amount per unit of reference dosage, with a unit written as
# "<amount unit>/<reference unit>". Both columns are created explicitly (NA
# everywhere) before the quantifiable rows are filled in.
compo_new_3$ftamount_quantified <- NA_real_
compo_new_3$ftamount_quantified[is_quantify] <-
  compo_new_3$ftamount_standard[is_quantify] /
  compo_new_3$ref_dosage_quantity_norm[is_quantify]
compo_new_3$ftamount_quantified_unit <- NA_character_
compo_new_3$ftamount_quantified_unit[is_quantify] <- paste(
  compo_new_3$ftucumunit_standard[is_quantify],
  compo_new_3$ref_dosage_name_norm[is_quantify],
  sep = "/"
)
# Inspection: frequency of each reference-strength name (printed).
sort(table(forms$refstrength_name))
# TRUE when the reference-strength name equals the galenic name or the package
# name of the same row; the comparison is NA when the values involved are
# missing, hence na.rm = TRUE in the count below.
same_ref <- forms$refstrength_name == forms$galenique_name |
  forms$refstrength_name == forms$packages_name
sum(same_ref, na.rm = TRUE)
## The label needs to be added:
prefLabels <- RomediR::loadPrefLabels()
# Inspection: composition rows whose ftwikidata code is absent from the
# preferred labels.
bool <- !compo_new_3$ftwikidata %in% prefLabels$code
voir <- subset(compo_new_3, bool)

# Fetch the labels of the mapped Wikidata items in the chosen language and
# lower-case them.
wikidataMappings <- RomediR::loadWikidataMappings()
language <- "en"
wikidata_pref_label <- RomediETL::getWikidataLabels(
  wikidataMappings,
  language = language
)
wikidata_pref_label$itemLabel <- tolower(wikidata_pref_label$itemLabel)

# Attach the label to every composition row (rows without a label are kept).
compo_new_4 <- merge(
  compo_new_3, wikidata_pref_label,
  by.x = "ftwikidata", by.y = "item",
  all.x = TRUE
)
# Rename the label column by name rather than by position.
names(compo_new_4)[names(compo_new_4) == "itemLabel"] <- "ftenlabel"

# Rows for which both the reference quantity and the standard amount are known.
is_quantify <- !is.na(compo_new_4$ref_dosage_quantity_norm) &
  !is.na(compo_new_4$ftamount_standard)
sum(is_quantify)

# Build the `cdc` text as "<label> <amount> <UNIT>": the per-reference amount
# and unit when the row is quantifiable, the standard amount and unit
# otherwise.
# NOTE(review): in the fallback branch a missing amount or unit is pasted as
# the literal text "NA" - confirm that this is acceptable.
compo_new_4$cdc <- ifelse(
  is_quantify,
  paste0(
    compo_new_4$ftenlabel, " ",
    compo_new_4$ftamount_quantified, " ",
    toupper(compo_new_4$ftamount_quantified_unit)
  ),
  paste0(
    compo_new_4$ftenlabel, " ",
    compo_new_4$ftamount_standard, " ",
    toupper(compo_new_4$ftucumunit_standard)
  )
)
# No label, no text.
bool <- is.na(compo_new_4$ftenlabel)
compo_new_4$cdc[bool] <- NA
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment