Define thresholds based on different categories with rowSums

# Example data: one row per patient visit (two timepoints, "A" and "B", for
# each PatientID). The first four columns are identifiers/demographics; the
# six numeric columns A, B, C, D, E and G (positions 5 to 10) hold the
# measurements and contain some missing values.
df <- data.frame(
  PatientID = c(
    "0002", "0002", "0005", "0005", "0009", "0009", "0018", "0018",
    "0039", "0039", "0043", "0043", "0046", "0046", "0048", "0048"
  ),
  Timepoint = c(
    "A", "B", "A", "B", "A", "B", "A", "B",
    "A", "B", "A", "B", "A", "B", "A", "B"
  ),
  sex = c(
    "F", "F", "M", "M", "F", "F", "M", "M",
    "F", "F", "M", "M", "M", "M", "F", "F"
  ),
  country = c(
    "I", "I", "S", "S", "S", "S", "S", "S",
    "S", "S", "I", "I", "I", "I", "I", "I"
  ),
  A = c(
    NA, 977.146, NA, 964.315, NA, 952.311, NA, 950.797,
    947.465, 902.852, 985.124, NA, 930.141, 1007.790, 1027.110, 999.414
  ),
  B = c(
    998.988, NA, 998.680, NA, 1020.560, 955.540, 911.606, 964.039,
    988.087, 902.367, 959.338, 1029.050, 987.374, 1066.400, 957.512, 917.597
  ),
  C = c(
    987.140, 961.810, 929.466, 978.166, 969.469, 943.398, 936.034, 965.292,
    996.404, 920.610, 967.047, 913.517, 893.428, 921.606, 929.590, 950.493
  ),
  D = c(
    961.810, 929.466, 978.166, 1005.820, 925.752, 969.469, 943.398, 965.292,
    996.404, 967.047, NA, 893.428, 921.606, 976.192, 929.590, 950.493
  ),
  E = c(
    1006.330, 1028.070, 954.274, 1005.910, 949.969, 992.820, 934.407, 948.913,
    961.375, 955.296, 961.128, 998.119, 1009.110, 994.891, 1000.170, 982.763
  ),
  G = c(
    NA, 958.990, 924.680, 955.927, NA, 949.384, 973.348, 984.392,
    943.894, 961.468, 995.368, 994.997, 979.454, 952.605, NA, 956.507
  ),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)

I have this code to flag patients who have 3 or more columns out of range, i.e. above a threshold of 1015:

# Positions of the measurement columns (A, B, C, D, E, G) in df.
cols <- 5:10
# For each row, count the measurements above 1015 (missing values are not
# counted); rows with fewer than 3 such values get "No", all others "Yes".
df$Myo <- ifelse(
  rowSums(df[cols] > 1015, na.rm = TRUE) < 3,
  "No",
  "Yes"
)

I need to adapt this code into two other versions:

  • One version that uses a different threshold by sex: 1004 for females (df$sex == "F") and 986 for males (df$sex == "M").

  • Another version that selects based on 4 thresholds, defined by sex and country:

a) The first threshold is for males (df$sex == "M") living in Sweden (df$country == "S"); this threshold would be 900.

b) The second threshold is for females (df$sex == "F") living in Sweden (df$country == "S"); this threshold would be 1016.

c) The third threshold is for males (df$sex == "M") living in Iceland (df$country == "I"); this threshold would be 800.

d) The fourth threshold is for females (df$sex == "F") living in Iceland (df$country == "I"); this threshold would be 1000.

Thanks!!

I prefer to use data.table to solve this. The function fcase is used to handle the multiple threshold branches. I am not sure whether the output in Myo is what you want. If it is not, please tell me what needs correcting and I will edit the answer.

# Example data: one row per patient visit (two timepoints, "A" and "B", for
# each PatientID). The first four columns are identifiers/demographics; the
# six numeric columns A, B, C, D, E and G (positions 5 to 10) hold the
# measurements and contain some missing values.
df <- data.frame(
  PatientID = c(
    "0002", "0002", "0005", "0005", "0009", "0009", "0018", "0018",
    "0039", "0039", "0043", "0043", "0046", "0046", "0048", "0048"
  ),
  Timepoint = c(
    "A", "B", "A", "B", "A", "B", "A", "B",
    "A", "B", "A", "B", "A", "B", "A", "B"
  ),
  sex = c(
    "F", "F", "M", "M", "F", "F", "M", "M",
    "F", "F", "M", "M", "M", "M", "F", "F"
  ),
  country = c(
    "I", "I", "S", "S", "S", "S", "S", "S",
    "S", "S", "I", "I", "I", "I", "I", "I"
  ),
  A = c(
    NA, 977.146, NA, 964.315, NA, 952.311, NA, 950.797,
    947.465, 902.852, 985.124, NA, 930.141, 1007.790, 1027.110, 999.414
  ),
  B = c(
    998.988, NA, 998.680, NA, 1020.560, 955.540, 911.606, 964.039,
    988.087, 902.367, 959.338, 1029.050, 987.374, 1066.400, 957.512, 917.597
  ),
  C = c(
    987.140, 961.810, 929.466, 978.166, 969.469, 943.398, 936.034, 965.292,
    996.404, 920.610, 967.047, 913.517, 893.428, 921.606, 929.590, 950.493
  ),
  D = c(
    961.810, 929.466, 978.166, 1005.820, 925.752, 969.469, 943.398, 965.292,
    996.404, 967.047, NA, 893.428, 921.606, 976.192, 929.590, 950.493
  ),
  E = c(
    1006.330, 1028.070, 954.274, 1005.910, 949.969, 992.820, 934.407, 948.913,
    961.375, 955.296, 961.128, 998.119, 1009.110, 994.891, 1000.170, 982.763
  ),
  G = c(
    NA, 958.990, 924.680, 955.927, NA, 949.384, 973.348, 984.392,
    943.894, 961.468, 995.368, 994.997, 979.454, 952.605, NA, 956.507
  ),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
library(data.table)
# Convert df to a data.table in place so that `:=` can add columns to it.
setDT(df)
# Positions of the measurement columns (A, B, C, D, E, G) in df.
cols <- 5:10
# Sex-specific cut-offs: a row is "Yes" when at least 3 of the measurement
# columns exceed 1004 (sex "F") or 986 (sex "M"); missing values are not
# counted. Every other row falls through to the default "No".
df[, Myo := fcase(
  sex == "F" & rowSums(.SD > 1004, na.rm = TRUE) >= 3, "Yes",
  sex == "M" & rowSums(.SD > 986, na.rm = TRUE) >= 3, "Yes",
  default = "No"
), .SDcols = cols]

# Sex- and country-specific cut-offs: a row is "Yes" when at least 3 of the
# measurement columns (selected through .SDcols) exceed the cut-off of its
# group; missing values are not counted. Cut-offs:
#   M / S: 900    F / S: 1016    M / I: 800    F / I: 1000
# Rows matching none of the four branches get the default "No".
df[, Myo2 := fcase(
  sex == "M" & country == "S" & rowSums(.SD > 900, na.rm = TRUE) >= 3, "Yes",
  sex == "F" & country == "S" & rowSums(.SD > 1016, na.rm = TRUE) >= 3, "Yes",
  sex == "M" & country == "I" & rowSums(.SD > 800, na.rm = TRUE) >= 3, "Yes",
  sex == "F" & country == "I" & rowSums(.SD > 1000, na.rm = TRUE) >= 3, "Yes",
  default = "No"
), .SDcols = cols]
# Print the table with the new flag column(s).
df
#>     PatientID Timepoint sex country        A        B       C        D        E
#>  1:      0002         A   F       I       NA  998.988 987.140  961.810 1006.330
#>  2:      0002         B   F       I  977.146       NA 961.810  929.466 1028.070
#>  3:      0005         A   M       S       NA  998.680 929.466  978.166  954.274
#>  4:      0005         B   M       S  964.315       NA 978.166 1005.820 1005.910
#>  5:      0009         A   F       S       NA 1020.560 969.469  925.752  949.969
#>  6:      0009         B   F       S  952.311  955.540 943.398  969.469  992.820
#>  7:      0018         A   M       S       NA  911.606 936.034  943.398  934.407
#>  8:      0018         B   M       S  950.797  964.039 965.292  965.292  948.913
#>  9:      0039         A   F       S  947.465  988.087 996.404  996.404  961.375
#> 10:      0039         B   F       S  902.852  902.367 920.610  967.047  955.296
#> 11:      0043         A   M       I  985.124  959.338 967.047       NA  961.128
#> 12:      0043         B   M       I       NA 1029.050 913.517  893.428  998.119
#> 13:      0046         A   M       I  930.141  987.374 893.428  921.606 1009.110
#> 14:      0046         B   M       I 1007.790 1066.400 921.606  976.192  994.891
#> 15:      0048         A   F       I 1027.110  957.512 929.590  929.590 1000.170
#> 16:      0048         B   F       I  999.414  917.597 950.493  950.493  982.763
#>           G Myo Myo2
#>  1:      NA  No   No
#>  2: 958.990  No   No
#>  3: 924.680  No  Yes
#>  4: 955.927  No  Yes
#>  5:      NA  No   No
#>  6: 949.384  No   No
#>  7: 973.348  No  Yes
#>  8: 984.392  No  Yes
#>  9: 943.894  No   No
#> 10: 961.468  No   No
#> 11: 995.368  No  Yes
#> 12: 994.997 Yes  Yes
#> 13: 979.454  No  Yes
#> 14: 952.605 Yes  Yes
#> 15:      NA  No   No
#> 16: 956.507  No   No

Created on 2021-08-03 by the reprex package (v2.0.0)

I came across this code (using dplyr), which also works for the first case:

# Positions of the measurement columns (A, B, C, D, E, G) in df.
cols <- 5:10
# Store sex as a factor; the `==` comparisons below work on it unchanged.
df$sex <- as.factor(df$sex)
# Sex-specific cut-offs: "Yes" when at least 3 measurement columns exceed
# 1004 (sex "F") or 986 (sex "M"), otherwise "No"; missing values are not
# counted.
# NOTE(review): `df[cols]` refers to the outer df, not the piped data, and
# selects columns only if df is a plain data.frame -- confirm df has not
# been converted to a data.table before running this.
df <- df %>%
  mutate(
    Myo = ifelse(
      sex == "F" & (rowSums(df[cols] > 1004, na.rm = TRUE) >= 3),
      "Yes",
      ifelse(
        sex == "M" & (rowSums(df[cols] > 986, na.rm = TRUE) >= 3),
        "Yes",
        "No"
      )
    )
  )

Leave a Comment