Created
November 26, 2016 22:14
-
-
Save KBlansit/278a979237cc129922b3f511017d1580 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # import libraries | |
| require(data.table) | |
| require(dplyr) | |
| # define system variables | |
| PROCEDURE <- '^proc_p$|^oproc[0-9]{1,2}' | |
| DIAGNOSIS <- '^diag_p$|^odiag[0-9]{1,2}' | |
| ROBOTIC_PROCEDURE <- '^174[^1]{1}' | |
| MIN_VOL <- 25 | |
| # diagnosis strings | |
| CCY_DIAGNOSIS <- '^5770|^574[0-9]{0,1}' | |
| APPY_DIAGNOSIS <- '^54[1-2]{1}|^540[0,1,9]{1}' | |
| GBP_DIAGNOSIS <- '^2780[0,1]{0,1}|^278[8,1]{1}' | |
| HERNIA_DIAGNOSIS <- '^5522[0,1,9]{0,1}|^5512[0,1,9]{0,1}|^5532[0,1,9]{0,1}' | |
| COLEC_DIAGNOSIS <- '^153[0-9]{0,1}' | |
| PROST_DIAGNOSIS <- '^185|^2334' | |
| HYSTR_DIAGNOSIS <- '^6170|^5680|^6146|^22[0,1]{1}|620[1,2]{1}|^621|^614[1,2,3,4,8]{0,1}|^625[8,9]{1}|^179|180[0,1,8,9]{0,1}' | |
| MYOM_DIAGNOSIS <- '^218[0,1,2,9]{0,1}|^6541' | |
| # procedure strings | |
| CCY_PROCEDURE <- '^512[1,2,3,4]{0,1}' | |
| APPY_PROCEDURE <- '^470[1,9]{0,1}|^54[5,2]{1}1' | |
| GBP_PROCEDURE <- '^443[1,9,8]{1}|^445|449[5,6,7,8,9]{1}|446[8,9]{1}' | |
| HERNIA_PROCEDURE <- '^535[1,9]{1}|536[1,2,9]{0,1}' | |
| COLEC_PROCEDURE <- '^457[1,2,3,4,5,6,9]|^458|^485|^173[1,9]{0,1}' | |
| PROST_PROCEDURE <- '^602[1,9]{0,1}|^609[6,7,9]{1}|^60[3,4,5]{1}|^606[1,2,9]{1}' | |
| HYSTR_PROCEDURE <- '^68[0-9]{0,2}' | |
| MYOM_PROCEDURE <- '^68[0-9]{0,2}|^6919' | |
| # names for procedures | |
| CCY_NAME = 'CCY' | |
| APPY_NAME = 'APPY' | |
| GBP_NAME = 'GPB' | |
| HERNIA_NAME = 'HERNIA' | |
| COLEC_NAME = 'COLEC' | |
| PROST_NAME = 'PROST' | |
| HYSTR_NAME = 'HYST' | |
| MYOM_NAME = 'MYOM' | |
| regex_table <- rbind( | |
| c(CCY_NAME, CCY_PROCEDURE, CCY_DIAGNOSIS), | |
| c(APPY_NAME, APPY_PROCEDURE, APPY_DIAGNOSIS), | |
| c(GBP_NAME, GBP_PROCEDURE, GBP_DIAGNOSIS), | |
| c(HERNIA_NAME, HERNIA_PROCEDURE, HERNIA_DIAGNOSIS), | |
| c(COLEC_NAME, COLEC_PROCEDURE, COLEC_DIAGNOSIS), | |
| c(PROST_NAME, PROST_PROCEDURE, PROST_DIAGNOSIS), | |
| c(HYSTR_NAME, HYSTR_PROCEDURE, HYSTR_DIAGNOSIS), | |
| c(MYOM_NAME, MYOM_PROCEDURE, MYOM_DIAGNOSIS) | |
| ) | |
| colnames(regex_table) <- c('Name', 'Procedure', 'Diagnosis') | |
| regex_table <- as.data.frame(regex_table) | |
| regex_table$Name <- as.character(regex_table$Name) | |
| regex_table$Procedure <- as.character(regex_table$Procedure) | |
| regex_table$Diagnosis <- as.character(regex_table$Diagnosis) | |
| # read in data | |
| oshpd_08 <- read.csv('OSHPD_08.csv', header = T) | |
| oshpd_09 <- read.csv('OSHPD_09.csv', header = T) | |
| oshpd_10 <- read.csv('OSHPD_10.csv', header = T) | |
| oshpd_11 <- read.csv('OSHPD_11.csv', header = T) | |
| oshpd_12 <- read.csv('OSHPD_12.csv', header = T) | |
| oshpd_13 <- read.csv('OSHPD_13.csv', header = T) | |
| processOshpd <- function(year, dt, regex_table, DIAGNOSIS, PROCEDURE, robot_query) { | |
| processRegexRow <- function(regex_row, mtx_diag, mtx_proc, robot_query) { | |
| queryData <- function(mtx, regexQuery) { | |
| # mtx: matrix to query | |
| # regexQuery: regex to query | |
| loc <- grepl(regexQuery, mtx) | |
| dim(loc) <- dim(mtx) | |
| sums <- as.logical(rowSums(loc) > 0) | |
| } | |
| if(dim(mtx_diag)[1] != dim(mtx_proc)[1]) { | |
| stop('mtx_diag and mtx_proc require similiar rows') | |
| } | |
| rslt <- rep(NA, dim(mtx_diag)[1]) | |
| proc_rows <- queryData(mtx_diag, regex_row['Diagnosis']) * queryData(mtx_proc, regex_row['Procedure']) | |
| rob_rows <- queryData(mtx_diag, regex_row['Diagnosis']) * queryData(mtx_proc, robot_query) | |
| proc_rows <- as.logical(proc_rows) | |
| rob_rows <- as.logical(rob_rows) | |
| rslt[proc_rows] <- 'non-robotic' | |
| rslt[rob_rows] <- 'robotic' | |
| rtn_dt = as.data.frame(rslt) | |
| colnames(rtn_dt) <- as.character(regex_row['Name']) | |
| return(rtn_dt) | |
| } | |
| mtx_diag <- as.matrix(dt[, grepl(DIAGNOSIS, names(dt)), with = FALSE]) | |
| mtx_proc <- as.matrix(dt[, grepl(PROCEDURE, names(dt)), with = FALSE]) | |
| lsted_dts <- apply(regex_table, 1, FUN = processRegexRow, | |
| mtx_diag = mtx_diag, mtx_proc = mtx_proc, robot_query = ROBOTIC_PROCEDURE) | |
| procs <- do.call(cbind, lsted_dts) | |
| vld_rows <- rowSums(is.na(procs)) < nrow(regex_table) | |
| multi_rows <- rowSums(is.na(procs)) < nrow(regex_table) - 1 | |
| # hard code vars | |
| poor_insur <- c(2, 5, 7) | |
| other_cols <- c('los', 'charge', 'adm_src', 'pay_cat', 'oshpd_id') | |
| # deal with single diagnosis | |
| singular_dt <- cbind(procs[vld_rows, ], dt[vld_rows, other_cols, with = FALSE]) | |
| singular_dt$year <- year | |
| return(singular_dt) | |
| } | |
| # process OSHPD data | |
| df_08 <- processOshpd('08', data.table(oshpd_08), regex_table, DIAGNOSIS, PROCEDURE, robot_query) | |
| df_09 <- processOshpd('09', data.table(oshpd_09), regex_table, DIAGNOSIS, PROCEDURE, robot_query) | |
| df_10 <- processOshpd('10', data.table(oshpd_10), regex_table, DIAGNOSIS, PROCEDURE, robot_query) | |
| df_11 <- processOshpd('11', data.table(oshpd_11), regex_table, DIAGNOSIS, PROCEDURE, robot_query) | |
| df_12 <- processOshpd('12', data.table(oshpd_12), regex_table, DIAGNOSIS, PROCEDURE, robot_query) | |
| df_13 <- processOshpd('13', data.table(oshpd_13), regex_table, DIAGNOSIS, PROCEDURE, robot_query) | |
| # bind data | |
| df <- rbind(df_08, df_09, df_10, df_11, df_12, df_13) | |
| # aggegrate | |
| aggegrateHosps <- function(regex_table, df) { | |
| aggegrateTotal <- function(df, regex_table) { | |
| robRows <- rowSums(df[regex_table$Name] == 'robotic', na.rm = TRUE) > 0 | |
| df$TOTAL <- 'non-robotic' | |
| df$TOTAL[robRows] <- 'robotic' | |
| temp_df <- count(df, TOTAL, oshpd_id, year) | |
| temp_df<- temp_df[!is.na(temp_df[1]),] | |
| wide_df <- dcast(temp_df, oshpd_id + year ~ TOTAL, value.var = 'n') | |
| wide_df$robotic[is.na(wide_df$robotic)] <- 0 # this is okay | |
| colnames(wide_df)[grepl('robotic$', names(wide_df))] <- | |
| paste("TOTAL", names(wide_df)[grepl('robotic$', names(wide_df))], sep = '_') | |
| wide_df$TOTAL_VOL <- wide_df$`TOTAL_non-robotic` + wide_df$TOTAL_robotic | |
| wide_df$TOTAL_PROP <- wide_df$TOTAL_robotic / wide_df$TOTAL_VOL | |
| rtn_cols <- c("oshpd_id", "year", "TOTAL_VOL", "TOTAL_robotic", "TOTAL_PROP") | |
| return(wide_df[rtn_cols]) | |
| } | |
| aggegrateType <- function(type, df) { | |
| temp_df <- eval(parse(text = paste('count(df,', type, ', oshpd_id, year)'))) | |
| colnames(temp_df)[1] <- type | |
| # remove NAs | |
| temp_df<- temp_df[!is.na(temp_df[1]),] | |
| typeFormula <- as.formula(paste('oshpd_id + year ~ ', type, sep = '')) | |
| wide_df <- dcast(temp_df, typeFormula, value.var = 'n') | |
| wide_df$robotic[is.na(wide_df$robotic)] <- 0 # this is okay | |
| colnames(wide_df)[grepl('robotic$', names(wide_df))] <- | |
| paste(type, names(wide_df)[grepl('robotic$', names(wide_df))], sep = '_') | |
| typeTotal <- cbind(wide_df[paste(type, "_non-robotic", sep = "")] + | |
| wide_df[paste(type, "_robotic", sep = "")]) | |
| typeProp <- cbind(wide_df[paste(type, "_robotic", sep = "")] / typeTotal) | |
| newCols <- as.data.frame(cbind(typeTotal, typeProp)) | |
| colnames(newCols) <- paste(type, c("_VOL", "_PROP"), sep = "") | |
| wide_df <- cbind(wide_df, newCols) | |
| rtn_cols <- c("oshpd_id", "year", paste(type, c("_VOL", "_robotic", "_PROP"), sep = "")) | |
| return(wide_df[rtn_cols]) | |
| } | |
| mainDf <- aggegrateTotal(df, regex_table) | |
| dfLst <- lapply(regex_table$Name, aggegrateType, df = df) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment