bos_pit.csv
) to the desired data set.
The following is a possible solution.
= "BOS" & saving_name
"(none)",
saving_name, NA),
# Whether the starting pitcher "won"
bstpwon = ifelse(bstp == bwp, bstp, NA)
)
# (2) "Reshape" x from wide to long: stack the four pitcher columns into a
#     single `pitcher` column and label every stacked segment with the outcome
#     it represents. The label order must match the column order used in c():
#     bstp -> "started", bwp -> "won", bsvp -> "saved", bstpwon -> "won_as_stp".
longx <- with(x, data.frame(pitcher = c(bstp, bwp, bsvp, bstpwon)))
longx$outcome <- factor(
  rep(c("started", "won", "saved", "won_as_stp"), each = nrow(x))
)

# (3) Remove any missing pitcher values, i.e. keep only the rows where a
#     pitcher is actually recorded (note the negation of is.na()).
longx <- subset(longx, !is.na(pitcher))

# (4) Calculate the number of starts, saves, wins, and wins as
#     starting pitcher for each pitcher. The result has one row per
#     (pitcher, outcome) pair with columns Group.1 (pitcher),
#     Group.2 (outcome) and x (the count).
newx <- with(
  longx,
  aggregate(x = outcome, by = list(pitcher, outcome), FUN = length)
)

# (5) Reshape newx so each level of outcome is its own column
wide.newx <- reshape(
  newx,
  direction = "wide",
  v.names = "x",
  timevar = "Group.2",
  idvar = "Group.1"
)

# Rename the columns. The count columns are expected in the order of the
# outcome factor levels (saved, started, won, won_as_stp), so the new names
# are listed in that same order.
# NOTE(review): this assumes every outcome occurs at least once in the data;
# otherwise fewer than five columns exist and the assignment fails.
names(wide.newx) <- Cs(pitcher, saves, starts, wins, wins_as_stp)
# (6) Make some changes wide.newx wide.newx <- upData(wide.newx, # Replace all missing values with 0 starts = ifelse(is.na(starts), 0, starts), wins_as_stp = ifelse(is.na(wins_as_stp), 0, wins_as_stp), wins = ifelse(is.na(wins), 0, wins), saves = ifelse(is.na(saves), 0, saves), # Add a "win_per" column = wins_as_stp/starts # --> replace any win_per values of NaN with 0 win_per = ifelse(wins_as_stp =0 & starts = 0, wins_as_stp/starts,