Cleaning (and plotting) reaction times.

Hi all. I’m starting a new series of post in which I will show the tedious operations that I’ve programmed in my everyday job at the psychology lab. I’m not starting from the bottom (get the data, etc)… But my plan is cover all the process and share the code to try to make the life easier for the community : )

FULL CODE AT GITHUB

# Snippet to call and install all libraries required with pacman package
if (!require('pacman')) install.packages('pacman'); library('pacman') 
p_load(tidyverse, yarrr, trimr, lattice, doBy, Rmisc, reshape2)

# Dataset taken from trimr package. Check it out and 
# don't miss this other way to do Reaction Time outlier detection
# at https://cran.r-project.org/web/packages/trimr/vignettes/trimr-vignette.html
data(exampleData)
head(exampleData)  

# First of all: watch your data with lattice
# Several outliers appears at the boxplot
bwplot(~rt | participant, data = exampleData)
#bwplot(~rt | participant, data = exampleData %>% filter(., condition =="Switch"))
#bwplot(~rt | participant, data = exampleData %>% filter(., condition =="Switch"))


clean_rt <- function(df, rt, subject, form_extra="", borders = "")
{
  # '@df = data.frame were execute the outlier detection
  # '@rt = string with the reaction time colname
  # '@form_extra = categories we want to split.
  # '@border = a vector with the upper and lower bounds, example: c("200", "4000") if we want to 
  #  delete rt lower than 200, and upper than 4000.
  if(form_extra != ""){
    function_to_apply = as.formula(paste(rt, "~", subject, "+", form_extra))
    key_of_joins = c(subject, form_extra)
  } else {
    function_to_apply = as.formula(paste(rt, "~", subject))
    key_of_joins = c(subject)
  }
  
  require(doBy)
  
  
  
  df <-data.frame(merge(df,
                        summaryBy(function_to_apply, data = df,
                                  FUN = function(x) { 
                                    # notice that changing the function
                                    # you can do other things
                                    c(lower = boxplot.stats(x)$stats[1],
                                      upper = boxplot.stats(x)$stats[5]) 
                                  }), key_of_joins))
  # Create two new response and rt variables to manipulate
  df$clean.rt<-df[, rt]
  lower_name = paste0(rt, ".lower")
  upper_name = paste0(rt, ".upper")
  
  
  # Remove from the new variables the extreme values -> Below Q1-1.5*IQR or Over Q3+1.5*IQR
  df$clean.rt[
    which(df[,rt] <= df[, lower_name] | df[,rt] >= df[, upper_name])]<-NA
  if(borders!= ""){
    df$clean.rt[
      which(df[,rt] <= df[, lower_name] | df[,rt] >= df[, upper_name]) | df[,rt] <= border[1] | df[,rt] >= border[2] ]<-NA
  }  
  
  
  # Make the new rt variable numeric again
  df$clean.rt<-as.numeric(as.character(df$clean.rt))
  
  
  return(df)
  
}
# Apply the new function to our data frame
df <- clean_rt(exampleData, rt="rt", subject="participant") df %>% head
# Check with lattice the distribution
bwplot(~clean.rt | participant, df)



# Calculate how many trials removed
(trials_removed <- plyr::rename(merge(c(sum(is.na(df$clean.rt))),
                                     c(100*sum(is.na(df$clean.rt))/length(df$clean.rt))),
                               c("x" = "Removed", y = "% of total"))  )


# Info by participant and block
(trials_removed_parts <- plyr::rename(data.frame(summaryBy(clean.rt ~ participant, data = df,
                                           FUN = function(x) { 
                                             c(sum(is.na(x)), round(100*sum(is.na(x))/length(x),2))
                                           })), c("clean.rt.FUN1" = "Removed", "clean.rt.FUN2" = "Removed %")) )

# create the summary of rt by it's median, quartiles and top deciles 

df.summary <- df %>% group_by(., participant, condition) %>% 
  dplyr::summarise(., rt.median = median(clean.rt, na.rm = TRUE), 
                        q1 =quantile(clean.rt, probs = c(0.25), na.rm=TRUE),
                        q3 =quantile(clean.rt, probs = c(0.75), na.rm=TRUE),
                        c1 =quantile(clean.rt, probs = c(0.1), na.rm=TRUE),
                        c9 =quantile(clean.rt, probs = c(0.9), na.rm=TRUE))
df.summary <- left_join(df.summary, trials_removed_parts)
df.summary
p1 <- function(){ # is easier in this way to save the plot: create a function and call it later
pirateplot(rt.median ~ condition, data =  df.summary,  
           pal = gray(.05), 
           inf.method = "ci", # default is HDI 
           inf.within = "participant",
           inf.disp = "bean",
           main= "That's our title",
           theme = 1,  sortx = "sequential", jitter.val = 0.1, xlab=" ",
           ylab = " ", cex.lab=2.5, gl.col = gray(.12), gl.lwd = 0.1 ,
           bean.b.o = 0.9, bean.f.o = 0.5, inf.f.o = 0.4, point.cex = 1.7, 
           point.o = 0.5, point.pch = 16, cex.axis = 1)
 
}

bmp("plot.bmp", width = 7, height = 5, units = "in" , res = 160) # create the plot to save it
p1()
dev.off()

# We want to graph the difference between tasks. 
# We have a within measures design, so we remove the between participant variation
# with the  Morey method trough the Rmisc package 
# check http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
df.norm <- normDataWithin(df.summary, idvar = "participant", measurevar = "rt.median")

# when we deal with few data point, we can use a stripchart. 
# Stripchart is a 1 dimension plot of the values. It's incredibly
# powerfull. No data is behind barplot and confidence intervals 
# In our case we want to show the change of each participant 
plot_stripchart <- function(df, cond, value, participant, title = " "){
  # Based on https://www.r-bloggers.com/visualizing-small-scale-paired-data-combining-boxplots-stripcharts-and-confidence-intervals-in-r/
  custom_formula <- as.formula(paste0(value, " ~ ", cond))
  df.plot <- dcast(df[,c(cond, value ,participant)], as.formula(paste0( participant, "~", cond)) , value.var = value )
  stripchart( custom_formula, data=df, vertical=T,pch=16,method="jitter" , main = title, ylab = " ")
  conds_label <- unique(as.character(df[[cond]] ) )
  pre <-  df.plot[[conds_label[1]]]
  post <-  df.plot[[conds_label[2]]]
  s<-seq(length(pre))
  segments(rep(1.2,length(pre))[s],pre[s],rep(1.8,length(pre))[s],post[s],col=1,lwd=0.5)
}

bmp("strip.bmp", width = 7, height = 5, units = "in" , res = 160) # create the plot to save it
plot_stripchart(df.norm, cond="condition", value= "rt.medianNormed",
                participant = "participant", title="This is a title")
mtext("Median RT ", side = 2, line = 2, cex = 1.5, font = 2)
dev.off()