# Lena Jäger, Zhong Chen, Qiang Li, Chien-Jer Charles Lin, Shravan Vasishth
# The subject-relative advantage in Chinese: Evidence for expectation-based processing
# published in Journal of Memory and Language 79–80, pp. 97–120, 2015. 

# Data Analysis of Experiment 1



# Design: 2x2 fully crossed factorial design:
# Factor I: Modification type (subject modification; object modification)
# Factor II: Relative clause type (subject relative; object relative)
# Condition labels:
	# a) subject modification; subject relative
	# b) subject modification; object relative
	# c) object modification; subject relative
	# d) object modification; object relative


# Description of the dataset:
	# item: item id
	# cond: experimental condition, coded as defined above
	# subject: participant id
	# region.id: region of interest, coded as follows:
		# N: Main clause subject (in object-modifications only)
		# V: Main clause verb (in object-modifications only)
		# Det+CL: determiner+classifier   
		# Adv: adverb     
		# VN: RC-verb+RC-object (subject relatives) | RC-subject+RC-verb (object relatives)
			# NOTE: these two words were merged to one region *after* the experiment;
			#		they were presented as separate regions in the experiment 
		# FreqP: frequency phrase/durational phrase
		# DE: relativizer "de"
		# head: relative clause head noun
		# hd1: 1st word after the head noun
		# hd2: 2nd word after the head noun
		# hd3: 3rd word after the head noun
		# hd4: 4th word after the head noun (only in subject-modifications)
		# hd5: 5th word after the head noun (only in subject-modifications)
	# rt: reading time (defined as latency of key-presses in a self-paced reading moving window paradigm)
		# NOTE: The reading time of the relative clause region V-N/N-V is computed by summing up the reading times of the relative clause verb and the noun inside the relative clause; The verb and the noun were presented as two separate regions in the experiment. 		  
	# correct_answer: correct answer to the comprehension question (Y: yes; N: no)
	# accuracy: accuracy of the participant's answer to the comprehension question, coded as 1 if the answer is correct, 0 otherwise
	


library(MASS)
library(lme4)

# Load the data of Experiment 1.
# NOTE: the workspace is deliberately not cleared with rm(list = ls()) here.
# Every object used below is created by this script, and wiping the caller's
# global environment is a destructive side effect.
data <- read.table("JaegerChenLiLinVasishth2015_Exp1.txt", header = TRUE)

# Fail early, with a clear message, if a column used by the analyses below is
# missing from the data file.
required_cols <- c("item", "cond", "subject", "region.id", "rt", "accuracy")
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
  stop(
    "Missing column(s) in data file: ", paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}


# --------------------------------------------------------------------------------
#               CONTRAST CODING 
# --------------------------------------------------------------------------------
# main effects and interaction:
#      					 a   b   c   d
# Modification			-1	-1	+1	+1  	Main effect of modification type
# RCtype				-1	+1	-1  +1    	Main effect of relative clause type
# RCtype_x_Modification	+1	-1	-1  +1     	Interaction relative clause type x modification type

## Nested contrasts:
#      				 a   b   c   d
#RCtype_subjMod   	-1   1   0   0  	Effect of SR/OR within subject modifying 
#RCtype_objMod    	 0   0  -1   1  	Effect of SR/OR within object modifying RCs
#Modification   	-1  -1   1   1  	Main effect of modification
# scale to 0.5

# MAIN EFFECTS AND INTERACTION
# Each contrast is coded as -0.5 / +0.5. A logical membership test is
# subtracted from 0.5, so TRUE (member of the listed conditions) yields -0.5
# and FALSE yields +0.5.

# Modification: main effect of modification type (obj-mod - subj-mod);
# conditions a and b are coded -0.5, all others +0.5
data$Modification <- 0.5 - (data$cond %in% c("a", "b"))

# RCtype: main effect of extraction type (OR - SR);
# conditions a and c are coded -0.5, all others +0.5
data$RCtype <- 0.5 - (data$cond %in% c("a", "c"))

# RCtype_x_Modification: interaction extraction type x modification type;
# conditions b and c are coded -0.5, all others +0.5
data$RCtype_x_Modification <- 0.5 - (data$cond %in% c("b", "c"))


# NESTED CONTRASTS
# Each nested contrast compares two conditions (-0.5 vs. +0.5) and is 0 for
# every other condition.

# RCtype_subjMod: subj-mod_OR - subj-mod_SR (a = -0.5, b = +0.5, else 0)
data$RCtype_subjMod <- 0.5 * ((data$cond == "b") - (data$cond == "a"))

# RCtype_objMod: obj-mod_OR - obj-mod_SR (c = -0.5, d = +0.5, else 0)
data$RCtype_objMod <- 0.5 * ((data$cond == "d") - (data$cond == "c"))

# --------------------------------------------------------------------------------
#               ANALYSIS of RESPONSE ACCURACIES  
# --------------------------------------------------------------------------------
# Subset the data such that there is one row per item and subject: the full
# data set has one row per region, so a single region (VN) is selected.
data_acc <- subset(data, region.id == "VN")

# ERRATUM: In the paper, we write: "In subject-modifying cases, SRs had
# accuracy 85%, and ORs 85%; for object-modifying cases, the corresponding
# accuracies were 84% and 86%." This sentence is wrong; it should be corrected
# to: "In subject-modifying cases, SRs had accuracy 84%, and ORs 86%; for
# object-modifying cases, the accuracy was 85% for both relative clause types."

# Percent correct per condition. Computed on the one-row-per-trial subset
# (data_acc) rather than on the full data, so that every trial contributes
# exactly once regardless of how many region rows it has.
means.q <- round(tapply(data_acc$accuracy, data_acc$cond, mean) * 100)
print(means.q)

# -------------- Main effects and interaction
# Logistic mixed model of response accuracy with by-item and by-subject random
# intercepts and slopes for all three contrasts.
answer_accuracy_m0 <- glmer(
  accuracy ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | item) +
    (1 + Modification + RCtype + RCtype_x_Modification | subject),
  family = binomial(),
  data = data_acc,
  control = glmerControl(optimizer = "bobyqa")
)
summary(answer_accuracy_m0)

# -------------- Nested contrasts
# Same model structure, with the effect of relative clause type estimated
# separately within subject and object modification.
answer_accuracy_m1 <- glmer(
  accuracy ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject),
  family = binomial(),
  data = data_acc,
  control = glmerControl(optimizer = "bobyqa")
)
summary(answer_accuracy_m1)



# --------------------------------------------------------------------------------
#               ANALYSIS of READING TIMES
# --------------------------------------------------------------------------------

# -------------- Main effects and interaction
# Linear mixed models of log reading time, fitted separately for each region,
# with by-subject and by-item random intercepts and slopes for all fixed
# effects in the model.

# Region: Adv (only the modification contrast enters this model)
m1.Adv <- lmer(
  log(rt) ~ Modification +
    (1 + Modification | subject) +
    (1 + Modification | item),
  data = subset(data, region.id == "Adv"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.Adv)

# Region: VN
m1.VN <- lmer(
  log(rt) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, region.id == "VN"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.VN)

# Region: FreqP
m1.FreqP <- lmer(
  log(rt) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, region.id == "FreqP"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.FreqP)

# Region: DE
m1.DE <- lmer(
  log(rt) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, region.id == "DE"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.DE)

# Region: RC head noun
m1.head <- lmer(
  log(rt) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, region.id == "head"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.head)


# -------------- Nested contrasts
# Linear mixed models of log reading time, fitted separately for each region.
# The effect of relative clause type is estimated within subject modification
# (RCtype_subjMod) and within object modification (RCtype_objMod), alongside
# the modification contrast; all three also enter as by-subject and by-item
# random slopes.

# Region: VN
m2.VN <- lmer(
  log(rt) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "VN"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.VN)

# Region: FreqP
m2.FreqP <- lmer(
  log(rt) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "FreqP"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.FreqP)

# Region: DE
m2.DE <- lmer(
  log(rt) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "DE"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.DE)

# Region: RC head noun
m2.head <- lmer(
  log(rt) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "head"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.head)

# Region: head+1
m2.hd1 <- lmer(
  log(rt) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "hd1"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.hd1)

# Region: head+2
m2.hd2 <- lmer(
  log(rt) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "hd2"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.hd2)

