# Lena Jäger, Zhong Chen, Qiang Li, Chien-Jer Charles Lin, Shravan Vasishth
# The subject-relative advantage in Chinese: Evidence for expectation-based processing
# published in Journal of Memory and Language 79–80, pp. 97–120, 2015. 

# Data Analysis of Experiment 2


# Design: 2x2 fully crossed factorial design:
# Factor I: Modification type (subject modification; object modification)
# Factor II: Relative clause type (subject relative; object relative)
# Condition labels:
	# a) subject modification; subject relative
	# b) subject modification; object relative
	# c) object modification; subject relative
	# d) object modification; object relative


# Description of the dataset:
	# condition: experimental condition, coded as defined above
	# subject: participant id
	# trial: experimental trial number
	# item: item id
	# list: Latin Square List id
	# accuracy: accuracy of the participant's answer to the comprehension question, coded as 1 if the answer is correct, 0 otherwise
	# correct_answer: correct answer to the comprehension question ("yes" vs. "no")
	# FFP: first-fixation probability, coded as 1 iff the region was fixated in first-pass reading, 0 otherwise
	# FPRT: first-pass reading time
	# TFT: total-fixation time
	# RPD: regression-path duration
	# FPReg: first-pass regression probability, coded as 1 iff a regression was initiated from the respective region in first-pass reading, 0 otherwise
	# region.id: region of interest, coded as follows:
		# N: Main clause subject (in object-modifications only)
		# V: Main clause verb (in object-modifications only)
		# Det+CL: determiner+classifier   
		# Adv: adverb     
		# VN: RC-verb+RC-object (subject relatives) | RC-subject+RC-verb (object relatives) 
		# FreqP: frequency phrase/durational phrase
		# DE: relativizer "de"
		# head: relative clause head noun
		# hd1: 1st word after the head noun
		# hd2: 2nd word after the head noun
		# hd3: 3rd word after the head noun
		# hd4: 4th word after the head noun (only in subject-modifications)
		# hd5: 5th word after the head noun (only in subject-modifications)
			  
	


library(MASS)
library(lme4)

# Load the Experiment 2 data set (text file with a header row; the path is
# resolved relative to the current working directory).
# NOTE: the global environment is intentionally NOT cleared with
# rm(list = ls()) here. Every object used below is (re)created by this script,
# and wiping the workspace would silently destroy unrelated objects of whoever
# sources this file.
data <- read.table("JaegerChenLiLinVasishth2015_Exp2.txt", header = TRUE)


# --------------------------------------------------------------------------------
#               CONTRAST CODING 
# --------------------------------------------------------------------------------
# MAIN EFFECTS AND INTERACTION
#      					 a   b   c   d
# Modification			-1	-1	+1	+1  	Main effect of modification type
# RCtype				-1	+1	-1  +1    	Main effect of relative clause type
# RCtype_x_Modification	+1	-1	-1  +1     	Interaction relative clause type x modification type

# NESTED CONTRASTS
#   				 a   b   c   d
#RCtype_subjMod   	-1   1   0   0  	Effect of SR/OR within subject modifying 
#RCtype_objMod    	 0   0  -1   1  	Effect of SR/OR within object modifying RCs
#Modification   	-1  -1   1   1  	Main effect of modification
# scale to 0.5

# MAIN EFFECTS AND INTERACTION
# Each predictor is built as a +/-1 contrast (see the table above) and then
# scaled to +/-0.5.

# Modification: main effect of modification type (obj-mod minus subj-mod);
# conditions a and b are coded negative, all remaining rows positive
data$Modification <- 0.5 * ifelse(data$cond %in% c("a", "b"), -1, 1)

# RCtype: main effect of extraction type (OR minus SR);
# conditions a and c are coded negative, all remaining rows positive
data$RCtype <- 0.5 * ifelse(data$cond %in% c("a", "c"), -1, 1)

# RCtype_x_Modification: interaction of extraction type and modification type;
# conditions b and c are coded negative, all remaining rows positive
data$RCtype_x_Modification <- 0.5 * ifelse(data$cond %in% c("b", "c"), -1, 1)

# NESTED CONTRASTS
# RCtype_subjMod: OR minus SR within subject modification (b vs. a);
# all other conditions are coded 0
data$RCtype_subjMod <- 0.5 * ifelse(
  data$cond == "a", -1,
  ifelse(data$cond == "b", 1, 0)
)

# RCtype_objMod: OR minus SR within object modification (d vs. c);
# all other conditions are coded 0
data$RCtype_objMod <- 0.5 * ifelse(
  data$cond == "c", -1,
  ifelse(data$cond == "d", 1, 0)
)


# --------------------------------------------------------------------------------
#               ANALYSIS of RESPONSE ACCURACIES  
# --------------------------------------------------------------------------------
# Reduce the region-level data to one row per subject and item by keeping a
# single region (VN) per trial; accuracy is analysed at the trial level.
data_acc <- subset(data, region.id == "VN")

# Mean response accuracy (in percent, rounded) by experimental condition.
# The means are computed on data_acc, not on the full region-level data, so
# that every trial contributes exactly one observation regardless of how many
# region rows it has.
means.q <- round(tapply(data_acc$accuracy, data_acc$cond, mean) * 100)
print(means.q)

# -------------- Main effects and interaction
# Logistic mixed model with crossed random intercepts for subjects and items.
answer_accuracy_m0 <- glmer(
  accuracy ~ Modification + RCtype + RCtype_x_Modification +
    (1 | subject) + (1 | item),
  data = data_acc,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
summary(answer_accuracy_m0)

# -------------- Nested contrasts
# Same random-effects structure; RC-type effect estimated separately within
# subject-modifying and object-modifying relative clauses.
answer_accuracy_m2 <- glmer(
  accuracy ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 | subject) + (1 | item),
  data = data_acc,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
summary(answer_accuracy_m2)


# --------------------------------------------------------------------------------
#                       ANALYSIS of EYE MOVEMENTS
# --------------------------------------------------------------------------------
# ! NOTE: The analyses reported in Jäger et al. (2015), The subject-relative advantage in Chinese, J. Mem. Lang. 79–80, 97–120, were conducted with lme4 version 1.0-6. With later versions of lme4, some of the models specified below do not converge or show pathological random-effects correlations of +/-1.0.

# --------------------- MAIN EFFECTS and INTERACTIONS ----------------------------

# -------- Dependent Variable: First-pass reading time FPRT ----------------------
# Linear mixed models on log-transformed FPRT; rows with FPRT of 0 are
# excluded before taking the log.

# Region: Adv
#   only the main effect of modification type is modelled here;
#   by-subject slope for Modification, by-item intercept only
m1.fprt.adv <- lmer(
  log(FPRT) ~ Modification +
    (1 + Modification | subject) +
    (1 | item),
  data = subset(data, region.id == "Adv" & FPRT > 0),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.fprt.adv)

# Region: VN
#   full by-subject and by-item random-effects structure
m1.fprt.vn <- lmer(
  log(FPRT) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, FPRT > 0 & region.id == "VN"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.fprt.vn)

# Region: FreqP
#   full by-subject and by-item random-effects structure
m1.fprt.FreqP <- lmer(
  log(FPRT) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, FPRT > 0 & region.id == "FreqP"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.fprt.FreqP)

# Region: DE
#   by-subject random intercept only (no by-subject slopes in this model);
#   full by-item random-effects structure
m1.fprt.DE <- lmer(
  log(FPRT) ~ Modification + RCtype + RCtype_x_Modification +
    (1 | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, FPRT > 0 & region.id == "DE"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.fprt.DE)

# Region: RC head
#   full by-subject and by-item random-effects structure
m1.fprt.head <- lmer(
  log(FPRT) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, FPRT > 0 & region.id == "head"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.fprt.head)


# -------- Dependent Variable: First-pass regression probability FPReg -------------
# Logistic mixed models on the binary FPReg indicator.
# The optimizer settings are always passed through the named `control`
# argument (never positionally), so the calls do not depend on the order of
# glmer()'s formal arguments and match the other models in this script.

# Region: Adv
#   only the main effect of modification type is modelled here;
#   by-subject slope for Modification, by-item intercept only
m1.fpreg.adv <- glmer(
  FPReg ~ Modification +
    (1 + Modification | subject) +
    (1 | item),
  data = subset(data, region.id == "Adv"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m1.fpreg.adv)

# Region: VN
#   full by-subject and by-item random-effects structure
m1.fpreg.vn <- glmer(
  FPReg ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, region.id == "VN"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m1.fpreg.vn)

# Region: FreqP
#   full by-subject and by-item random-effects structure
m1.fpreg.FreqP <- glmer(
  FPReg ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, region.id == "FreqP"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m1.fpreg.FreqP)

# Region: DE
#   full by-subject and by-item random-effects structure
m1.fpreg.DE <- glmer(
  FPReg ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, region.id == "DE"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m1.fpreg.DE)

# Region: head
#   full by-subject and by-item random-effects structure
m1.fpreg.head <- glmer(
  FPReg ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, region.id == "head"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m1.fpreg.head)


# -------- Dependent Variable: Regression-path duration RPD -------------
# Linear mixed models on log-transformed RPD; rows with RPD of 0 are
# excluded before taking the log.

# Region: Adv
#   only the main effect of modification type is modelled here;
#   by-subject slope for Modification, by-item intercept only
m1.rpd.adv <- lmer(
  log(RPD) ~ Modification +
    (1 + Modification | subject) +
    (1 | item),
  data = subset(data, region.id == "Adv" & RPD > 0),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.rpd.adv)

# Region: VN
#   full by-subject and by-item random-effects structure
m1.rpd.vn <- lmer(
  log(RPD) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, RPD > 0 & region.id == "VN"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.rpd.vn)

# Region: FreqP
#   full by-subject and by-item random-effects structure
m1.rpd.FreqP <- lmer(
  log(RPD) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, RPD > 0 & region.id == "FreqP"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.rpd.FreqP)

# Region: DE
#   full by-subject and by-item random-effects structure
m1.rpd.DE <- lmer(
  log(RPD) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, RPD > 0 & region.id == "DE"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.rpd.DE)

# Region: RC head
#   by-subject random intercept only (no by-subject slopes in this model);
#   full by-item random-effects structure
m1.rpd.head <- lmer(
  log(RPD) ~ Modification + RCtype + RCtype_x_Modification +
    (1 | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, RPD > 0 & region.id == "head"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.rpd.head)


# -------- Dependent Variable: Total fixation time TFT -------------
# Linear mixed models on log-transformed TFT; rows with TFT of 0 are
# excluded before taking the log.

# Region: Adv
#   only the main effect of modification type is modelled here;
#   random slopes for Modification by subject and by item
m1.tft.adv <- lmer(
  log(TFT) ~ Modification +
    (1 + Modification | subject) +
    (1 + Modification | item),
  data = subset(data, region.id == "Adv" & TFT > 0),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.tft.adv)

# Region: VN
#   full by-subject and by-item random-effects structure
m1.tft.vn <- lmer(
  log(TFT) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, TFT > 0 & region.id == "VN"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.tft.vn)

# Region: FreqP
#   full by-subject and by-item random-effects structure
m1.tft.FreqP <- lmer(
  log(TFT) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, TFT > 0 & region.id == "FreqP"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.tft.FreqP)

# Region: DE
#   full by-subject and by-item random-effects structure
m1.tft.DE <- lmer(
  log(TFT) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, TFT > 0 & region.id == "DE"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.tft.DE)

# Region: RC head
#   full by-subject and by-item random-effects structure
m1.tft.head <- lmer(
  log(TFT) ~ Modification + RCtype + RCtype_x_Modification +
    (1 + Modification + RCtype + RCtype_x_Modification | subject) +
    (1 + Modification + RCtype + RCtype_x_Modification | item),
  data = subset(data, TFT > 0 & region.id == "head"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m1.tft.head)


# --------------------- NESTED CONTRASTS ------------------------------------------

# -------- Dependent Variable: First-pass reading time FPRT ----------------------
# Nested-contrast linear mixed models on log-transformed FPRT (rows with FPRT
# of 0 are excluded). All six models use the same fixed effects and the full
# by-subject and by-item random-effects structure; only the region differs.

# Region: VN
m2.fprt.vn <- lmer(
  log(FPRT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, FPRT > 0 & region.id == "VN"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.fprt.vn)

# Region: FreqP
m2.fprt.FreqP <- lmer(
  log(FPRT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, FPRT > 0 & region.id == "FreqP"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.fprt.FreqP)

# Region: DE
m2.fprt.DE <- lmer(
  log(FPRT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, FPRT > 0 & region.id == "DE"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.fprt.DE)

# Region: RC head
m2.fprt.head <- lmer(
  log(FPRT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, FPRT > 0 & region.id == "head"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.fprt.head)

# Region: head+1 (first word after the head noun)
m2.fprt.hd1 <- lmer(
  log(FPRT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, FPRT > 0 & region.id == "hd1"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.fprt.hd1)

# Region: head+2 (second word after the head noun)
m2.fprt.hd2 <- lmer(
  log(FPRT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, FPRT > 0 & region.id == "hd2"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.fprt.hd2)


# -------- Dependent Variable: First-pass regression probability FPReg -------------
# Nested-contrast logistic mixed models on the binary FPReg indicator. All six
# models use the same fixed effects and the full by-subject and by-item
# random-effects structure; only the region differs.

# Region: VN
m2.fpreg.vn <- glmer(
  FPReg ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "VN"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m2.fpreg.vn)

# Region: FreqP
m2.fpreg.FreqP <- glmer(
  FPReg ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "FreqP"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m2.fpreg.FreqP)

# Region: DE
m2.fpreg.DE <- glmer(
  FPReg ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "DE"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m2.fpreg.DE)

# Region: RC head
m2.fpreg.head <- glmer(
  FPReg ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "head"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m2.fpreg.head)

# Region: head+1 (first word after the head noun)
m2.fpreg.hd1 <- glmer(
  FPReg ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "hd1"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m2.fpreg.hd1)

# Region: head+2 (second word after the head noun)
m2.fpreg.hd2 <- glmer(
  FPReg ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, region.id == "hd2"),
  family = binomial(),
  control = glmerControl(optimizer = "bobyqa")
)
summary(m2.fpreg.hd2)


# -------- Dependent Variable: Regression-path duration RPD -----------------
# Nested-contrast linear mixed models on log-transformed RPD (rows with RPD of
# 0 are excluded). The fixed effects are identical across regions; the
# random-effects structure is the full one except where noted (head+1).

# Region: VN
m2.rpd.vn <- lmer(
  log(RPD) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, RPD > 0 & region.id == "VN"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.rpd.vn)

# Region: FreqP
m2.rpd.FreqP <- lmer(
  log(RPD) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, RPD > 0 & region.id == "FreqP"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.rpd.FreqP)

# Region: DE
m2.rpd.DE <- lmer(
  log(RPD) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, RPD > 0 & region.id == "DE"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.rpd.DE)

# Region: RC head
m2.rpd.head <- lmer(
  log(RPD) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, RPD > 0 & region.id == "head"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.rpd.head)

# Region: head+1 (first word after the head noun)
#   by-item random intercept only (no by-item slopes in this model)
m2.rpd.hd1 <- lmer(
  log(RPD) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 | item),
  data = subset(data, RPD > 0 & region.id == "hd1"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.rpd.hd1)

# Region: head+2 (second word after the head noun)
m2.rpd.hd2 <- lmer(
  log(RPD) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, RPD > 0 & region.id == "hd2"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.rpd.hd2)


# -------- Dependent Variable: Total-fixation time TFT -----------------
# Nested-contrast linear mixed models on log-transformed TFT (rows with TFT of
# 0 are excluded). All six models use the same fixed effects and the full
# by-subject and by-item random-effects structure; only the region differs.

# Region: VN
m2.tft.vn <- lmer(
  log(TFT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, TFT > 0 & region.id == "VN"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.tft.vn)

# Region: FreqP
m2.tft.FreqP <- lmer(
  log(TFT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, TFT > 0 & region.id == "FreqP"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.tft.FreqP)

# Region: DE
m2.tft.DE <- lmer(
  log(TFT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, TFT > 0 & region.id == "DE"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.tft.DE)

# Region: RC head
m2.tft.head <- lmer(
  log(TFT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, TFT > 0 & region.id == "head"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.tft.head)

# Region: head+1 (first word after the head noun)
m2.tft.hd1 <- lmer(
  log(TFT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, TFT > 0 & region.id == "hd1"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.tft.hd1)

# Region: head+2 (second word after the head noun)
m2.tft.hd2 <- lmer(
  log(TFT) ~ RCtype_subjMod + RCtype_objMod + Modification +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | subject) +
    (1 + RCtype_subjMod + RCtype_objMod + Modification | item),
  data = subset(data, TFT > 0 & region.id == "hd2"),
  control = lmerControl(optimizer = "bobyqa")
)
summary(m2.tft.hd2)
