Author: | Chen, S. X. and Y. L. Qin |
Title: | Chen, S. X. and Y. L. Qin (2010). A two sample test for high dimensional data with application to gene-set testing, The Annals of Statistics, 38, 808-835. |
Code | |
R-code: #CQ.stat returns the value of the CQ statistic in Chen and Qin (2010)
#Inputs are two data matrices: p by n1 and p by n2
#Each column of a data matrix is a p-dim observation
CQ.stat=function(X1,X2)
{
p=dim(X1)[1] # dim
n1=dim(X1)[2] # sample size n1
n2=dim(X2)[2] # sample size n2
X1mean=apply(X1,1,mean) # First sample mean
X2mean=apply(X2,1,mean) # Second sample mean
T1=n1^2*sum(X1mean^2)-sum(X1^2)
T2=n2^2*sum(X2mean^2)-sum(X2^2)
T12=sum(t(X1mean)%*%X2mean)
Tn=T1/(n1*(n1-1))+T2/(n2*(n2-1))-2*T12 # Tn in the numerator of the CQ statistic
# estimate of tr(Sigma1^2)
trSig1sq=0
seq1=1:n1
for(j in seq1){
for(k in seq1[-j]){
cv1mean=apply(X1[,-c(j,k)],1,mean)
trSig1sq=trSig1sq+sum(t(X1[,j])%*%(X1[,k]-cv1mean))*sum(t(X1[,k])%*%(X1[,j]-cv1mean))/(n1*(n1-1))
}
}
# estimate of tr(Sigma2^2)
trSig2sq=0
seq2=1:n2
for(j in seq2){
for(k in seq2[-j]){
cv2mean=apply(X2[,-c(j,k)],1,mean)
trSig2sq=trSig2sq+sum(t(X2[,j])%*%(X2[,k]-cv2mean))*sum(t(X2[,k])%*%(X2[,j]-cv2mean))/(n2*(n2-1))
}
}
# estimate of tr(Sigma1Sigma2)
trSig1Sig2=0
for(l in seq1){
cv1mean=apply(X1[,-l],1,mean)
for(k in seq2){
cv2mean=apply(X2[,-k],1,mean)
trSig1Sig2=trSig1Sig2+sum(t(X1[,l]%*%(X2[,k]-cv2mean)))*sum(t(X2[,k]%*%(X1[,l]-cv1mean)))/(n1*n2)
}
}
# estimate of sigma_n^2=Var(Tn)
signsq=2*trSig1sq/(n1*(n1-1))+2*trSig2sq/(n2*(n2-1))+4*trSig1Sig2/(n1*n2)
# CQ statistic
CQstat=Tn/sqrt(signsq)
return(CQstat)
} | |