作者:

Peng, LH, S.X. Chen and W, Zhou

标题:

Peng, LH, S.X. Chen and W, Zhou (2016) More Powerful Tests for Sparse High-Dimensional Covariances Matrices, Journal of Multivariate Analysis,  149, 124-143.
###############
# R-code for Peng, LH, S.X. Chen and W, Zhou (2016) 
#            More Powerful Tests for Sparse High-Dimensional Covariances Matrices, 
#            Journal of Multivariate Analysis, 149, 124-143.

###############
# function that calculate the test statistic for the identity hypothesis,
# X is the N times P data matrix, and k is the banding width
###############
testIden = function(X, k) {
  n = nrow(X)
  p = ncol(X)
  
  Y = apply(X, 2, function(y) y-mean(y))
  
  l_i = colSums(Y)
  l_ii = colSums(Y*Y)
  l_ij = matrix(0, p, p)
  l_ijj = matrix(0, p, p)
  l_iijj = matrix(0, p, p)
  for (i in 1:p) {
    for (j in 1:p) {
      if (abs(i-j)<=k) {
        for (l in 1:n) {
          l_ij[i,j] = l_ij[i,j]+Y[l,i]*Y[l,j]
          l_ijj[i,j] = l_ijj[i,j]+Y[l,i]*Y[l,j]*Y[l,j]
          l_iijj[i,j] = l_iijj[i,j]+Y[l,i]*Y[l,j]*Y[l,i]*Y[l,j]
        }
      }
    }
  }
  
  L4 = sum(l_ii)/n
  L5 = sum(l_i*l_i-l_ii)/(n*(n-1))
  L1=L2=L3=0
  for (i in 1:p) {
    for (j in 1:p) {
      if (abs(i-j)<=k) {
        L1 = L1+l_ij[i,j]*l_ij[i,j]-l_iijj[i,j]
        L2 = L2+l_i[i]*l_i[j]*l_ij[i,j]-l_i[i]*l_ijj[i,j]+
          2*l_iijj[i,j]-l_ij[i,j]*l_ij[i,j]-l_i[j]*l_ijj[j,i]
        L3 = L3+l_i[i]*l_i[i]*l_i[j]*l_i[j]-2*l_i[i]*l_i[i]*l_ii[j]+l_ii[i]*l_ii[j]+
          2*l_ij[i,j]^2-6*l_iijj[i,j]+8*l_i[i]*l_ijj[i,j]-4*l_i[i]*l_i[j]*l_ij[i,j]
      }
    }
  }
  L1 = L1/(n*(n-1))
  L2 = L2/(n*(n-1)*(n-2))
  L3 = L3/(n*(n-1)*(n-2)*(n-3))
  
  testStat1 = (L1-2*L2+L3)-2*(L4-L5)+p
  
  l_cal1 = rep(0, 2*p*k-k*k-k+p)
  l_cal11 = rep(0, 2*p*k-k*k-k+p)
  l_cal12 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
  l_cal121 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
  l_cal1212 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
  for (i in 1:p) {
    for (j in 1:p) {
      if (abs(i-j)<=k) {
        s1 = loc_ij_k(i,j,p,k)
        for (l in 1:n) {
          l_cal1[s1] = l_cal1[s1]+Y[l,i]*Y[l,j]
          l_cal11[s1] = l_cal11[s1]+Y[l,i]^2*Y[l,j]^2
        }
      }
    }
  }
  
  for (i1 in 1:p) {
    for (j1 in 1:p) {
      if (abs(i1-j1)<=k) {
        for (i2 in 1:p) {
          for (j2 in 1:p) {
            if (abs(i2-j2)<=k) {
              s1 = loc_ij_k(i1,j1,p,k)
              s2 = loc_ij_k(i2,j2,p,k)
              for (l in 1:n) {
                l_cal12[s1,s2] = l_cal12[s1,s2]+Y[l,i1]*Y[l,j1]*Y[l,i2]*Y[l,j2]
                l_cal121[s1,s2] =l_cal121[s1,s2]+Y[l,i1]*Y[l,j1]*Y[l,i2]^2*Y[l,j2]^2
                l_cal1212[s1,s2] = l_cal1212[s1,s2]+Y[l,i1]^2*Y[l,j1]^2*Y[l,i2]^2*Y[l,j2]^2
              }
            }
          }
        }
      }
    }
  }
  
  tmp1=tmp2=tmp3=0
  for (i1 in 1:p) {
    for (j1 in 1:p) {
      if (abs(i1-j1)<=k) {
        for (i2 in 1:p) {
          for (j2 in 1:p) {
            if (abs(i2-j2)<=k) {
              s1 = loc_ij_k(i1,j1,p,k)
              s2 = loc_ij_k(i2,j2,p,k)
              for (l in 1:n) {
                tmp1 = tmp1+l_cal12[s1,s2]^2-l_cal1212[s1,s2]
                tmp2 = tmp2+l_cal1[s1]*l_cal1[s2]*l_cal12[s1,s2]-l_cal1[s1]*l_cal121[s1,s2]+
                  2*l_cal1212[s1,s2]-l_cal12[s1,s2]^2-l_cal1[s2]*l_cal121[s2,s1]
                tmp3 = tmp3+l_cal1[s1]^2*l_cal1[s2]^2-l_cal1[s1]^2*l_cal11[s2]-
                  l_cal1[s2]^2*l_cal11[s1]+l_cal11[s1]*l_cal11[s2]+2*l_cal12[s1,s2]^2-
                  6*l_cal1212[s1,s2]+4*l_cal1[s1]*l_cal121[s1,s2]+4*l_cal1[s2]*l_cal121[s2,s1]-
                  4*l_cal1[s1]*l_cal1[s2]*l_cal12[s1,s2]
              }
            }
          }
        }
      }
    }
  }
  tmp1 = tmp1/(n*(n-1))
  tmp2 = tmp2/(n*(n-1)*(n-2))
  tmp3 = tmp3/(n*(n-1)*(n-2)*(n-3))
  
  varEst = 2*(tmp1-2*tmp2+tmp3)/(n*(n-1))
  
  res = testStat1/sqrt(varEst)
  return(res)
}


###############
# function that calculate the test statistic for the shpericity hypothesis,
# X is the N times P data matrix, and k is the banding width
###############
testSphe = function(X, k) {
  n = nrow(X)
  p = ncol(X)

  Y = apply(X, 2, function(y) y-mean(y))
  
  l_i = colSums(Y)
  l_ii = colSums(Y*Y)
  l_ij = matrix(0, p, p)
  l_ijj = matrix(0, p, p)
  l_iijj = matrix(0, p, p)
  for (i in 1:p) {
    for (j in 1:p) {
      if (abs(i-j)<=k) {
        for (l in 1:n) {
          l_ij[i,j] = l_ij[i,j]+Y[l,i]*Y[l,j]
          l_ijj[i,j] = l_ijj[i,j]+Y[l,i]*Y[l,j]*Y[l,j]
          l_iijj[i,j] = l_iijj[i,j]+Y[l,i]*Y[l,j]*Y[l,i]*Y[l,j]
        }
      }
    }
  }
  
  L4 = sum(l_ii)/n
  L5 = sum(l_i*l_i-l_ii)/(n*(n-1))
  L1=L2=L3=0
  for (i in 1:p) {
    for (j in 1:p) {
      if (abs(i-j)<=k) {
        L1 = L1+l_ij[i,j]*l_ij[i,j]-l_iijj[i,j]
        L2 = L2+l_i[i]*l_i[j]*l_ij[i,j]-l_i[i]*l_ijj[i,j]+
          2*l_iijj[i,j]-l_ij[i,j]*l_ij[i,j]-l_i[j]*l_ijj[j,i]
        L3 = L3+l_i[i]*l_i[i]*l_i[j]*l_i[j]-2*l_i[i]*l_i[i]*l_ii[j]+l_ii[i]*l_ii[j]+
          2*l_ij[i,j]^2-6*l_iijj[i,j]+8*l_i[i]*l_ijj[i,j]-4*l_i[i]*l_i[j]*l_ij[i,j]
      }
    }
  }
  L1 = L1/(n*(n-1))
  L2 = L2/(n*(n-1)*(n-2))
  L3 = L3/(n*(n-1)*(n-2)*(n-3))
  
  testStat1 = p*(L1-2*L2+L3)/((L4-L5)^2)-1
  
  l_cal1 = rep(0, 2*p*k-k*k-k+p)
  l_cal11 = rep(0, 2*p*k-k*k-k+p)
  l_cal12 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
  l_cal121 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
  l_cal1212 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
  for (i in 1:p) {
    for (j in 1:p) {
      if (abs(i-j)<=k) {
        s1 = loc_ij_k(i,j,p,k)
        for (l in 1:n) {
          l_cal1[s1] = l_cal1[s1]+Y[l,i]*Y[l,j]
          l_cal11[s1] = l_cal11[s1]+Y[l,i]^2*Y[l,j]^2
        }
      }
    }
  }
  
  for (i1 in 1:p) {
    for (j1 in 1:p) {
      if (abs(i1-j1)<=k) {
        for (i2 in 1:p) {
          for (j2 in 1:p) {
            if (abs(i2-j2)<=k) {
              s1 = loc_ij_k(i1,j1,p,k)
              s2 = loc_ij_k(i2,j2,p,k)
              for (l in 1:n) {
                l_cal12[s1,s2] = l_cal12[s1,s2]+Y[l,i1]*Y[l,j1]*Y[l,i2]*Y[l,j2]
                l_cal121[s1,s2] =l_cal121[s1,s2]+Y[l,i1]*Y[l,j1]*Y[l,i2]^2*Y[l,j2]^2
                l_cal1212[s1,s2] = l_cal1212[s1,s2]+Y[l,i1]^2*Y[l,j1]^2*Y[l,i2]^2*Y[l,j2]^2
              }
            }
          }
        }
      }
    }
  }
  
  tmp1=tmp2=tmp3=0
  for (i1 in 1:p) {
    for (j1 in 1:p) {
      if (abs(i1-j1)<=k) {
        for (i2 in 1:p) {
          for (j2 in 1:p) {
            if (abs(i2-j2)<=k) {
              s1 = loc_ij_k(i1,j1,p,k)
              s2 = loc_ij_k(i2,j2,p,k)
              for (l in 1:n) {
                tmp1 = tmp1+l_cal12[s1,s2]^2-l_cal1212[s1,s2]
                tmp2 = tmp2+l_cal1[s1]*l_cal1[s2]*l_cal12[s1,s2]-l_cal1[s1]*l_cal121[s1,s2]+
                  2*l_cal1212[s1,s2]-l_cal12[s1,s2]^2-l_cal1[s2]*l_cal121[s2,s1]
                tmp3 = tmp3+l_cal1[s1]^2*l_cal1[s2]^2-l_cal1[s1]^2*l_cal11[s2]-
                  l_cal1[s2]^2*l_cal11[s1]+l_cal11[s1]*l_cal11[s2]+2*l_cal12[s1,s2]^2-
                  6*l_cal1212[s1,s2]+4*l_cal1[s1]*l_cal121[s1,s2]+4*l_cal1[s2]*l_cal121[s2,s1]-
                  4*l_cal1[s1]*l_cal1[s2]*l_cal12[s1,s2]
              }
            }
          }
        }
      }
    }
  }
  tmp1 = tmp1/(n*(n-1))
  tmp2 = tmp2/(n*(n-1)*(n-2))
  tmp3 = tmp3/(n*(n-1)*(n-2)*(n-3))
  
  varEst = 2*(tmp1-2*tmp2+tmp3)/((L1-2*L2+L3)^2*n*(n-1))
  
  res = testStat1/sqrt(varEst)
  return(res)
}


###############
# a useful function
###############
loc_ij_k = function(i, j, p, k) {
  tmp = 0
  if (k==0) {
    tmp = i
  } else {
    if (i==j) {
      tmp = i
    } else {
      for (l in 1:(k+1)) {
        if ((j-i)==l) {
          tmp = (2*l-1)*p-l*l+l+i
        } else if ((i-j)==l) {
          tmp = 2*l*p-l*l+j
        }
      }
    }
  }
  return(tmp)
}