From 332758a0595a0e173cc80ee9f92f3262973301ae Mon Sep 17 00:00:00 2001 From: Kwonsang Lee Date: Tue, 26 Dec 2017 11:41:32 -0500 Subject: [PATCH] simulation_studies including implementation of other existing methods and comparisons of the penalized and regular likelihood methods --- comparison_via_simulation.R | 143 ++++++++++++++++ other_existing_method.R | 322 ++++++++++++++++++++++++++++++++++++ 2 files changed, 465 insertions(+) create mode 100644 comparison_via_simulation.R create mode 100644 other_existing_method.R diff --git a/comparison_via_simulation.R b/comparison_via_simulation.R new file mode 100644 index 0000000..474a441 --- /dev/null +++ b/comparison_via_simulation.R @@ -0,0 +1,143 @@ +library(splines) + + +g.alpha1=function(alpha, mat){ + alpha0=exp(alpha[1])/(1+exp(alpha[1])) + new.vec=mat%*%alpha[-1] + phi.alpha=log(sum(exp(new.vec))) + func.g=exp(new.vec-phi.alpha) + func.g=as.vector(func.g) + return(c(alpha0, (1-alpha0)*func.g)) +} +g.alpha2=function(alpha,eta,d,mat){ + new.d=(d-mean(d))/sd(d) + #new.d=d + new.vec=mat%*%alpha+eta*new.d + phi.alpha=log(sum(exp(new.vec))) + func.g=exp(new.vec-phi.alpha) + func.g=as.vector(func.g) + return(c(0,func.g)) +} + +pois.err=function(x, d){ + n=length(x) + m=length(d) + mat=matrix(NA, nrow=n, ncol=m) + for(i in 1:n){ + #mat[i,]=dpois(x[i], lambda=(exp(d)-1)) + mat[i,]=dpois(x[i], lambda=d) + } + return(mat) +} + + +freq.count=function(data, x, d){ + dobs=data[,1]; yobs=data[,2] + n=length(x) + freq0=freq1=rep(NA, n) + for(i in 1:(n-1)){ + freq0[i]=sum(dobs[yobs==0]>=x[i] & dobs[yobs==0]=x[i] & dobs[yobs==1]=x[n]) + freq1[n]=sum(dobs[yobs==1]>=x[n]) + freq.mat=cbind(freq0, freq1) + return(freq.mat) +} + +penal.poisson.fke=function(par,x,d,freq.mat,mat, fke, c0){ + pois.err.mat=pois.err(x=x, d=d) + pois.err.mat.fke=pois.err(x=x, d=fke*d) + + m=length(par) + lamb=exp(par[1])/(1+exp(par[1])) + alpha=par[2:(m-1)] + eta=par[m] + + g1=g.alpha1(alpha=alpha, mat=mat) + g2=g.alpha2(alpha=alpha[-1], eta=eta, d=d[-1], mat=mat) + trans.g1=pois.err.mat%*%g1 + trans.g2=pois.err.mat%*%g2 + trans.g1star=pois.err.mat.fke%*%g1 + + freq0=freq.mat[,1]; freq1=freq.mat[,2] + likeli.y0=sum(freq0*log(trans.g1)) + likeli.y1=sum(freq1*log(trans.g1star*(1-lamb)+trans.g2*lamb)) + likeli=likeli.y0+likeli.y1 + return(-likeli+c0*sqrt(sum(alpha[-1]^2))) +} + + + + +##################################################### +simnum=1 +est.maff=penal.est.maff=rep(NA, simnum) + +t1=Sys.time(); +for(k in 1:simnum){ + library(truncnorm) + n=500 + q=0.2 + true.p=q + #true.lamb=0.5 + beta=1 + + p.mi=0.25 + p.nmi=0.2 + + y.mi=c(rep(0, n*(1-p.mi)), rep(1, n*p.mi)) + y.nmi=c(rep(0, n*(1-p.nmi)*(1-p.mi)), rep(1, n*p.nmi*(1-p.mi)), rep(0, n*(1-p.nmi)*p.mi), rep(1, n*p.nmi*p.mi)) + y.obs=y.mi+y.nmi>0 + + d.no.nmi=d.cur=d.obs=rep(NA, n) + u=runif(n,0,1) + for(i in 1:n){ + if(y.mi[i]==0){ + d.no.nmi[i]=ifelse(u[i]1]=1 # truncation + ll[ll<0]=0 + + count=c() + for(i in 1:length(xeval)){ + count[i]=sum(x==xeval[i]) + } + + llp=rep(ll, count) + + lip=llp + lirn=rn=rnfun(y,n,llp,bw) + + # if the ll estimate is not monotone, we apply isotonic regression to get the LI estimate + + if(any(diff(llp)<0)){ + lip=pavaest(x,llp) + lip[lip>1]=1 + lip[lip<0]=0 + + lirn=rnfun(y,n,lip,bw) + } + + return(list(llp=llp,lip=lip,rn=rn,lirn=lirn)) +} + +# bws: grid points of bandwidth +# lws: length of bws + +lep=function(x,y,xeval,n,bws,lws){ + + lirn=rn=rep(1e6,lws) + + for(i in 1:lws){ + + obj=lifun(x,y,xeval,bws[i]) + rn[i]=obj$rn + lirn[i]=obj$lirn + } + # choose the optimal bandwidth at which 'rn' and 'lirn' are minimized + + orn=order(rn)[1] + ob=bws[orn] + lle=lifun(x,y,xeval,ob)$llp + + olp=order(lirn)[1] + obli=bws[olp] + lie=lifun(x,y,xeval,obli)$lip + + bh=c(ob,obli) + + return(list(lle=lle,lie=lie,bh=bh)) +} + +lifun2=function(x,y,xeval,bw){ + + ll=locLinSmootherC(x, y, xeval, bw, EpaK)$beta0 # local linear estimate + + ll[ll>1]=1 # truncation + ll[ll<0]=0 + + li=ll + lirn=rn=rnfun(y,n,ll,bw) + + # if the ll estimate is not monotone, we apply isotonic regression to get the LI estimate + + if(any(diff(ll)<0)){ + li=pavaest(x,ll) + li[li>1]=1 + li[li<0]=0 + + lirn=rnfun(y,n,li,bw) + } + + return(list(ll=ll,li=li,rn=rn,lirn=lirn)) +} + +# bws: grid points of bandwidth +# lws: length of bws + +lep2=function(x,y,n,bws,lws){ + + ll=li=matrix(0,n,lws) + lirn=rn=rep(1e6,lws) + + for(i in 1:lws){ + + obj=lifun2(x,y,x,bws[i]) + ll[,i]=obj$ll + li[,i]=obj$li + rn[i]=obj$rn + lirn[i]=obj$lirn + } + # choose the optimal bandwidth at which 'rn' and 'lirn' are minimized + + orn=order(rn)[1] + ob=bws[orn] + lle=ll[,orn] + + olp=order(lirn)[1] + obli=bws[olp] + lie=li[,olp] + + bh=c(ob,obli) + + return(list(lle=lle,lie=lie,bh=bh)) +} + +maf.fun=function(y,probx){ + probxx=(probx-probx[1])/probx + maf=sum(probxx[y==1])/sum(y==1) + return(maf) +} +## Qin and Leung estimator +# Evaluate semiparametric likelihood in Qin and Leung + +ql.loglikelihood.for.optim=function(param,y,d){ + alpha=param[1]; + beta=param[2]; + lambdastar=param[3]; + p=param[4]; + m0=sum((d==0)[y==0]); + n0=sum((d==0)[y==1]); + N1=length(y)-m0-n0; + m1=sum(y==0)-m0; + n1=sum(y==1)-n0; + # Take log transformation of positive values + xpos=(d[d>0 & y==1]); + tpos=(d[d>0]); + feverpos=y[d>0]; + nu=lambdastar*(1/N1)*sum((exp(alpha+beta*xpos))/((1-lambdastar)+lambdastar*exp(alpha+beta*xpos))); + l2=-sum(log(1+nu*(exp(alpha+beta*tpos)-1)))+sum(log((1-lambdastar)+lambdastar*exp(alpha+beta*xpos))); + l1=m0*log(p)+m1*log(1-p)+n0*log(p*(1-lambdastar)/(1-lambdastar*p))+n1*log(1-p*(1-lambdastar)/(1-lambdastar*p)); + l1+l2; +} + +# Function for taking in values and computing the ql estimator using +# optim +ql.optim=function(y,d){ + m0=sum((d==0)[y==0]); + n0=sum((d==0)[y==1]); + N1=length(y)-m0-n0; + m1=sum(y==0)-m0; + n1=sum(y==1)-n0; + # Take log transformation of positive values + xpos=(d[d>0 & y==1]); + tpos=(d[d>0]); + feverpos=y[d>0]; + # Starting values + # Find starting value for p and lambda based on binomial likelihood + parasite.prevalence=(d>0); + lambdahat.binomial=(mean(parasite.prevalence[y==1])-mean(parasite.prevalence[y==0]))/(1-mean(parasite.prevalence[y==0])); + phat.binomial=1-mean(parasite.prevalence[y==0]); + p.curr=phat.binomial; + lambdastar.curr=lambdahat.binomial/(1-phat.binomial*(1-lambdahat.binomial)); + # Assume normal distribution on logged positive values to estimate starting values for alpha, beta + mu1=mean(tpos[feverpos==0]); + sigmasq=var(tpos[feverpos==0]); + mu2=(mean(tpos[feverpos==1])-(1-lambdastar.curr)*mu1)/lambdastar.curr; + alpha.curr=(mu1^2-mu2^2)/(2*sigmasq); + beta.curr=(mu2-mu1)/sigmasq; + param=c(alpha.curr,beta.curr,lambdastar.curr,p.curr) + + temp.optim=optim(param,ql.loglikelihood.for.optim,method="L-BFGS-B",lower=c(-Inf,-Inf,.01,.01),upper=c(Inf,Inf,.99,.99),control=list(fnscale=-1),y=y,d=d); + alphahat=temp.optim$par[1]; + betahat=temp.optim$par[2]; + lambdastarhat=temp.optim$par[3] + phat=temp.optim$par[4]; + lambdahat=(lambdastarhat-lambdastarhat*phat)/(1-lambdastarhat*phat); + list(lambdahat=lambdahat,alphahat=alphahat,betahat=betahat,lambdastarhat=lambdastarhat,phat=phat); +} + +expit=function(x){ + val=exp(x)/(1+exp(x)) + val +} + +#################################### +simnum=10 +maf.logi=maf.power=maf.nonpara=est.sp=rep(NA, simnum) + +t1=Sys.time(); +for(k in 1:simnum){ + n=500 + true.p=0.2 + #true.lamb=0.5 + beta=0.8 + + p.mi=0.25 + p.nmi=0.2 + + y.mi=c(rep(0, n*(1-p.mi)), rep(1, n*p.mi)) + y.nmi=c(rep(0, n*(1-p.nmi)*(1-p.mi)), rep(1, n*p.nmi*(1-p.mi)), rep(0, n*(1-p.nmi)*p.mi), rep(1, n*p.nmi*p.mi)) + y.obs=y.mi+y.nmi>0 + + #rand.num=rbinom(n, size=1, prob=true.p) + d.no.nmi=d.cur=d.obs=rep(NA, n) + u=runif(n,0,1) + for(i in 1:n){ + if(y.mi[i]==0){ + d.no.nmi[i]=ifelse(u[i]