# Analysis for the "How many validations is enough" manuscript - Salk et al. (submitted 2021) setwd("C:/Users/carlsalk/Dropbox/ActualFolder/Wacko Ideas/When to pull an image from circulation/2 resubmitted") #setwd(choose.dir()) #Choose your working directory #A function to calculate quantiles of the posterior beta distribution with prior Beta(alpha0=.5, beta0=.5) betaqs<-function(dat,qts=c(.025,.05,.5,.95,.975)){ #prior parameters: alpha0<-.5 beta0<-.5 #Data checks: if((min(qts<0) | max(qts>1) | sum(!is.finite(qts)))>0){ return("ERROR: quantile values outside [0,1]")} if(sum(dat==1 | dat==0 | is.na(dat))!=length(dat)){ return("ERROR: data has values not equal to 0, 1 or NA")} #Frame to hold results: outdat<-as.data.frame(matrix(NA,ncol=length(qts),nrow=length(dat))) colnames(outdat)<-qts #Calculation of quantiles: for(j in 1:length(qts)){ outdat[,j]<-qbeta(qts[j],alpha0+cumsum(dat==1 & is.finite(dat)), beta0+cumsum(dat==0 & is.finite(dat))) } return(outdat) } #Load the data file of the individual ratings performed by the volunteers: ratings.all<-read.csv("ratings.csv",sep=',',header=T) #Metadata for these columns: # imgid: The unique identifier for each image used in the Cropland Capture campaign # userid: The unique identifier for each volunteer in the Cropland Capture campaign # rating: The answer provided; can be only one of the following: # 1: yes cropland # 2: no cropland # 0: maybe # date: Timestamp of the rating # ratingid: The unique identifier of the rating (this is different for each data row) # platform: What interface did the volunteer use to provide this rating? # 1: iPhone5 # 2: iPhone, other models # 3: iPad # 4: Browser # >100: Android; different numbers indicate the screen size in pixels images<-unique(ratings.all$imgid) #Create a vector of unique IDs for images used in the campaign imgrats<-rep(NA,length(images)) #Vector to hold the number of ratings for each image imglist.raw<-as.list(rep(NA,length(images))) #List to hold a time-ordered vector of ratings for each image noNA<-function(vec){ #A function to strip a vector of NA values return(vec[which(is.finite(vec))]) } #Arrange data into ordered vectors of ratings for each unique image (this takes a while!): # [This is a roundabout way of getting the data into Yes=1 No=0 Maybe=NA format, # but it reduces the computational burden in the loop.] ratings.all$no<-0; ratings.all$no[ratings.all$rating==2]<-1 ratings.all$yes<-0; ratings.all$yes[ratings.all$rating==1]<-1 ratings.all$maybe<-0; ratings.all$maybe[ratings.all$rating==0]<-1 ratings<-ratings.all[ratings.all$maybe==0,] for(i in 1:length(images)){ imglist.raw[[i]]<-ratings$yes[ratings$imgid==images[i]] imglist.raw[[i]][ratings$maybe[ratings$imgid==images[i]]==1]<-NA print(i) } imglist<-lapply(imglist.raw,noNA) imgrats<-sapply(imglist,length) CIs<-lapply(imglist,betaqs) #Calculate the 95% and 90% central posterior intervals #####RERUNNING FROM HERE.... ############ #A function to aid graphical exploration of the posterior credible interval trajectories: plotter<-function(imnum){ plot(imglist[[imnum]],pch=19,cex=.6,ylim=c(0,1),ylab='P(crop)',xlab='rating', main=paste('Image',images[imnum]),col=0) polygon(c(1:dim(CIs[[imnum]])[1],dim(CIs[[imnum]])[1]:1), c(CIs[[imnum]][,1],CIs[[imnum]][,5][dim(CIs[[imnum]])[1]:1]),col='light gray',border=NA) polygon(c(1:dim(CIs[[imnum]])[1],dim(CIs[[imnum]])[1]:1), c(CIs[[imnum]][,2],CIs[[imnum]][,4][dim(CIs[[imnum]])[1]:1]),col='dark gray',border=NA) points(imglist[[imnum]],pch=19,cex=.6) abline(h=c(.2,.8),lty=2) } #Figure 1 tiff('Fig1.tif',res=300,height=3.5,width=7,units='in') #Fig 1A: The general conceptual diagram: par(mfrow=c(1,3),mar=c(4,4,3,2)) plot(0,0,col=0,ylim=c(0,1),xlim=c(0,1.4),xaxt='n', xlab='',ylab='P(cropland)',main='A - Conceptual diagram') abline(h=c(.2,.8),lty=2) axis(2) lines(c(.2,.2),betaqs(rep(0,11))[11,c(1,5)],lwd=8,col='light gray') lines(c(.2,.2),betaqs(rep(0,11))[11,c(2,4)],lwd=8,col='dark gray') lines(c(.4,.4),betaqs(rep(c(0,1),11))[22,c(1,5)],lwd=8,col='light gray') lines(c(.4,.4),betaqs(rep(c(0,1),11))[22,c(2,4)],lwd=8,col='dark gray') lines(c(.6,.6),betaqs(rep(1,11))[11,c(1,5)],lwd=8,col='light gray') lines(c(.6,.6),betaqs(rep(1,11))[11,c(2,4)],lwd=8,col='dark gray') lines(c(.8,.8),betaqs(rep(c(0,0,0,0,1),5))[25,c(1,5)],lwd=8,col='light gray') lines(c(.8,.8),betaqs(rep(c(0,0,0,0,1),5))[25,c(2,4)],lwd=8,col='dark gray') lines(c(1,1),betaqs(rep(c(0,1),11))[4,c(1,5)],lwd=8,col='light gray') lines(c(1,1),betaqs(rep(c(0,1),11))[4,c(2,4)],lwd=8,col='dark gray') lines(c(1.2,1.2),betaqs(rep(c(1,1,1,1,0),5))[25,c(1,5)],lwd=8,col='light gray') lines(c(1.2,1.2),betaqs(rep(c(1,1,1,1,0),5))[25,c(2,4)],lwd=8,col='dark gray') mtext(side=1,las=2,line=.3,at=.2,text='Non-crop',cex=.6) mtext(side=1,las=2,line=.3,at=.4,text='Deadlock',cex=.6) mtext(side=1,las=2,line=.3,at=.6,text='Cropland',cex=.6) mtext(side=1,las=1,line=.3,at=1,text='Insufficient',cex=.6) mtext(side=1,las=1,line=1,at=1,text='data',cex=.6) mtext(side=4,line=-1,at=.9,text='Cropland',cex=.6) mtext(side=4,line=-1,at=.1,text='Non-crop',cex=.6) mtext(side=4,line=-1,at=.5,text='Deadlocked',cex=.6) #Fig 1B: An example from a real image imnum<-3256 plot(imglist[[imnum]],pch=19,cex=.6,ylim=c(0,1),ylab='P(cropland)',xlab='Rating number', main=paste('B - Image',images[imnum]),col=0,xaxt='n') axis(1,at=seq(0,60,by=15)) polygon(c(1:dim(CIs[[imnum]])[1],dim(CIs[[imnum]])[1]:1), c(CIs[[imnum]][,1],CIs[[imnum]][,5][dim(CIs[[imnum]])[1]:1]),col='light gray',border=NA) polygon(c(1:dim(CIs[[imnum]])[1],dim(CIs[[imnum]])[1]:1), c(CIs[[imnum]][,2],CIs[[imnum]][,4][dim(CIs[[imnum]])[1]:1]),col='dark gray',border=NA) points(imglist[[imnum]],pch=19,cex=.3) abline(h=c(.2,.8),lty=2) arrows(min(which(CIs[[imnum]][,4]<.2)),.3, min(which(CIs[[imnum]][,4]<.2)),.2,length=.1,lwd=1.5) lines(c(52,52),betaqs(rep(c(1,1,0,0,1),50))[40,c(1,5)],lwd=8,col='light gray',lend=2) lines(c(52,52),betaqs(rep(c(1,1,0,0,1),50))[40,c(2,4)],lwd=8,col='dark gray',lend=2) text(60,.76,'97.5%',cex=.8) text(46,.73,'95%',cex=.8) text(46.5,.46,'5%',cex=.8) text(59,.43,'2.5%',cex=.8) lines(c(68,41),c(.78,.78)) lines(c(68,41),c(.39,.39)) lines(c(41,41),c(.39,.78)) #Fig 1C: Another example from a real image imnum<-3445 plot(imglist[[imnum]],pch=19,cex=.6,ylim=c(0,1),ylab='P(cropland)',xlab='Rating number', main=paste('C - Image',images[imnum]),col=0,xaxt='n') axis(1,at=seq(0,30,by=10)) polygon(c(1:dim(CIs[[imnum]])[1],dim(CIs[[imnum]])[1]:1), c(CIs[[imnum]][,1],CIs[[imnum]][,5][dim(CIs[[imnum]])[1]:1]),col='light gray',border=NA) polygon(c(1:dim(CIs[[imnum]])[1],dim(CIs[[imnum]])[1]:1), c(CIs[[imnum]][,2],CIs[[imnum]][,4][dim(CIs[[imnum]])[1]:1]),col='dark gray',border=NA) points(imglist[[imnum]],pch=19,cex=.3) abline(h=c(.2,.8),lty=2) arrows(min(which(CIs[[imnum]][,1]>.2)),.1, min(which(CIs[[imnum]][,1]>.2)),.2,length=.1,lwd=1.5) rm(imnum) dev.off() #Figure 2: tiff('Fig2.tif',width=7,height=7.5,units='in',res=300) par(mfrow=c(3,3),mar=c(4,4,3,1)) plotter(10) lines(c(56,56),betaqs(rep(c(1,1,0,0,1),50))[40,c(1,5)],lwd=8,col='light gray',lend=2) lines(c(56,56),betaqs(rep(c(1,1,0,0,1),50))[40,c(2,4)],lwd=8,col='dark gray',lend=2) text(64,.76,'97.5%',cex=.7) text(50,.73,'95%',cex=.7) text(50.5,.46,'5%',cex=.7) text(63,.43,'2.5%',cex=.7) lines(c(70,45),c(.79,.79)) lines(c(70,45),c(.39,.39)) lines(c(45,45),c(.39,.79)) plotter(13) plotter(428) plotter(46) plotter(70) plotter(74) plotter(49) plotter(795) plotter(23122) dev.off() #End Figure 2 #Function to return number of ratings to declare classification or deadlock and total number of ratings howmany<-function(cidat,lim=.2){ outdat<-rep(NA,3) outdat[1]<-min(which(cidat[,4]<=lim)) outdat[3]<-min(which(cidat[,2]>=(1-lim))) outdat[2]<-min(which(cidat[,1]>lim & cidat[,5]<(1-lim))) switch<-newconcl<-cropnocrop<-NA if(is.finite(min(outdat))){ rest<-cidat[-(1:min(outdat)),] #Does the CI revert back beyond the bounds after deciding to stop? if(min(outdat)==outdat[1]){ switch<-mean(rest[,4]>lim)} if(min(outdat)==outdat[3]){ switch<-mean(rest[,2]<(1-lim))} if(min(outdat)==outdat[2]){ switch<-mean(rest[,1](1-lim))} #Does a definitive answer definiteively become something else? if(min(outdat)==outdat[1]){ newconcl<-max(rest[,1])>lim} if(min(outdat)==outdat[3]){ newconcl<-min(rest[,5])<(1-lim)} if(min(outdat)==outdat[2]){ newconcl<-min(rest[,4])(1-lim)} #Does a definitive 'no' become a definite 'yes' or vice versa? if(min(outdat)==outdat[1]){ cropnocrop<-max(rest[,2])>(1-lim)} if(min(outdat)==outdat[3]){ cropnocrop<-min(rest[,4])1){finrat<-NA} totrat<-dim(cidat)[1] out<-c(numtoans,finrat,totrat,switch,newconcl,cropnocrop) names(out)<-c('numtoans','finrat','totrat','switch','newconcl','cropnocrop') return(out) } ### Calculations used in Figure 3: outmat<-as.data.frame(t(sapply(CIs,howmany))) #Vector of number of ratings needed to reach a conclusion hdat.all<-hist(outmat[,1],breaks=seq(1.5,141.5,by=1),xlim=c(0,40)) hdat.all$counts=log10(hdat.all$counts+1) hdat.nocrop<-hist(outmat[outmat[,2]==0,1],breaks=seq(1.5,141.5,by=1),xlim=c(0,40)) hdat.nocrop$counts=log10(hdat.nocrop$counts+1) hdat.crop<-hist(outmat[outmat[,2]==1,1],breaks=seq(1.5,141.5,by=1),xlim=c(0,40)) hdat.crop$counts=log10(hdat.crop$counts+1) hdat.unk<-hist(outmat[outmat[,2]==.5,1],breaks=seq(1.5,141.5,by=1),xlim=c(0,40)) hdat.unk$counts=log10(hdat.unk$counts+1) hdat.noans<-hist(outmat[is.na(outmat[,2]),3]) hdat.noans$counts=log10(hdat.noans$counts+1) ### End Figure 3 calculations #Figure 3: tiff('Fig3.tif',width=6.5,height=5.5,units='in',res=300) par(mfrow=c(2,2)) plot(hdat.all,yaxt='n',ylim=c(0,5),main='A - All classified images', xlab='Ratings to reach a decision',cex.axis=.8) axis(2,at=log10(c(1,11,101,1001,10001,100001)),labels=c(0,10,100,1000,10000,'100000'),las=2, cex.axis=.8) points(142,log10(sum(outmat[,1]==Inf)+1),pch=19) plot(hdat.nocrop,yaxt='n',ylim=c(0,5),main='B - Images classified as non-crop', xlab='Ratings to reach a decision',cex.axis=.8) axis(2,at=log10(c(1,11,101,1001,10001,100001)),labels=c(0,10,100,1000,10000,'100000'),las=2, cex.axis=.8) plot(hdat.crop,yaxt='n',ylim=c(0,5),main='C - Images classified as cropland', xlab='Ratings to reach a decision',cex.axis=.8) axis(2,at=log10(c(1,11,101,1001,10001,100001)),labels=c(0,10,100,1000,10000,'100000'),las=2, cex.axis=.8) plot(hdat.unk,yaxt='n',ylim=c(0,5),main='D - Deadlocked images', xlab='Ratings to reach a decision',cex.axis=.8) axis(2,at=log10(c(1,11,101,1001,10001,100001)),labels=c(0,10,100,1000,10000,'100000'),las=2, cex.axis=.8) dev.off() #End Figure 3 #Figure 4: An illustration of the number of unnecessary ratings outsort<-sort(outmat$numtoans,index.return=T) unfinished<-which(!is.finite(outmat$numtoans)) tiff('Fig4.tif',width=7,height=8.5,units='in',res=500) #tiff('Fig4_LOW_RES.tif',width=7,height=8.5,units='in',res=100) plot(0,0,col=0,log='xy',xlim=c(9,140),ylim=c(11,150), xlab='Number of ratings needed', ylab='Number of ratings actually performed',axes=F) axis(1,at=c(5,10,20,50,100,10000)) axis(1,at=147,labels='Undet.',las=2) axis(2,at=c(-9.99,1,5,10,20,50,100,10000)+10,labels=c(0,1,5,10,20,50,100,10000)) lines(seq(8,140),seq(18,150),col=2,lty=1) points(outsort$x+rnorm(length(outsort$x),0,.18), outmat$totrat[outsort$ix]+rnorm(length(outsort$x),0,.18)+10, pch='.',cex=1,col=1) points(rep(147,length(unfinished))+rnorm(length(unfinished),0,2), outmat$totrat[unfinished]+rnorm(length(unfinished),0,.5/sqrt(outmat$totrat[unfinished]))+10, col=6,pch='.') points(rnorm(100,20,.2),rnorm(100,12.3,.2),col=1,pch='.') points(rnorm(100,20,.2),rnorm(100,11,.2),col=6,pch='.') text(18,13.8,'Classification status',pos=4) text(20,12.3,'Classification finished',pos=4,cex=.9) text(20,11,'Classification ongoing',pos=4,cex=.9) lines(c(18,18),c(1,15)) lines(c(18,48),c(15,15)) lines(c(48,48),c(15,1)) dev.off() #End Figure 4 #Figure 5: Diagram to facilitate implementing this system #Vector of needed classifications to declare classified with increasing number of dissentions: nmajC<-c(9,17,24,30,36,42,47,53,58,63,69,74,79,84,89,94,99,104) tiff('Fig5.tif',width=5.5,height=5,units='in',res=300) par(mar=c(5,5,2,2)) plot(0,0,col=0,xlim=c(0,10),ylim=c(0,71),axes=F, xlab="Number of dissenting ratings", ylab="Number of majority ratings") axis(1) axis(2) polygon(c(0,10,10,0),c(0,10,0,0),col='dark gray',border=0) polygon(c(0:10,10,0),c(nmajC[1:11],71,71),col='green',border=0) text(0:10,nmajC[1:11],labels=nmajC[1:11],pos=3) polygon(c(4:10,10:4),c(nmajC[5:11],10:4),col='pink',border=0) points(0:10,nmajC[1:11],pch=19) text(2.2,48,'Task is classified',cex=1.2) text(7,25,'Conclusion unlikely',cex=1.2) text(1.4,16,'Continue',pos=4,cex=1.2) text(1.4,12,'collecting',pos=4,cex=1.2) text(1.4,8,'ratings',pos=4,cex=1.2) dev.off() #End Figure 5 #Simulation to show median time to decision as a function of underlying 'true' rating probability #Function to return specified posterior probabilty levels with a prior of Beta(.5,.5) bqlast<-function(dat,qts=c(.025,.05,.5,.95,.975)){ if((min(qts<0) | max(qts>1) | sum(!is.finite(qts)))>0){ return("ERROR: quantile values outside [0,1]")} if(sum(dat==1 | dat==0 | is.na(dat))!=length(dat)){ return("ERROR: data has values not equal to 0, 1 or NA")} return(qbeta(qts,.5+sum(dat==1 & is.finite(dat)), .5+sum(dat==0 & is.finite(dat))) ) } thetas<-seq(0,1,by=.01) #Vector of true rating probabilities nrep<-1000 #Number of repetitions for each probability level ratmat<-matrix(NA,nrow=nrep,ncol=length(thetas)) #Matrix of ratings required to reach a conclusion answer<-matrix(NA,nrow=nrep,ncol=length(thetas)) #Matrix of conclusions reached for(i in c(1:20,22:80,82:101)){ #Loop through the probablity levels (.20 and .80 are skipped) for(j in 1:nrep){ dat<-rbinom(1,1,prob=thetas[i]) bq<-bqlast(dat) while(bq[4]>.2 & bq[2]<.8 & (bq[1]<.2 | bq[5]>.8)){ dat<-append(dat,rbinom(1,1,prob=thetas[i])) bq<-bqlast(dat) print(round(c(i,j,bq),4)) } ratmat[j,i]<-length(dat) if(bq[4]<.2){answer[j,i]<-0} if(bq[2]>.8){answer[j,i]<-1} if(bq[1]>.2 & bq[5]<.8){answer[j,i]<-.5} } } #NOTE THAT this stochastic simulation gives slightly different results each time it is run, # especially for values close to .2 and .8. #End median time to decision simulation #Figure 6: Plot of median time to decision and decision reached as a function of underlying rating probability tiff('Fig6.tif',res=300,height=5.6,width=6.5,units='in') par(mar=c(4.5,4.5,1,4.5)) plot(seq(0,1,by=.01),apply(ratmat,2,median),type='l',ylim=c(0,300), xlab="Underlying probability of choosing 'cropland' (?)", ylab="Median number of ratings to outcome") axis(4,at=seq(0,300,length.out=6),labels=seq(0,1,length.out=6)) mtext(side=4,line=2.4,'Proportion of simulations with a given outcome') polygon(c(0,seq(0,1,by=.01)[1:20],.19,0),c(0,apply(answer==0,2,sum)[1:20]*3/10,0,0),col=colors()[78],lty=0) polygon(c(.21,seq(0,1,by=.01)[22:80],.79,.21),c(0,apply(answer==0,2,sum)[22:80]*3/10,0,0),col=colors()[79],lty=0) polygon(c(.81,seq(0,1,by=.01)[82:101],1,.81),300-c(1,apply(answer==1,2,sum)[82:101],1,1)*3/10,col=colors()[50],lty=0) polygon(c(.21,seq(0,1,by=.01)[22:80],.79,.21),300-c(1,apply(answer==1,2,sum)[22:80],1,1)*3/10,col=colors()[51],lty=0) polygon(c(seq(0,1,by=.01)[22:80],seq(0,1,by=.01)[80:22],.21), c(apply(answer==0,2,sum)[22:80]*3/10,300-apply(answer==1,2,sum)[80:22]*3/10,apply(answer==0,2,sum)[22]*3/10), col=colors()[234],lty=0) polygon(c(0,seq(0,1,by=.01)[1:20],.19,0),c(300,apply(answer==0,2,sum)[1:20]*3/10,300,300), col=colors()[214],lty=0) polygon(c(.81,seq(0,1,by=.01)[82:101],1,.81),c(0,1000-apply(answer==1,2,sum)[82:101],0,0)*3/10, col=colors()[214],lty=0) lines(seq(0,1,by=.01),apply(ratmat,2,median),type='l',ylim=c(0,300),lwd=2) legend(.32,250,legend=c('Cropland','Deadlocked','Non-cropland','Ratings to outcome'), pch=c('?','?','?','-'),col=colors()[c(50,234,78,24)],pt.cex=c(2.2,2.2,2.2,1.85), border=1,bg='white') legend(.3375,250,legend=c('','','',''),pch=c('?','?','?','-'), col=colors()[c(51,214,79,24)],bty='n',pt.cex=c(2.2,2.2,2.2,1.85)) dev.off() #End Figure 6 #### Code to compute numbers given in the manuscript text: #Numbers given in the 'Data collection campaign' section: length(unique(ratings$userid)) #Number of volunteers sum(ratings$yes)+sum(ratings$no) #Number of classifications length(unique(ratings$imgid)) #Number of images #Numbers given in results, by paragraph: #First paragraph: min(outmat[outmat[,2]==0,1],na.rm=T) #Min number of classifications to conclude crop/non-crop min(outmat[outmat[,2]==1,1],na.rm=T) #Min number of classifications to conclude crop/non-crop min(outmat[outmat[,2]==.5,1],na.rm=T) #Min number of classifications to declare deadlocked #How often does the first rating agree with the eventual result?: partone<-function(vec){ return(vec[is.finite(vec)][1]) } first<-sapply(imglist,partone) average<-sapply(imglist,mean,na.rm=T) mean(first[is.finite(first)]==round(average[is.finite(first)],0),na.rm=T) #Percentage of classifications reached with the minimum possible number of ratings: mean(outmat[,1]==9 | (outmat[,1]==10 & outmat[,2]==.5)) #Mean and 95%, 99% quantiles for ratings to conclude non-cropland: mean(outmat[outmat[,2]==0,1],na.rm=T) quantile(outmat[outmat[,2]==0,1],c(.95,.99),na.rm=T) #Mean and quantiles for ratings to conclude cropland: mean(outmat[outmat[,2]==1,1],na.rm=T) quantile(outmat[outmat[,2]==1,1],c(.95,.99),na.rm=T) #Mean and quantiles for ratings to declare deadlocked: mean(outmat[outmat[,2]==.5,1],na.rm=T) quantile(outmat[outmat[,2]==.5,1],c(.95,.99),na.rm=T) #End first paragraph #Second paragraph: #Frequency of un-conclusion with additional ratings (by classification): mean(outmat$switch[outmat$finrat==0]>0,na.rm=T) #Classified non-crop mean(outmat$switch[outmat$finrat==1]>0,na.rm=T) #Classified crop mean(outmat$switch[outmat$finrat==.5]>0,na.rm=T) #Deadlocked #Frequency of changed classification (one conclusion, followed by another): mean(outmat$newconcl[outmat$finrat==0],na.rm=T) #Initially non-crop mean(outmat$newconcl[outmat$finrat==1],na.rm=T) #Initially crop mean(outmat$newconcl[outmat$finrat==.5],na.rm=T) #Initially deadlocked #Frequency of really changed classification (non to crop or vice versa): mean(outmat$cropnocrop,na.rm=T) #End second paragraph #Third paragraph: #How many could have been saved? sum(is.finite(outmat$numtoans))/length(outmat$numtoans) #Savings of rating effort: totred<-outmat$numtoans totred[!is.finite(totred)]<-outmat$totrat[!is.finite(totred)] redu<-(sum(outmat$totrat)-sum(totred))/sum(outmat$totrat) redu #Savings volunteer ratings if images removed from circulation 1/(1-redu) #Estimated increase in images classified due to removal from circulation #Increased savings due to always quitting after 34 ratings: totred2<-outmat$numtoans totred2[!is.finite(totred2)]<-34 #Always quit after 34 ratings totred2[totred2>outmat$totrat]<-outmat$totrat[totred2>outmat$totrat] redu2<-(sum(outmat$totrat)-sum(totred2))/sum(outmat$totrat) redu2-redu #Increase in savings with additional rule redu2 #New savings if this additional rule is applied #Increased savings if always quitting after 4 dissents: #A funciton to calculate how many ratings were needed to reach 4 dissents disses<-function(ldat,limit=4){ dat<-ldat[is.finite(ldat)] cummat<-rbind( cumsum(dat==1), #cumulative number of positive ratings cumsum(dat==0)) #cumulative number of negative ratings return(min(which(apply(cummat,2,min)>=limit))) } outmat$diss4<-sapply(imglist,disses) #Rating number of 4th dissent totred3<-totred2 totred3[outmat$numtoans>outmat$diss4]<-outmat$diss4[outmat$numtoans>outmat$diss4] redu3<-(sum(outmat$totrat)-sum(totred3))/sum(outmat$totrat) redu3 #End third paragraph #Fourth paragraph (simulation results): 1-apply(answer==0,2,mean)[20] #Incorrectly classified, theta=.19 1-apply(answer==1,2,mean)[82] #Incorrectly classified, theta=.81 1-apply(answer==.5,2,mean)[c(22,80)] #Incorrectly classified, theta=.21 and .79 apply(answer[,1:20]==1,2,sum) #Non-crop->crop misclassification frequencies apply(answer[,82:101]==0,2,sum) #Crop->non-crop misclassification frequencies #End fourth paragraph