QuadrantTreeClassifierRCode

From CVRG Wiki

Jump to: navigation, search

Contents

R Code Branch of Quadrant Tree Classifier Doc

Document Information
Package base version 2.8.0
1/5/2009
Project 5: Statistical Learning with Multi-Scale Cardiovascular Data
Contact email: yqin@jhu.edu
CardioVascular Research Grid
Johns Hopkins University

Commented Quadrant Tree Classifier Code

Download zipped file of commented code

library(survival)
 
 #
 
 # Given vector of event times and fired indicator (0=censored,1=fired),
 
 # and a time threshold, compute an estimate of the probability of firing
 
 # before that time threshold. We use linear interpolation of output from the
 
 # survival package's kaplan-meier calculation. Generally, we err on the side
 
 # of making the probability greater than needed to be careful.
 
 #
 
 survival.prob<-function(time,fired,time.threshold)
 
 {
 
    # If no data, then returen 0 so err on the side of making everyone fire.
 
    if (length(time)==0)
 
    {
 
      return(0)
 
    }
 
    # Fit a survival model with Kaplan-Meier method.
 
    S<-survfit(Surv(time,fired),type="kaplan-meier")
 
    # Assign number of observatoins to ntimes.
 
    ntimes<-length(S$time)
 
    # If there is just one observation or the smallest time is larger than the
 
    # threshold, return 0.
 
    if ((ntimes==1)||(min(S$time)>=time.threshold))
 
    {
 
       return(0)
 
    }
 
    # If the largest time is smaller than the threshold, return the largest survival
 
    # probability generated by the model.
 
    if (max(S$time)<=time.threshold)
 
    {
 
       return(S$surv[length(S$surv)])
 
    }
 
    # If not either of the two situation mentioned above, then do linear interpolation
 
    # Find the index of observation with the largest time that is smaller than threshold
 
    index1<-max(which(S$time<time.threshold))
 
    # Find the index of observation with the smallest time that is larger than threshold
 
    # i.e. index1+1
 
    index2<-index1+1
 
    # Calculate the probability of surviving after time index1.
 
    p1<-S$surv[index1]
 
    # Calculate the probability of surviving after time index2.
 
    p2<-S$surv[index2]
 
    # Linear interpolation
 
    lambda<-(time.threshold-S$time[index1])/(S$time[index2]-S$time[index1])
 
    p<-p1+lambda*(p2-p1)
 
    return(p)
 
 }
 
  
 
 build.quadrant.classifier<-function(x,y,implanted,days.to.firing,tx.list,ty.list,sens,time.threshold)
 
 {
 
    #
 
    #
 
    #
 
    # Inputs:
 
    #
 
    #           x = continuous feature variable vector
 
    #           y = continuous feature variable vector
 
    #           implanted = days of implanted
 
    #           days to firing = days to firing
 
    #
 
    #           tx = list of x thresholds
 
    #           ty = list of y thresholds
 
    #           sens = desired sensitivity
 
    #           time.threshold = time defining the "fired within" phenotype
 
    #
 
    #Output:
 
    #
 
    #           an assignment of probabilities to the cells:
 
    #
 
    #           cell 1: x<tx, y<ty
 
    #           cell 2: x<tx, y>ty
 
    #           cell 3: x>tx, y<ty
 
    #           cell 4: x>tx, y>ty
 
    #
 
    #
 
    # This function uses the R "survival" package to compute
 
    # Kaplan-Meier probabilities
 
    #
 
    # Create a time variable and an indicator of fired.
 
    #
 
    # Assign lx the value of the length of x.
 
    lx<-length(x)
 
    # Assign fired a array of 0s with length the same as lx.
 
    fired<-rep(0,lx)
 
    # Assign time a array of 0s with length the same as lx.
 
    time<-rep(0,lx)
 
    # Remove all the NAs and convert data for survival analysis.
 
    for (i in seq(1,lx))
 
    {
 
       # For each i, i.e. each observation:
 
       # If days.to.firing is NA, then assign both fired[i] and time[i] to 0.
 
       # Otherwise, if days.to firing is smaller than time.threshold, then assign
 
       # fired[i]
 
       if (is.na(days.to.firing[i]))
 
       {
 
          fired[i]<-0
 
          time[i]<-implanted[i]
 
       } else
 
       # Otherwise, if days.to firing is smaller than time.threshold, then assign
 
       # fired[i] 1, and copy days.to.firing[i] to time[i]. And otherwise, assign
 
       # fired[i] 0, and copy implanted[i] to time[i].
 
       {
 
          if (days.to.firing[i]<time.threshold)
 
          {
 
           fired[i]<-1
 
             time[i]<-days.to.firing[i]
 
          }else
 
         
 
          {
 
             fired[i]<-0
 
             time[i]<-implanted[i]
 
          }
 
       }
 
    }
 
  
 
    #
 
    # Get rid of missing data
 
    #
 
    # Combine x, y, time, and fired to a data frame named d.temp
 
    d.temp<-data.frame(x=x,y=y,time=time,fired=fired)
 
    # Remove all the observations with missing data or NAs.
 
    d.temp<-d.temp[complete.cases(d.temp),]
 
    #
 
    # Creat a list with spec=0
 
    best.L<-list(spec=0)
 
    # Creat a loop to try every possible combination of tx and ty to get a optimal
 
    # pair of tx ty with maxium specificity.
 
    for (tx in tx.list)
 
    {
 
       for (ty in ty.list)
 
       {
 
         #
 
         # Break up the data into the four cells based on tx and ty.
 
         #
 
         data<-list()
 
         data[[1]]<-d.temp[(d.temp$x<tx)&(d.temp$y<ty),]
 
         data[[2]]<-d.temp[(d.temp$x<tx)&(d.temp$y>ty),]
 
         data[[3]]<-d.temp[(d.temp$x>tx)&(d.temp$y<ty),]
 
         data[[4]]<-d.temp[(d.temp$x>tx)&(d.temp$y>ty),]
 
         #
 
         #
 
         # Compute prior probabilities of the cells.
 
         #
 
         p.cells<-c(
 
             dim(data[[1]])[1],
 
             dim(data[[2]])[1],
 
             dim(data[[3]])[1],
 
             dim(data[[4]])[1])
 
         p.cells<-p.cells/sum(p.cells)
 
         #  
 
         # Compute the fire survival probablilities.
 
         # Note the linear interpolation.   
 
         #
 
         # Creat array for p and P[fire | cell = i]
 
         p.fire<-c()
 
         # Creat array for p and P[not fire | cell = i]
 
         p.notfire<-c()
 
         #
 
         # Do a loop for each cell.
 
         for (i in seq(1,4))
 
         {
 
              #print("i = ")
 
              #print(i)
 
              # Store time for cell i
 
              time<-data[[i]]$time
 
              # Store fired for cell i
 
              fired<-data[[i]]$fired
 
              # Do survival analysis within cell i
 
              p<-survival.prob(time,fired,time.threshold)
 
              #print("p = ")
 
              #print(p)
 
              # Store P[not fire | cell = i] to p.notfire
 
              p.notfire<-c(p.notfire,p) 
 
              # Store P[fire | cell = i] to p.fire
 
              p.fire<-c(p.fire,1-p)
 
             
 
         }
 
         #print("end of loop p.fire p.notfire= ")
 
         #print(p.fire)
 
         #print(p.notfire)
 
         #
 
         # We estimated P[fire | cell = i] for each i,
 
         # and P[not fire | cell = i]      
 
         # for each i and we know P[ cell = i].
 
         # Use Bayes theorem to get the cell
 
         # distributions under each class.             
 
         #
 
         # p = P[ cell = i | fired] 
 
         #
 
         # and
 
         #
 
         # q = P[ cell i | not fired]
 
         #
 
         # Calculate P[ cell = i, fired]
 
         p<-p.fire*p.cells
 
         # Calculate P[ cell = i | fired]
 
         p<-p/sum(p)
 
         # Calculate P[ cell = i, not fired]
 
         q<-p.notfire*p.cells
 
         # Calculate P[ cell = i | not fired]
 
         q<-q/sum(q)
 
         #
 
         # Compute likelihood ratios
 
         #
 
         lr<-p/q
 
         #
 
         # Make a list of the cell indices 1,2,3,4 in decreasing order
 
         # of likelihood ratio.
 
         #
 
         perm<-order(lr, decreasing=TRUE)
 
         #
 
         # Define the cell probabilities for the classifier.
 
         # The rule is to assign a 
 
         # probability of 1 to each cell until we get to
 
         # the desired sensitivity.
 
         # The last probability can be a fraction in order
 
         # to get exactly what we want.
 
         #
 
         #print("lr = ")
 
         #print(lr)
 
         #print("p = ")
 
         #print(p)
 
         #print("q = ")
 
         #print(q)  
 
         #
 
         # Assign all 0s to fprob
 
         fprob<-rep(0,4)
 
         # Creat a variable used in "while" loop.
 
         psum<-0
 
         i<-1
 
         # Repeat the loop until psum+p[perm[i]]<sens, which means keep add up more
 
         # cells into psum until it reaches sensitiviy.
 
         while(psum+p[perm[i]]<sens)
 
         {
 
            # If a cell is included, assign fprob for that cell 1.
 
            fprob[perm[i]]<-1
 
            # Add one more into psum
 
            psum<-psum+p[perm[i]]
 
            i<-i+1
 
         }
 
         # For the last cell, compute a partial
 
         fprob[perm[i]]<-(sens-psum)/p[perm[i]]
 
         #
 
         # Compute the specificity
 
         #
 
         spec<-sum((1-fprob)*q)
 
         # If we find a larger specificity, we replace the best.L with the new one.
 
         if (spec>best.L$spec)
 
             {   
 
                best.L<-list(fprob=fprob,spec=spec,tx=tx,ty=ty,p=p,q=q)
 
             }
 
         }
 
     }
 
     # Return best.L.
 
     best.L
 
 }

Uncommented Quadrant Tree Classifier Code

Download zipped file of uncommented code

library(survival)
 
 survival.prob<-function(time,fired,time.threshold)
 
 {
 
    # If no data, then returen 0 so err on the side of making everyone fire.
 
    if (length(time)==0)
 
    {
 
      return(0)
 
    }
 
    S<-survfit(Surv(time,fired),type="kaplan-meier")
 
    ntimes<-length(S$time)
 
    if ((ntimes==1)||(min(S$time)>=time.threshold))
 
    {
 
       return(0)
 
    }
 
    if (max(S$time)<=time.threshold)
 
    {
 
       return(S$surv[length(S$surv)])
 
    }
 
    index1<-max(which(S$time<time.threshold))
 
    index2<-index1+1
 
    p1<-S$surv[index1]
 
    p2<-S$surv[index2]
 
    lambda<-(time.threshold-S$time[index1])/(S$time[index2]-S$time[index1])
 
    p<-p1+lambda*(p2-p1)
 
    return(p)
 
 }
 
  
 
 build.quadrant.classifier<-function(x,y,implanted,days.to.firing,tx.list,ty.list,sens,time.threshold)
 
 {
 
    lx<-length(x)
 
    fired<-rep(0,lx)
 
    time<-rep(0,lx)
 
    for (i in seq(1,lx))
 
    {
 
       if (is.na(days.to.firing[i]))
 
       {
 
          fired[i]<-0
 
          time[i]<-implanted[i]
 
       } else
 
       {
 
          if (days.to.firing[i]<time.threshold)
 
          {
 
           fired[i]<-1
 
             time[i]<-days.to.firing[i]
 
          }else
 
         
 
          {
 
             fired[i]<-0
 
             time[i]<-implanted[i]
 
          }
 
       }
 
    }
 
    d.temp<-data.frame(x=x,y=y,time=time,fired=fired)
 
    d.temp<-d.temp[complete.cases(d.temp),]
 
    best.L<-list(spec=0)
 
    for (tx in tx.list)
 
    {
 
       for (ty in ty.list)
 
       {
 
         data<-list()
 
         data[[1]]<-d.temp[(d.temp$x<tx)&(d.temp$y<ty),]
 
         data[[2]]<-d.temp[(d.temp$x<tx)&(d.temp$y>ty),]
 
         data[[3]]<-d.temp[(d.temp$x>tx)&(d.temp$y<ty),]
 
         data[[4]]<-d.temp[(d.temp$x>tx)&(d.temp$y>ty),]
 
         p.cells<-c(
 
             dim(data[[1]])[1],
 
             dim(data[[2]])[1],
 
             dim(data[[3]])[1],
 
             dim(data[[4]])[1])
 
         p.cells<-p.cells/sum(p.cells)
 
         p.fire<-c()
 
         p.notfire<-c()
 
         for (i in seq(1,4))
 
         {
 
              time<-data[[i]]$time
 
              fired<-data[[i]]$fired
 
              p<-survival.prob(time,fired,time.threshold)
 
              p.notfire<-c(p.notfire,p) 
 
              p.fire<-c(p.fire,1-p)
 
              
 
         }
 
         p<-p.fire*p.cells
 
         p<-p/sum(p)
 
         q<-p.notfire*p.cells
 
         q<-q/sum(q)
 
         lr<-p/q
 
         perm<-order(lr, decreasing=TRUE)
 
         fprob<-rep(0,4)
 
         psum<-0
 
         i<-1
 
         while(psum+p[perm[i]]<sens)
 
         {
 
            # If a cell is included, assign fprob for that cell 1.
 
            fprob[perm[i]]<-1
 
            # Add one more into psum
 
            psum<-psum+p[perm[i]]
 
            i<-i+1
 
         }
 
         fprob[perm[i]]<-(sens-psum)/p[perm[i]]
 
         spec<-sum((1-fprob)*q)
 
         if (spec>best.L$spec)
 
             {   
 
                best.L<-list(fprob=fprob,spec=spec,tx=tx,ty=ty,p=p,q=q)
 
             }
 
         }
 
     }
 
     best.L
 
 }

Reference

Personal tools
Project Infrastructures