"core.find" <-
function(aln,
         shortcut  = FALSE,
         rm.island = FALSE,
         verbose   = TRUE,
         stop.at   = 15,
         stop.vol  = 0.5,
         write.pdbs = FALSE,
         outpath="core_pruned/") {

  ##  Itterative core deffination for lsq fit optimisation  
  ##  (core positions are those with low ellipsoid volume)

  
  error.ellipsoid<-function(pos.xyz) {
    S<-var(pos.xyz)
    prj  <- eigen(S, symmetric = TRUE)
    prj$values[prj$values < 0 & prj$values >= -1.0E-12]<-1.0E-12
    vol<-4/3*pi*prod( sqrt( prj$values ) )
    out<-list(vol=vol, U=prj$vectors, L=prj$values)
  }
  

  if(is.matrix(aln)) {
    xyz <- aln
    
    xyz.inds <- which(apply(is.na( xyz ), 2, sum)==0)
    res.inds<-xyz.inds[seq(3,length(xyz.inds),by=3)]/3

    pdbseq = rep("ALA",length(xyz.inds)/3)
    pdbnum = c(1:(length(xyz.inds)/3))
    
  } else {
    if( (is.list(aln)) && (class(aln)=="3dalign") ) {
      xyz=aln$xyz
    
      xyz.inds <- which(apply(is.na( xyz ), 2, sum)==0)
      res.inds <- which(apply(aln$ali=="-", 2, sum)==0)
    
      pdbseq = aa123(aln$ali[1,]); pdbnum = aln$resno[1,]
    } else {
      stop("input 'aln' should either be:
               a list object from 'read.fasta.pdb'
               or a numeric 'xyz' matrix of aligned coordinates")
    }
  }

  # First core = all non gap positions
  res.still.in <- res.inds # indices of core residues
  xyz.still.in <- xyz.inds # indices of core xyz's
  new.xyz.inds <- xyz.inds # indices of core xyz's
  xyz.moved    <- xyz      # core-fitted coords
  throwout.res <- NULL     # non-core res inds
  throwout.xyz <- NULL     # non-core xyz inds
  remain.vol   <- NULL
  core.length  <- NULL


  fit.to = rep(FALSE,ncol(xyz.moved))        # Preliminary fitting 
  fit.to[ as.vector(xyz.still.in) ]<-TRUE    # on first structure 
  xyz.tmp <- t(apply(xyz.moved, 1,           # to find mean structure
                       rot.lsq,              # for next fitting
                       yy=xyz.moved[1,],
                       xfit=fit.to))

  mean.xyz <- apply(xyz.tmp,2,mean)
  
  if(write.pdbs) { dir.create(outpath,FALSE)  }

  while(length(res.still.in) > stop.at) {

    # Core fitting, (core => pdbnum[ res.still.in ]) 
    fit.to = rep(FALSE,ncol(xyz.moved))
    fit.to[ as.vector(xyz.still.in) ]<-TRUE
    xyz.moved <- t(apply(xyz.moved, 1,
                         rot.lsq,
                         #yy=xyz.moved[1,],
                         yy=mean.xyz,
                         xfit=fit.to))

    mean.xyz <- apply(xyz.moved,2,mean)

    i<-1; j<-3
    volume<-NULL # ellipsoid volume
    while(j<=length( new.xyz.inds )) {
      e<-error.ellipsoid(xyz.moved[,new.xyz.inds[i:j]])
      volume<-c(volume,e$vol)
      i<-i+3;j<-j+3
    }

    record <- cbind(res.still.in ,   # store indices and volumes
                    matrix(new.xyz.inds,ncol=3,byrow=3),
                    volume)
     
    # Find highest volume (most variable position)
    if (shortcut) {
      if (length(res.still.in) >= 35) {
        # remove four at a time
        highest.vol.ind <- rev(order(volume))[1:4]
      } else { highest.vol.ind <- which.max(volume) } 
    } else {
      # no shortcut rm one at a time
      highest.vol.ind <- which.max(volume) 
    }

    if (rm.island) {
      # Exclude length 4 residue islands
      check <- bounds( res.still.in )
      check.ind <- which(check[,"length"] < 4)
      if ( length(check.ind) > 0 ) {
        res.cut=NULL
        for (r in 1:length(check.ind)) {
          res.cut <- c(res.cut, check[check.ind[r],"start"]:
                       check[check.ind[r],"end"])
        }
        highest.vol.ind <- unique( c(highest.vol.ind,
                             which( is.element(res.still.in, res.cut)) ))
      }
    }
    
    # rm position from "new.xyz.inds"
    xyz.exclude <- record[highest.vol.ind,c(2:4)]
    inds.torm <- which(is.element( new.xyz.inds, as.vector(xyz.exclude) ))
    new.xyz.inds <- new.xyz.inds[ -inds.torm ]
    
    # Store details of the residue we excluded
    tmp.vol <- sum(record[-highest.vol.ind,5])
    throwout.res <- c( throwout.res, as.vector(record[highest.vol.ind,1]))
    throwout.xyz <- rbind( throwout.xyz, record[highest.vol.ind,2:4] )
    remain.vol   <- c(remain.vol, tmp.vol)
    res.still.in <- record[-highest.vol.ind,1]
    xyz.still.in <- record[-highest.vol.ind,2:4]
    core.length  <- c(core.length,length(res.still.in))

    if(verbose) {
      # Progress report
      cat( paste(" core size",length(res.still.in),"of",
                length(res.inds))," vol =",
          round(tmp.vol,3),"\n" )

      if(write.pdbs) {
        # Write current core structure    
        write.pdb(file  = paste(outpath,"core_",
                    sprintf("%04.0f", length(res.still.in)),".pdb",sep=""),
                  #xyz   = xyz[1, new.xyz.inds ],
                  xyz   = mean.xyz[ new.xyz.inds ],
                  resno = pdbnum[ res.still.in ],
                  resid = pdbseq[ res.still.in ],
                  b     = round((volume[-highest.vol.ind] /
                    max(volume[-highest.vol.ind]) * 1),2) )
      }
    }
    
    if(tmp.vol < stop.vol) {
      cat(paste(" FINISHED: Min vol (",stop.vol,") reached\n"))
      break
    }
  }
  
  # ordered thro-out lists
  ordered.res<-as.vector(c(throwout.res, res.still.in))
  ordered.xyz<-rbind(throwout.xyz, xyz.still.in)
  rownames(ordered.xyz)=NULL
  vol = c(remain.vol, rep(NA,stop.at))
  len = c(core.length,rep(NA,stop.at))
  blank<-rep(NA, len[1]); blank[na.omit(len)]=na.omit(vol)
  ordered.vol<-c(rev(blank),NA); blank[na.omit(len)]=na.omit(len)
  ordered.len<-c(rev(blank),NA)
  
  # sample cores (volume < 1 A^3 & < 0.5 A^3)
  if( min(ordered.vol,na.rm=TRUE) < 1) {
    a.atom <- sort(ordered.res[which(ordered.vol<1)[1]:length(ordered.vol)])
    a.xyz  <- sort(as.vector(ordered.xyz[which(ordered.vol<1)[1]:
                                         length(ordered.vol),]))
    a.resno <- as.numeric(pdbnum[a.atom])
  } else {
    a.atom  <- NULL
    a.xyz   <- NULL
    a.resno <- NULL
  }
  if( min(ordered.vol,na.rm=TRUE) < 0.5) {  
    b.atom <- sort(ordered.res[which(ordered.vol<0.5)[1]:length(ordered.vol)])
    b.xyz  <- sort(as.vector(ordered.xyz[which(ordered.vol<0.5)[1]:
                                         length(ordered.vol),]))
    b.resno <- as.numeric(pdbnum[b.atom])
  } else {
    b.atom  <- NULL
    b.xyz   <- NULL
    b.resno <- NULL
  }
  
  output <- list(volume   = ordered.vol,
                 length   = ordered.len,
                 resno    = pdbnum[ ordered.res ],
                 atom     = ordered.res,
                 xyz      = ordered.xyz,
                 c1A.atom  = a.atom,
                 c1A.xyz   = a.xyz,
                 c1A.resno = a.resno,
                 c0.5A.atom  = b.atom,
                 c0.5A.xyz   = b.xyz,                 
                 c0.5A.resno = b.resno )
  class(output)="core"; return(output)

}

