# Copyright (c) 2014,
# Mathias Kuhring, KuhringM@rki.de, Robert Koch Institute, Germany, 
# All rights reserved. For details, please note the license.txt.


# Calculate 25% and 50% drops in the contig coverage
# Input:
#  coverage           List of basewise coverage per contig
#  readlengthsmean    List of ReadLengthMean per contig
calcFeatureCoverageDrops <- function(coverage, readlengthsmean){
  
  drop.result <- data.frame(CoverageDrop25Count=vector(mode="numeric", length(coverage)), 
                            CoverageDrop25Max=vector(mode="numeric", length(coverage)),
                            CoverageDrop50Count=vector(mode="numeric", length(coverage)),
                            CoverageDrop50Max=vector(mode="numeric", length(coverage)))
  
  drops <- lapply(1:length(coverage), function(i){ coverageCurve(coverage[[i]],readlengthsmean[i], 0.75) })
  # if no contig has drops, 'apply' returns 'numeric(0)' instead of a list with lenght = # contigs
  # so length > 0 has to be tested
  if (length(drops)>0){
    for (i in 1:length(drops)){
      drop.result$CoverageDrop25Count[i] = dim(drops[[i]])[1]
      if (drop.result$CoverageDrop25Count[i] > 0)
        drop.result$CoverageDrop25Max[i] = max(c(drops[[i]][,4]-drops[[i]][,2],drops[[i]][,6]-drops[[i]][,2]))
    }
  }
  
  drops <- lapply(1:length(coverage), function(i){ coverageCurve(coverage[[i]],readlengthsmean[i], 0.50) })
  if (length(drops)>0){
    for (i in 1:length(drops)){
      drop.result$CoverageDrop50Count[i] = dim(drops[[i]])[1]
      if (drop.result$CoverageDrop50Count[i] > 0)
        drop.result$CoverageDrop50Max[i] = max(c(drops[[i]][,4]-drops[[i]][,2],drops[[i]][,6]-drops[[i]][,2]))
    }
  }
  
  return(drop.result)
}


dupdel <- function(data){
  l = length(data)
  for (i in l:2)
    if (data[i] == data[i-1]){
      data <- data[-i]
    }
  return(data)
}

window <- function(data, w, i){
  return (data[(max(1, i-w)):(min(i+w, length(data)))])
}

left <- function(data, w, i){
  return (data[(max(1, i-w)):(i-1)])
}

right <- function(data, w, i){
  return (data[(i+1):(min(i+w, length(data)))])
}

leftMax <- function(data, w, i){
  d <- left(data, w, i)
  y <- max(d)
  idx <- which(d==y)
  x <- tail(idx,1) + (i-w-1)
  return(c(x,y))
}

rightMax <- function(data, w, i){
  d <- right(data, w, i)
  y <- max(d)
  idx <- which(d==y)
  x <- head(idx,1) + i
  return(c(x,y))
}

size <- function(cl, rl){
  if (rl <= 0.1 * cl)
    return (rl)
  else
    return (cl * 0.1)
}

mySmooth <- function(data, w){
  l <- length(data)
  output <- vector(mode="numeric", length=l)
  for (i in 1:l){
    output[i] = mean(window(data, w, i))
  }
  return(output)
}

covDrop <- function(data, w, p){
  l <- length(data)
  maxl <- vector(mode="numeric")
  maxr <- vector(mode="numeric")
  min <- vector(mode="numeric")
  # find minima
  for (i in 2:(l-1)){
    left <- leftMax(data, w, i)
    right <- rightMax(data, w, i)
    if ((data[i]/left[2]<=p) && (data[i]/right[2]<=p)){
      maxl <- rbind(maxl, left)
      maxr <- rbind(maxr, right)
      min <- rbind(min, c(i,data[i]))
    }
  }
  # merge minima
  maxlf <- vector(mode="numeric")
  maxrf <- vector(mode="numeric")
  minf <- vector(mode="numeric")
  if (length(dim(min))>0){
    group <- 1
    if (dim(min)[1] > 1){
      for (i in 2:dim(min)[1]){
        if (maxl[i,1] < min[tail(group,1),1]){
          group <- c(group,i)
        }
        else{
          maxlf <- rbind(maxlf, c(mean(maxl[group,1][maxl[group,2]==max(maxl[group,2])]), max(maxl[group,2])))
          maxrf <- rbind(maxrf, c(mean(maxr[group,1][maxr[group,2]==max(maxr[group,2])]), max(maxr[group,2])))
          minf <- rbind(minf, c(mean(min[group,1][min[group,2]==min(min[group,2])]), min(min[group,2])))
          group <- i
        }
      }
    }
    maxlf <- rbind(maxlf, c(mean(maxl[group,1][maxl[group,2]==max(maxl[group,2])]), max(maxl[group,2])))
    maxrf <- rbind(maxrf, c(mean(maxr[group,1][maxr[group,2]==max(maxr[group,2])]), max(maxr[group,2])))
    minf <- rbind(minf, c(mean(min[group,1][min[group,2]==min(min[group,2])]), min(min[group,2])))
  }
  return(cbind(minf,maxlf,maxrf))
}


covcurv <- function(input, num, p){
  cov <- unlist(input$Coverage[num])
  x <- 1:length(cov)
  r <- input$ReadLengthsMean[num]
  w <- size(length(cov),r)
  sm <- mySmooth(cov,w)
  title <-  paste('contig', num, ', drop', (1-p)*100,'%',collapse=' ')
  plot(cov,col="grey", main=title, xlab='contig', ylab='coverage')
  lines(sm, col='red', lwd=2)
  test <- covDrop(sm, 2*w, p)
  if (length(test) > 0){
    points(test[,1],test[,2],col='blue',lwd=2, cex=2)
    points(test[,3],test[,4],col='purple',lwd=2, cex=2)
    points(test[,5],test[,6],col='purple',lwd=2, cex=2)
  }
}

coverageCurve <- function(cov, read, p){  
  w <- size(length(cov),read)
  sm <- mySmooth(cov,w)
  drops <- covDrop(sm, 2*w, p)
  return(drops)
}

mypdf <- function(input, max){
  pdf(height=5)
  for (i in 1:max){
    covcurv(input,i,0.75)
    covcurv(input,i,0.50)
  }
  dev.off()
}