"calc.deviance" <-
function(obs.values, fitted.values, weights = rep(1,length(obs.values)), family="binomial", calc.mean = TRUE)
{
# j. leathwick/j. elith
#
# version 2.1 - 5th Sept 2005
#
# function to calculate deviance given two vectors of raw and fitted values
# requires a family argument which is set to binomial by default
#
#

if (length(obs.values) != length(fitted.values)) 
   stop("observations and predictions must be of equal length")

y_i <- obs.values

u_i <- fitted.values
 
if (family == "binomial" | family == "bernoulli") {
 
   deviance.contribs <- (y_i * log(u_i)) + ((1-y_i) * log(1 - u_i))
   deviance <- -2 * sum(deviance.contribs * weights)

}

if (family == "poisson" | family == "Poisson") {

    deviance.contribs <- ifelse(y_i == 0, 0, (y_i * log(y_i/u_i))) - (y_i - u_i)
    deviance <- 2 * sum(deviance.contribs * weights)

}

if (family == "laplace") {
    deviance <- sum(abs(y_i - u_i))
    }

if (family == "gaussian") {
    deviance <- sum((y_i - u_i) * (y_i - u_i))
    }
    


if (calc.mean) deviance <- deviance/length(obs.values)

return(deviance)

}

"calibration" <-
function(obs, preds, family = "binomial")
{
#
# j elith/j leathwick 17th March 2005
# calculates calibration statistics for either binomial or count data
# but the family argument must be specified for the latter 
# a conditional test for the latter will catch most failures to specify
# the family
#

if (family == "bernoulli") family <- "binomial"
pred.range <- max(preds) - min(preds)
if(pred.range > 1.2 & family == "binomial") {
print(paste("range of response variable is ", round(pred.range, 2)), sep = "", quote = F)
print("check family specification", quote = F)
return()
}
if(family == "binomial") {
pred <- preds + 1e-005
pred[pred >= 1] <- 0.99999
mod <- glm(obs ~ log((pred)/(1 - (pred))), family = binomial)
lp <- log((pred)/(1 - (pred)))
a0b1 <- glm(obs ~ offset(lp) - 1, family = binomial)
miller1 <- 1 - pchisq(a0b1$deviance - mod$deviance, 2)
ab1 <- glm(obs ~ offset(lp), family = binomial)
miller2 <- 1 - pchisq(a0b1$deviance - ab1$deviance, 1)
miller3 <- 1 - pchisq(ab1$deviance - mod$deviance, 1)
}
if(family == "poisson") {
mod <- glm(obs ~ log(preds), family = poisson)
lp <- log(preds)
a0b1 <- glm(obs ~ offset(lp) - 1, family = poisson)
miller1 <- 1 - pchisq(a0b1$deviance - mod$deviance, 2)
ab1 <- glm(obs ~ offset(lp), family = poisson)
miller2 <- 1 - pchisq(a0b1$deviance - ab1$deviance, 1)
miller3 <- 1 - pchisq(ab1$deviance - mod$deviance, 1)
}
calibration.result <- c(mod$coef, miller1, miller2, miller3)
names(calibration.result) <- c("intercept", "slope", "testa0b1", "testa0|b1", "testb1|a")
return(calibration.result)
}

"mars.contribs" <-
function (mars.glm.object,sp.no = 1, verbose = TRUE) 
{

# j leathwick/j elith August 2006
#
# version 3.1 - developed in R 2.3.1 using mda 0.3-1
#
# takes a mars/glm model and uses the updated mars export table
# stored as the second list item from mars.binomial
# assessing the contribution of the fitted functions, 
# amalgamating terms for variables as required
#
# amended 29/9/04 to pass original mars model details
# and to return f-statistics
#
# amended 7th January to accommodate any glm model family

  mars.detail <- mars.glm.object$mars.call
  pred.names <- mars.detail$predictor.base.names #get the names from the original data
  n.preds <- length(pred.names)

  spp.names <- mars.detail$y.names
  family <- mars.detail$family

  m.table <- mars.glm.object$mars.table[-1,]
  m.table$names1 <- as.character(m.table$names1)   #convert from a factor

  x.data <- as.data.frame(eval(mars.glm.object$basis.functions))
  y.data <- as.data.frame(eval(mars.glm.object$y.values))

  assign("x.data", x.data, pos = 1)
  assign("y.data", y.data, pos = 1)
  assign("sp.no", sp.no, pos = 1)

  glm.model <- glm(y.data[,sp.no] ~ .,data = x.data,family = family)

  print(paste("performing backwards drops for mars/glm model for",
       spp.names[sp.no]),quote=F)

  n.bfs <- length(m.table[,1])
 
  delta.deviance <- rep(0,n.preds)
  df <- rep(0,n.preds)
  signif <- rep(0,n.preds)

  for (i in 1:n.preds) {   #start at two because first line is the constant
    # look for variable names in the table matching those in the var list

	  var.nos <- which(as.character(pred.names[i]), m.table$names1) #grep(as.character(pred.names[i]),m.table$names1)

    #drop.list stores numbers of basis functions to be dropped
    if (length(var.nos) > 0) {
      drop.list <- 0 - var.nos
      x.data.new <- x.data[,drop.list]
      assign("x.data.new",x.data.new,pos=1)

      if (verbose) {
 	  print(paste("Dropping ",pred.names[i],"...",sep=""),
	             quote=FALSE)
      }

      new.model <- glm(y.data[,sp.no] ~ ., data=x.data.new, family = family)
      comparison <- anova(glm.model,new.model,test="Chisq")

      df[i] <- comparison[2,3]
      delta.deviance[i] <- zapsmall(comparison[2,4],4)
      signif[i] <- zapsmall(comparison[2,5],6)
    }
  }

  rm(x.data,y.data,sp.no,pos=1)  # tidy up temporary files    

  deviance.table <- as.data.frame(cbind(pred.names,delta.deviance,df,signif))
  names(deviance.table) <- c("variable","delta dev","deg. free.","p-value")

  return(list(mars.call=mars.detail,deviance.table=deviance.table))
}

"mars.cv" <-
function (mars.glm.object, nk = 10, sp.no = 1, prev.stratify = F) 
{
#
# j. leathwick/j. elith - August 2006
#
# version 3.1 - developed in R 2.3.1 using mda 0.3-1
#
# function to perform k-fold cross validation 
# with full model perturbation for each subset
#
# requires mda library from Cran
# requires functions mw and calibration
#
# takes a mars/glm object produced by mars.glm
# and first assesses the full model, and then 
# randomly subsets the dataset into nk folds and drops 
# each subset in turn, fitting on remaining data 
# and predicting for withheld data
#
# caters for both single species and community models via the argument sp.no
# for the first, sp.no can be left on its default of 1
# for community models, sp.no can be varied from 1 to n.spp
#
# modified 29/9/04 to 
#   1. return mars analysis details for audit trail
#   2. calculate roc and calibration on subsets as well as full data
#      returning the mean and se of the ROC scores 
#      and the mean calibration statistics
#
# modified 8/10/04 to add prevalence stratification
# modified 7th January to test for binomial family and return if not
# 
# updated 15th March to cater for both binomial and poisson families
#
# updated 16th June 2005 to calculate residual deviance
#

  data <- mars.glm.object$mars.call$dataframe    #get the dataframe name
  dataframe.name <- deparse(substitute(data))   
    
  data <- as.data.frame(eval(parse(text=data)))   #and now the data
  n.cases <- nrow(data)

  mars.call <- mars.glm.object$mars.call          #and the mars call details
  mars.x <- mars.call$mars.x    
  mars.y <- mars.call$mars.y
  mars.degree <- mars.call$degree
  mars.penalty <- mars.call$penalty
  family <- mars.call$family
  site.weights <- eval(mars.glm.object$weights$site.weights)

  n.spp <- length(mars.y)

  if (sp.no > n.spp) {
    print(paste("the value specified for sp.no of",sp.no),quote=F)
    print(paste("exceeds the total number of species, which is ",n.spp),quote=F)
    return()
  }
  
  xdat <- as.data.frame(data[,mars.x])
  xdat <- mars.new.dataframe(xdat)[[1]]
  ydat <- mars.glm.object$y.values[,sp.no]
  target.sp <- names(data)[mars.y[sp.no]]

  if (prev.stratify) {
    presence.mask <- ydat == 1
    absence.mask <- ydat == 0
    n.pres <- sum(presence.mask)
    n.abs <- sum(absence.mask)
  }

  print(paste("Calculating ROC and calibration from full model for",target.sp),quote=F)

  u_i <- mars.glm.object$fitted.values[,sp.no]
  y_i <- ydat

  if (family == "binomial") {
    full.resid.deviance <- calc.deviance(y_i,u_i, weights = site.weights, family="binomial") 
    full.test <- roc(y_i, u_i)
    full.calib <- calibration(y_i, u_i)
  }

  if (family=="poisson") {
    full.resid.deviance <- calc.deviance(y_i,u_i, weights = site.weights, family="poisson")
    full.test <- cor(y_i, u_i) 
    full.calib <- calibration(y_i, u_i, family = "poisson")
  }

# set up for results storage
  
  subset.test <- rep(0,nk)
  subset.calib <- as.data.frame(matrix(0,ncol=5,nrow=nk))
  names(subset.calib) <- c("intercept","slope","test1","test2","test3")
  subset.resid.deviance <- rep(0,nk)

# now setup for withholding random subsets
    
  pred.values <- rep(0, n.cases)
  fitted.values <- rep(0, n.cases)

  if (prev.stratify) {

    selector <- rep(0,n.cases)

#create a vector of randomised numbers and feed into presences

    temp <- rep(seq(1, nk, by = 1), length = n.pres)
    temp <- temp[order(runif(n.pres, 1, 100))]
    selector[presence.mask] <- temp

# and then do the same for absences

    temp <- rep(seq(1, nk, by = 1), length = n.abs)
    temp <- temp[order(runif(n.abs, 1, 100))]
    selector[absence.mask] <- temp

  }
  else {  #otherwise make them random with respect to presence/absence

    selector <- rep(seq(1, nk, by = 1), length = n.cases)
    selector <- selector[order(runif(n.cases, 1, 100))]
  }
 
  print("", quote = FALSE)
  print("Creating predictions for subsets...", quote = F)

  for (i in 1:nk) {
    cat(i," ")
    model.mask <- selector != i  #used to fit model on majority of data
    pred.mask <- selector == i   #used to identify the with-held subset
    assign("species.subset", ydat[model.mask], pos = 1)
    assign("predictor.subset", xdat[model.mask, ], pos = 1)

    # fit new mars model

    mars.object <- mars(y = species.subset, x = predictor.subset, 
      degree = mars.degree, penalty = mars.penalty)

    # and extract basis functions

    n.bfs <- length(mars.object$selected.terms)
    bf.data <- as.data.frame(mars.object$x)
    names(bf.data) <- paste("bf",1:n.bfs,sep="")
    assign("bf.data", bf.data, pos=1)

    # then fit a binomial model to them

    mars.binomial <- glm(species.subset ~ .,data=bf.data[,-1], family= family, maxit = 100)

    pred.basis.functions <- as.data.frame(mda:::model.matrix.mars(mars.object, 
      xdat[pred.mask, ]))

    #now name the bfs to match the approach used in mars.binomial

    names(pred.basis.functions) <- paste("bf",1:n.bfs,sep="")

    # and form predictions for them and evaluate performance

    fitted.values[pred.mask] <- predict(mars.binomial, 
      pred.basis.functions, type = "response")

    y_i <- ydat[pred.mask]
    u_i <- fitted.values[pred.mask]  
    weights.subset <- site.weights[pred.mask]

    if (family == "binomial") {
      subset.resid.deviance[i] <- calc.deviance(y_i,u_i,weights = weights.subset, family="binomial") 
      subset.test[i] <- roc(y_i,u_i)
      subset.calib[i,] <- calibration(y_i, u_i)
    }

    if (family=="poisson"){
      subset.resid.deviance[i] <- calc.deviance(y_i,u_i,weights = weights.subset, family="poisson") 
      subset.test[i] <- cor(y_i, u_i) 
      subset.calib[i,] <- calibration(y_i, u_i, family = family)
    }
  }
 
  cat("","\n")

# tidy up temporary files

  rm(species.subset,predictor.subset,bf.data,pos=1) 

# and assemble results for return

#  mars.detail <- list(dataframe = dataframe.name,
#    x = mars.x, x.names = names(xdat), 
#    y = mars.y, y.names = names(data)[mars.y], 
#    target.sp = target.sp, degree=mars.degree, penalty = mars.penalty, family = family)

  y_i <- ydat
  u_i <- fitted.values

  if (family=="binomial") {
    cv.resid.deviance <- calc.deviance(y_i,u_i,weights = site.weights, family="binomial") 
    cv.test <- roc(y_i, u_i)
    cv.calib <- calibration(y_i, u_i)
  }

  if (family=="poisson"){
    cv.resid.deviance <- calc.deviance(y_i,u_i,weights = site.weights, family="poisson") 
    cv.test <- cor(y_i, u_i) 
    cv.calib <- calibration(y_i, u_i, family = "poisson")
  }

  subset.test.mean <- mean(subset.test)
  subset.test.se <- sqrt(var(subset.test))/sqrt(nk)

  subset.test <- list(test.scores = subset.test, subset.test.mean = subset.test.mean, 
    subset.test.se = subset.test.se)

  subset.calib.mean <- apply(subset.calib[,c(1:2)],2,mean)
  names(subset.calib.mean) <- names(subset.calib)[c(1:2)] #mean only of parameters

  subset.calib <- list(subset.calib = subset.calib, 
    subset.calib.mean = subset.calib.mean)
    
  subset.deviance.mean <- mean(subset.resid.deviance)
  subset.deviance.se <- sqrt(var(subset.resid.deviance))/sqrt(nk)

  subset.deviance <- list(subset.deviances = subset.resid.deviance, subset.deviance.mean = subset.deviance.mean,
    subset.deviance.se = subset.deviance.se)

  return(list(mars.call = mars.call, full.resid.deviance = full.resid.deviance,
    full.test = full.test, full.calib = full.calib, pooled.deviance = cv.resid.deviance, pooled.test = cv.test, 
    pooled.calib = cv.calib,subset.deviance = subset.deviance, subset.test = subset.test, subset.calib = subset.calib))
}

"mars.export" <-
function (object,lineage) 
{
#
# j leathwick/j elith August 2006
#
# takes a mars model fitted using library mda
# and extracts the basis functions and their 
# coefficients, returning them as a table
# caters for models with degree up to 2
#
# version 3.1 - developed in R 2.3.1 using mda 0.3-1

  which <- object$selected.terms
  nterms <- length(which)
  nspp <- ncol(eval(object$call$y))
  dir <- object$factor
  cut <- object$cuts
  var.names <- dimnames(object$factor)[[2]]
  p <- length(var.names)
  coefs <- as.data.frame(object$coefficients)
  names(coefs) <- names(eval(object$call$y))

# setup storage for results

  names1 <- rep("null", length = nterms)
  types1 <- rep("null", length = nterms)
  levels1 <- rep("null", length = nterms)
  signs1 <- rep(0, length = nterms)
  cuts1 <- rep(0, length = nterms)

  names2 <- rep("null", length = nterms)
  types2 <- rep("null", length = nterms)
  levels2 <- rep("null", length = nterms)
  signs2 <- rep(0, length = nterms)
  cuts2 <- rep(0, length = nterms)
  names1[1] <- "constant"
  signs1[1] <- 1

# now cycle through the terms

  for (i in seq(2, nterms)) {
    j <- which[i]
      term.count = 1
      for (k in 1:p) {
        if (dir[j, k] != 0) {
          if (term.count == 1) {
            n <- match(var.names[k],lineage$full.name)
            names1[i] <- lineage$base.name[n] #var.names[k]
            types1[i] <- lineage$type[n]
            levels1[i] <- lineage$level[n]
            signs1[i] <- dir[j, k]
            cuts1[i] <- cut[j, k]
            term.count <- term.count + 1
          }
          else {
            names2[i] <- var.names[k]
            n <- match(var.names[k],lineage$full.name)
            names2[i] <- lineage$base.name[n] #var.names[k]
            types2[i] <- lineage$type[n]
            levels2[i] <- lineage$level[n]
            signs2[i] <- dir[j, k]
            cuts2[i] <- cut[j, k]
          }
        }
      }
    }
  mars.export.table <- data.frame(names1, types1, levels1, signs1, cuts1, 
       names2, types2, levels2, signs2, cuts2, coefs)

  return(mars.export.table)
}

"mars.glm" <-
function (data,                         # the input data frame
  mars.x,                               # column numbers of the predictors
  mars.y,                               # column number(s) of the response variable(s)
  mars.degree = 1,                      # level of interactions - 1 = zero, 2 = 1st order, etc
  site.weights = rep(1, nrow(data)),    # one weight per site
  spp.weights = rep(1,length(mars.y)),  # one wieght per species
  penalty = 2,                          # the default penaly for a mars model
  family = "binomial")                  # the family for the glm model
{
#
# j leathwick, j elith - August 2006
#
# version 3.1 - developed in R 2.3.1 using mda 0.3-1
#
# calculates a mars/glm object in which basis functions are calculated
# using an initial mars model with single or multiple responses 
# data for individual species are then fitted as glms using the 
# common set of mars basis functions with results returned as a list
#
# takes as input a dataset and args selecting x and y variables, and degree of interaction
# along with site and species weights, the CV penalty, and the glm family argument
# the latter would normally be one of "binomial" or "poisson" - "gaussian" could be used
# but in this case the model shouldn't differ from that fitted using mars on its own
#
# requires mda and leathwick/elith's mars.export
#
# modified 3/11/04 to store information on glm phase convergence
# and with number of iterations raised to 100 to encourage convergence for low prevalence species
# modified 4/11/04 to accommodate observation weights in both mars and glm steps
# modified 12/04 to accommodate non-binomial families
# modified 11/05 to accommodate factor variables
# these are done as 0/1 dummy variables in a new dataframe
# created using mars.new.dataframe

  require(mda)

  n.spp <- length(mars.y)

# setup input data and assign to position one

  dataframe.name <- deparse(substitute(data))  # get the dataframe name

  xdat <- as.data.frame(eval(data[, mars.x]))                 #form the temporary datasets
  predictor.base.names <- names(xdat)

# create the new dataframe with dummy vars for factor predictors
  xdat <- mars.new.dataframe(xdat)
  lineage <- xdat[[2]]   # tracks which variables have had dummy's created
  xdat <- xdat[[1]]
  predictor.dummy.names <- names(xdat)

  ydat <- as.data.frame(eval(data[, mars.y]))
  names(ydat) <- names(data)[mars.y]

  assign("xdat", xdat, pos = 1)               #and assign them for later use
  assign("ydat", ydat, pos = 1)

# create storage space for glm model results

  n.cases <- nrow(xdat)
 
  fitted.values <- matrix(0,ncol = n.spp, nrow = n.cases)
  model.residuals <- matrix(0,ncol = n.spp, nrow = n.cases)
  null.deviances <- rep(0,n.spp)
  residual.deviances <- rep(0,n.spp)
  null.dfs <- rep(0,n.spp)
  residual.dfs <- rep(0,n.spp)
  converged <- rep(TRUE,n.spp)

# fit the mars model and extract the basis functions

  cat("fitting initial mars model for",n.spp,"responses","\n")
  cat("followed by a glm model with a family of",family,"\n")

  mars.object <- mars(x = xdat, y = ydat, degree = mars.degree, w = site.weights, 
       wp = spp.weights, penalty = penalty)

  bf.data <- as.data.frame(eval(mars.object$x))
  n.bfs <- ncol(bf.data)
  bf.names <- paste("bf", 1:n.bfs, sep = "")
  names(bf.data) <- bf.names
  bf.data <- as.data.frame(bf.data[,-1])

  m.table <- as.data.frame(mars.export(mars.object,lineage))
  names(m.table)[(10 + 1):(10 + n.spp)] <- names(ydat)

  p.values <- matrix(0, ncol = n.spp, nrow = n.bfs)
  rownames(p.values) <- paste("bf", 1:n.bfs, sep = "")
  colnames(p.values) <- names(ydat)

# now cycle through the species fitting glm models 

  cat("fitting glms for individual responses","\n")

  for (i in 1:n.spp) {

    cat(names(ydat)[i],"\n")
    model.glm <- glm(ydat[, i] ~ ., data = bf.data, weights = site.weights, 
  	  family = family, maxit = 100)

# update the coefficients and other results

    # then match names and insert as appropriate
    m.table[ , i + 10] <- 0   					# set all values to zero
    m.table[ , i + 10] <- model.glm$coefficients  	      # update all the constant
    sum.table <- summary(model.glm)$coefficients
    p.values[,i] <- sum.table[,4]
    fitted.values[,i] <- model.glm$fitted
    model.residuals[,i] <- resid(model.glm)
    null.deviances[i] <- model.glm$null.deviance
    residual.deviances[i] <- model.glm$deviance
    null.dfs[i] <- model.glm$df.null
    residual.dfs[i] <- model.glm$df.residual
    converged[i] <- model.glm$converged
  }

# now assemble data to be returned

  fitted.values <- as.data.frame(fitted.values)
  names(fitted.values) <- names(ydat)

  model.residuals <- as.data.frame(model.residuals)
  names(model.residuals) <- names(ydat)

  deviances <- data.frame(names(ydat),null.deviances,null.dfs,residual.deviances,residual.dfs,converged)
  names(deviances) <- c("species","null.dev","null.df","resid.dev","resid.df","converged")

  weights = list(site.weights = site.weights, spp.weights = spp.weights)

  mars.detail <- list(dataframe = dataframe.name, mars.x = mars.x, 
    predictor.base.names = predictor.base.names, predictor.dummy.names = predictor.dummy.names, 
    mars.y = mars.y, y.names = names(ydat), degree=mars.degree, penalty = penalty, 
    family = family)

  rm(xdat,ydat,pos=1)           #finally, clean up the temporary dataframes

  return(list(mars.table = m.table, basis.functions = bf.data, y.values = ydat,
    fitted.values = fitted.values, residuals = model.residuals, weights = weights, deviances = deviances,
    p.values = p.values, mars.call = mars.detail))
}

"mars.new.dataframe" <-
function (input.data) 
{
#
# j leathwick, j elith - August 2006
#
# version 3.1 - developed in R 2.3.1 using mda 0.3-1
#
# takes an input data frame and checks for factor variables 
# converting these to dummy variables, one each for each factor level
# returning it for use with mars.glm so that factor vars can be included
# in a mars analysis
#

  if (!is.data.frame(input.data)) {
    print("input data must be a dataframe..",quote = FALSE)
    return()
  }

  n <- 1
  for (i in 1:ncol(input.data)) {  #first transfer the vector variables
    if (is.vector(input.data[,i])) {
      if (n == 1) {
        output.data <- as.data.frame(input.data[,i]) 
        names.list <- names(input.data)[i]
        var.type <- "vector"
        factor.level <- "na"
      }
      else {
        output.data[,n] <- input.data[,i]
        names.list <- c(names.list,names(input.data)[i])
        var.type <- c(var.type,"vector")
        factor.level <- c(factor.level,"na")
      }
      names(output.data)[n] <- names(input.data)[i]
      n <- n + 1
    }
  }

  for (i in 1:ncol(input.data)) {  # and then the factor variables
    if (is.factor(input.data[,i])) {
      temp.table <- summary(input.data[,i])
      for (j in 1:length(temp.table)) {
        names.list <- c(names.list,names(input.data)[i])
        var.type <- c(var.type,"factor")
        factor.level <- c(factor.level,names(temp.table)[j])
        output.data[,n] <- ifelse(input.data[,i] == names(temp.table)[j],1,0)
        names(output.data)[n] <- paste(names(input.data)[i],".",names(temp.table)[j],sep="")
        n <- n + 1
      }
    }
  }

  lineage <- data.frame(names(output.data),names.list,var.type,factor.level)
  for (i in 1:4) lineage[,i] <- as.character(lineage[,i])
  names(lineage) <- c("full.name","base.name","type","level")
   
  return(list(dataframe = output.data, lineage = lineage))
}

"mars.plot" <-
function (mars.glm.object,  #the input mars object
   sp.no = 0,               # the species number for multi-response models
   plot.rug=T,              # plot a rug of deciles
   plot.layout = c(3,4),    # the plot layout to use
   file.name = NA)          # giving a file name will send results to a pdf
{

# j leathwick/j elith August 2006
#
# version 3.1 - developed in R 2.3.1 using mda 0.3-1
#
# requires mars.export of leathwick/elith
# 
# takes a mars or mars/glm model and either 
# creates a mars export table (vanilla mars object)
# and works from this or uses the updated mars export table
# stored as the first list item from mars.binomial
# plotting out the fitted functions, amalgamating terms
# for variables and naming the pages as required
#
# caters for multispecies mars models by successively plotting
# all species unless a value other than zero is given for sp.no
#

  max.plots <- plot.layout[1] * plot.layout[2]

  if (is.na(file.name)) {
    use.windows = TRUE }
  else {
    use.windows = FALSE
    pdf(file=file.name,width = 11, height=8)
  }

  if (class(mars.glm.object) == "mars") {  #then we have a mars object
    mars.binomial = FALSE
    model <- mars.glm.object
    xdat <- eval(model$call$x)
    Y <- as.data.frame(eval(model$call$y))
    n.env <- ncol(xdat)
    m.table <- mars.export(mars.glm.object)
  }
  else {
    mars.binomial = TRUE

    dat <- mars.glm.object$mars.call$dataframe
    mars.x <- mars.glm.object$mars.call$mars.x
    xdat <- as.data.frame(eval(parse(text=dat)))
    xdat <- xdat[,mars.x]

    m.table <- mars.glm.object[[1]]

  }

  n.bfs <- length(m.table[,1])
  n.spp <- length(m.table[1,]) - 10
 
  spp.names <- names(m.table)[(10+1):(10+n.spp)]

  if (sp.no == 0) {
    wanted.species <- seq(1:n.spp) 
  }
  else {
    wanted.species <- sp.no
  }

  xrange <- matrix(0,nrow = 2,ncol = ncol(xdat))
  factor.filter <- rep(FALSE,ncol(xdat))
  for (i in 1:ncol(xdat)) factor.filter[i] <- is.vector(xdat[,i])
  xrange[,factor.filter] <- sapply(xdat[,factor.filter], range)
  for (i in wanted.species) {
    n.pages <- 1
    plotit <- rep(TRUE, n.bfs)
    print(paste("plotting responses for ",spp.names[i]),quote=F)
    nplots <- 0
    for (j in 2:n.bfs) {
      if (m.table$names2[j] == "null") {
        if (plotit[j]) {
          varno <- pmatch(as.character(m.table$names1[j]), 
          names(xdat))
          if (factor.filter[varno]) {
            Xi <- seq(xrange[1, varno], xrange[2, varno], 
               length = 100)
            bf <- pmax(0, m.table$signs1[j] * (Xi - m.table$cuts1[j]))
            bf <- bf * m.table[j, i + 10]
            bf <- bf - mean(bf)
          }
          else {
            factor.table <- as.data.frame(table(xdat[,varno]))
            names(factor.table) <- c("levels","coefficients")
            factor.table$coefficients <- 0
            level.no <- match(m.table$levels1[j],factor.table$levels)
            factor.table$coefficients[level.no] <- m.table[j, i + 10]
          }
          if (j < n.bfs) {
            for (k in ((j + 1):n.bfs)) {
              if (m.table$names1[j] == m.table$names1[k] & m.table$names2[k] == "null") {
                if (factor.filter[varno]) {
                  bf.add <- pmax(0, m.table$signs1[k] * 
                      (Xi - m.table$cuts1[k]))
                  bf.add <- bf.add * m.table[k, i + 10]
                  bf <- bf + bf.add
                }
                else {
                  level.no <- match(m.table$levels1[k],factor.table$levels)
                  factor.table$coefficients[level.no] <- m.table[k, i + 10]
                }
                plotit[k] <- FALSE
              }
            }
          }
          if (nplots == 0) {
            if (use.windows) windows(width = 11, height = 8)
              par(mfrow = plot.layout)
            }
            if (factor.filter[varno]) {
              plot(Xi, bf, type = "l", xlab = names(xdat)[varno], ylab = "response")
              if (plot.rug) rug(quantile(xdat[,varno], probs = seq(0, 1, 0.1), na.rm = FALSE))
            }
            else {
              plot(factor.table$levels, factor.table$coefficients, xlab = names(xdat)[varno])
            }
            nplots = nplots + 1
            plotit[j] <- FALSE
          }
        }
        else {
          if (plotit[j]) {
            varno1 <- pmatch(as.character(m.table$names1[j]), names(xdat))
            X1 <- seq(xrange[1, varno1], xrange[2, varno1], length = 20)
            bf1 <- pmax(0, m.table$signs1[j] * (X1 - m.table$cuts1[j]))
            varno2 <- pmatch(as.character(m.table$names2[j]), names(xdat))
            X2 <- seq(xrange[1, varno2], xrange[2, varno2], length = 20)
            bf2 <- pmax(0, m.table$signs2[j] * (X2 - m.table$cuts2[j]))
            zmat <- bf1 %o% bf2
            zmat <- zmat * m.table[j, i + 10]
            if (j < n.bfs) {
              for (k in ((j + 1):n.bfs)) {
                if (m.table$names1[j] == m.table$names1[k] & m.table$names2[j] == m.table$names2[k]) {
                  bf1 <- pmax(0, m.table$signs1[k] * (X1 - m.table$cuts1[k]))
                  bf2 <- pmax(0, m.table$signs2[j] * (X2 - m.table$cuts2[j]))
                  zmat2 <- bf1 %o% bf2
                  zmat2 <- zmat2 * m.table[j, i + 10]
                  zmat = zmat + zmat2
                  plotit[k] <- FALSE
                }
              }
            }
          if (nplots == 0) {
            if (use.windows) windows(width = 11, height = 8)
            par(mfrow = plot.layout)
          }
          persp(x = X1, y = X2, z = zmat, xlab = names(xdat)[varno1], 
                    ylab = names(xdat)[varno2], theta = 45, phi = 25)
          nplots = nplots + 1
        }
      }
      if (nplots == 1) {
        title(paste(spp.names[i], " - page ", n.pages, sep = ""))
      }
      if (nplots == max.plots) {
        nplots = 0
        n.pages <- n.pages + 1
      }
    }
  }
  if (!use.windows) dev.off()
}

"mars.plot.fits" <-
function(mars.glm.object,    # the input mars object
   sp.no = 0,                # allows selection of individual spp for multiresponse models
   mask.presence = FALSE,    # plots out just presence records
   use.factor = FALSE,       # draws plots as factors for faster printing
   plot.layout = c(4,2),     # the default plot layout
   file.name = NA)           # allows plotting to a pdf file
{
#
# j leathwick, j elith - August 2006
#
# version 3.1 - developed in R 2.3.1 using mda 0.3-1
#
# to plot distribution of fitted values in relation to ydat from mars or other p/a models
# allows masking out of absences to enable focus on sites with high predicted values
# fitted values = those from model; raw.values = original y values
# label = text species name; ydat = predictor dataset
# mask.presence forces function to only plot fitted values for presences
# use.factor forces to use quicker printing box and whisker plot
# file.name routes to a pdf file of this name
#

  max.plots <- plot.layout[1] * plot.layout[2]

  if (is.na(file.name)) {    #setup for windows or file output
    use.windows = TRUE 
  }
  else {
    pdf(file.name, width=8, height = 11)
    par(mfrow = plot.layout)
    par(cex = 0.5)
    use.windows = FALSE
  }

  dat <- mars.glm.object$mars.call$dataframe    #get the dataframe name
  dat <- as.data.frame(eval(parse(text=dat)))   #and now the data

  n.cases <- nrow(dat)

  mars.call <- mars.glm.object$mars.call	#and the mars call details
  mars.x <- mars.call$mars.x    
  mars.y <- mars.call$mars.y
  family <- mars.call$family

  xdat <- as.data.frame(dat[,mars.x])
  ydat <- as.data.frame(dat[,mars.y])

  n.spp <- ncol(ydat)
  n.preds <- ncol(xdat)

  fitted.values <- mars.glm.object$fitted.values

  pred.names <- names(dat)[mars.x]
  spp.names <- names(dat)[mars.y]

  if (sp.no == 0) {
    wanted.species <- seq(1:n.spp) 
    }
  else {
    wanted.species <- sp.no
    }

  for (i in wanted.species) {

    if (mask.presence) {
	mask <- ydat[,i] == 1 }
    else {
      mask <- rep(TRUE, length = n.cases) 
    }

    robust.max.fit <- approx(ppoints(fitted.values[mask,i]), sort(fitted.values[mask,i]), 0.99) #find 99%ile value
    nplots <- 0

    for (j in 1:n.preds) {
      if (use.windows & nplots == 0) {
        windows(width = 8, height = 11)
        par(mfrow = plot.layout)
        par(cex = 0.5)
      }
	nplots <- nplots + 1    
      if (is.vector(xdat[,j])) wt.mean <- mean((xdat[mask, j] * fitted.values[mask, i]^5)/mean(fitted.values[mask, i]^5))
        else wt.mean <- 0
	if (use.factor) {
	temp <- factor(cut(xdat[mask, j], breaks = 12))
	if (family == "binomial") {
	  plot(temp, fitted.values[mask,i], xlab = pred.names[j], ylab = "fitted values", ylim = c(0, 1))
      }
	else {
	  plot(temp, fitted.values[mask,i], xlab = pred.names[j], ylab = "fitted values")}
	}
	else {
	  if (family == "binomial") {
	    plot(xdat[mask, j], fitted.values[mask,i], xlab = pred.names[j], ylab = "fitted values", 
					ylim = c(0, 1))
        }
	  else {
          plot(xdat[mask, j], fitted.values[mask,i], xlab = pred.names[j], ylab = "fitted values")
        }
	}
	abline(h = (0.333 * robust.max.fit$y), lty = 2.)
	if (nplots == 1) { 
  	  title(paste(spp.names[i], ", wtm = ", zapsmall(wt.mean, 4.)))}
	else {
	  title(paste("wtm = ", zapsmall(wt.mean, 4.)))}
	  nplots <- ifelse(nplots == max.plots, 0, nplots)
	}
    }
  if (!use.windows) dev.off()
}

"mars.predict" <-
function (mars.glm.object,new.data) 
{
#
# j leathwick, j elith - August 2006
#
# version 3.1 - developed in R 2.3.1 using mda 0.3-1
#
# calculates a mars/glm object in which basis functions are calculated
# using an initial mars model with single or multiple responses 
# data for individual species are then fitted as glms using the 
# common set of mars basis functions with results returned as a list
#
# takes as input a dataset and args selecting x and y variables, and degree of interaction
# along with site and species weights, the CV penalty, and the glm family argument
# the latter would normally be one of "binomial" or "poisson" - "gaussian" could be used
# but in this case the model shouldn't differ from that fitted using mars on its own
#
# naming problem for dataframes fixed - je - 15/12/06
#
# requires mda and leathwick/elith's mars.export
#
  require(mda)

# first recreate both the original mars model and the glm model

# setup input data and create original temporary data

  dataframe.name <- mars.glm.object$mars.call$dataframe  # get the dataframe name
  mars.x <- mars.glm.object$mars.call$mars.x
  mars.y <- mars.glm.object$mars.call$mars.y
  n.spp <- length(mars.y)
  family <- mars.glm.object$mars.call$family
  mars.degree <- mars.glm.object$mars.call$degree
  penalty <- mars.glm.object$mars.call$penalty
  site.weights <- mars.glm.object$weights[[1]]
  spp.weights <- mars.glm.object$weights[[2]]

  print("creating original data frame...",quote=FALSE)

  base.data <- as.data.frame(eval(parse(text = dataframe.name)))

  x.temp <- eval(base.data[, mars.x])                 #form the temporary datasets
  base.names <- names(x.temp)

  xdat <- mars.new.dataframe(x.temp)[[1]]
   
  ydat <- as.data.frame(base.data[, mars.y])
  names(ydat) <- names(base.data)[mars.y]

  assign("xdat", xdat, pos = 1)               #and assign them for later use
  assign("ydat", ydat, pos = 1)

# now create the temporary dataframe for the new data

  print("checking variable matching with new data",quote = FALSE)

  new.names <- names(new.data)

  for (i in 1:length(base.names)) {

    name <- base.names[i]
   
    if (!(name %in% new.names)) {
      print(paste("Variable ",names," missing from new data",sep=""),quote = FALSE)
      return()
    }
  }

  print("and creating temporary dataframe for new data...",quote=FALSE)

  selector <- match(names(x.temp),names(new.data))

  pred.dat <- mars.new.dataframe(new.data[,selector])[[1]]

  assign("pred.dat", pred.dat, pos = 1)               #and assign them for later use

# fit the mars model and extract the basis functions

  print(paste("re-fitting initial mars model for",n.spp,"responses"),quote = FALSE)
  print(paste("using glm family of",family),quote = FALSE)

  mars.glm.object <- mars(x = xdat, y = ydat, degree = mars.degree, w = site.weights, 
    wp = spp.weights, penalty = penalty)

  old.bf.data <- as.data.frame(eval(mars.glm.object$x))
  n.bfs <- ncol(old.bf.data)
  bf.names <- paste("bf", 1:n.bfs, sep = "")
  old.bf.data <- as.data.frame(old.bf.data[,-1])
  names(old.bf.data) <- bf.names[-1]

  new.bf.data <- as.data.frame(mda:::model.matrix.mars(mars.glm.object,pred.dat))
  new.bf.data <- as.data.frame(new.bf.data[,-1])
  names(new.bf.data) <- bf.names[-1]

# now cycle through the species fitting glm models 

  print("fitting glms for individual responses", quote = F)

  prediction <- as.data.frame(matrix(0, ncol = n.spp, nrow = nrow(pred.dat)))
  names(prediction) <- names(ydat)
  standard.errors <- as.data.frame(matrix(0, ncol = n.spp, nrow = nrow(pred.dat)))
  names(standard.errors) <- names(ydat)

  for (i in 1:n.spp) {

    print(names(ydat)[i], quote = FALSE)
    model.glm <- glm(ydat[, i] ~ ., data = old.bf.data, weights = site.weights, 
      family = family, maxit = 100)
    temp <- predict.glm(model.glm,new.bf.data,type="response",se.fit=TRUE)
    prediction[,i] <- temp[[1]]
    standard.errors[,i] <- temp[[2]]
  }

  return(list("prediction"=prediction,"ses"=standard.errors))
}

"roc" <-
function (obsdat, preddat) 
{
# code adapted from Ferrier, Pearce and Watson's code, by J.Elith
#
# see:
# Hanley, J.A. & McNeil, B.J. (1982) The meaning and use of the area
# under a Receiver Operating Characteristic (ROC) curve.
# Radiology, 143, 29-36
#
# Pearce, J. & Ferrier, S. (2000) Evaluating the predictive performance
# of habitat models developed using logistic regression.
# Ecological Modelling, 133, 225-245.
# this is the non-parametric calculation for area under the ROC curve, 
# using the fact that a MannWhitney U statistic is closely related to
# the area
#
    if (length(obsdat) != length(preddat)) 
        stop("obs and preds must be equal lengths")
    n.x <- length(obsdat[obsdat == 0])
    n.y <- length(obsdat[obsdat == 1])
    xy <- c(preddat[obsdat == 0], preddat[obsdat == 1])
    rnk <- rank(xy)
    wilc <- ((n.x * n.y) + ((n.x * (n.x + 1))/2) - sum(rnk[1:n.x]))/(n.x * 
        n.y)
    return(round(wilc, 4))
}

