###################Hopkins and St. John 2018#####################
##function for applying proposed family of metrics. Input: character matrix in Claddis object format; table showing hierarchical relationships between characters; alpha value.
##Input required:
##matr = character matrix in Claddis object format (see "ReadMorphNexus")
##type = table with three columns with headings "Char", "Type", and "Sub"
##      "Char" is the character number
##      "Type" is the type of character, either "S" for secondary or "T" for tertiary.  In our experience, quaternary characters are rarely to never used
##      "Sub" is the character numeber that the secondary or tertiary character is contingent on
##alpha = desired alpha value.  Should vary between 0 and 1. 
##      If alpha = 0, only primary characters will contribute to dissimilarity estimate
##      If alpha = 1, then shared primary characters will be weighted by the secondary characters shared; same for secondary characters with tertiary characters
##      See text for further explanation.

##note that delta{ijk} = 1 when characters are comparable, consistent with Gower's initial formulation. It may be preferable to replace the total number of 
#comparable characters with the maximum character total possible (Lloyd's MORB metric) especially if there are ordered characters. This is not implemented here
#but could be done by modifying lines 90 and 212 for the new metric and Gower's, respectively 
#################

alpha.coefficient<-function(matr,type,alpha){
  pairs<-combn(nrow(matr$matrix),2)
  HSJ<-matrix(NA,nrow=nrow(matr$matrix),ncol=nrow(matr$matrix))
  alpha<-alpha
  for (i in 1:ncol(pairs)){
    sim.temp<-matrix(abs(suppressWarnings(as.numeric(matr$matrix[pairs[1,i],]))-
                           suppressWarnings(as.numeric(matr$matrix[pairs[2,i],]))))
    #correct values for ordered multistate characters
    if (any(matr$ordering=='ord')){
      m.o<-which(matr$ordering=='ord')
      sim.temp[m.o]<-abs(suppressWarnings(as.numeric(matr$matrix[pairs[1,i],][m.o]))-
                           suppressWarnings(as.numeric(matr$matrix[pairs[2,i],][m.o])))#/matr$max.vals[m.o]
    }
    #correct values for unordered multistate characters
    if (any(matr$ordering=='unord'&matr$max.vals>1)){
      m.u<-which(matr$ordering=='unord'&matr$max.vals>1)
      sim.temp[m.u]<-replace(sim.temp[m.u],which(sim.temp[m.u]>1),1)
    }
    #correct polymorphic characters
    if (length(grep("&", unique(c(matr$matrix[pairs[1,i],], matr$matrix[pairs[2,i],])))) > 0){
      polym.states <- sort(c(grep("&", matr$matrix[pairs[1,i],]), grep("&", matr$matrix[pairs[2,i],])))
      for (v in 1:length(polym.states)) {
        char.state1<-strsplit(matr$matrix[pairs[1,i],][polym.states[v]],"&")[[1]]
        char.state2<-strsplit(matr$matrix[pairs[2,i],][polym.states[v]],"&")[[1]]
        int.value <- intersect(char.state1, char.state2)
        if (length(int.value)>0){
          sim.temp[polym.states[v]]<-0
        }
        if (length(int.value)==0 & anyNA(c(char.state1,char.state2))==FALSE){
          if (matr$ordering[polym.states[v]]=='unord'){
            sim.temp[polym.states[v]]<-1
          }
          if (matr$ordering[polym.states[v]]=='ord'){
            pairs.poly<-matrix(0,nrow=length(char.state1),ncol=length(char.state2))
            for (m in 1:length(char.state1)){
              for (n in 1:length(char.state2)){
                pairs.poly[m,n]<-abs(as.numeric(char.state1[m])-as.numeric(char.state2[n]))
              }
            }
            sim.temp[polym.states[v]]<-min(pairs.poly)
          }
        }
      }
    }
    #account for Tertiary characters (by weighting)
    if (any(type$Type=='T')){
      te<-which(type$Type=='T')
      for (j in 1:length(unique(type$Sub[te]))){
        te.sub<-which(type$Sub==as.character(unique(type$Sub[te])[j]))
        te.sim<-sim.temp[te.sub]
        s.te<-as.numeric(as.character(unique(na.omit(type$Sub[te])))[j])
        if (length(na.omit(te.sim))>0) {
          sim.temp[s.te]<-1-(alpha*(1-(sum(na.omit(te.sim))/length(na.omit(te.sim))))+(1-alpha))  #alpha is applied as if it were a similarity measure, therefore the distance is converted to a similarity by substracted from one, then the whole thing is substracted from one to convert back to dissimilarity
          sim.temp[te.sub]<-NA
        }
      }
    }
    #account for secondary characters (by weighting)
    if (any(type$Type=='S')){
      s<-which(type$Type=='S')
      for (j in 1:length(unique(type$Sub[s]))){
        s.sub<-which(type$Sub==as.character(unique(type$Sub[s])[j]))
        s.sim<-sim.temp[s.sub]
        p.s<-as.numeric(as.character(unique(na.omit(type$Sub[s])))[j])
        if (length(na.omit(s.sim))>0) {
          sim.temp[p.s]<-1-(alpha*(1-(sum(na.omit(s.sim))/length(na.omit(s.sim))))+(1-alpha))
          sim.temp[s.sub]<-NA
        }
      }
    }
    #calculate total dissimilarity
    wt.comp.char<-sum(na.omit(cbind(sim.temp,matr$weights))[,2])
    HSJ[pairs[1,i],pairs[2,i]]<-HSJ[pairs[2,i],pairs[1,i]]<-sum(na.omit(sim.temp*matr$weights))/wt.comp.char
  }
  diag(HSJ)<-0
  return(HSJ)
}



###############
##Implementation of Will's GED
##matr = character matrix in Claddis object format (see "ReadMorphNexus")

GED<-function(matr){
  pairs<-combn(nrow(matr$matrix),2)
  
  GED<-matrix(NA,nrow=nrow(matr$matrix),ncol=nrow(matr$matrix))
  for (i in 1:ncol(pairs)){
    sim.temp<-matrix(abs(suppressWarnings(as.numeric(matr$matrix[pairs[1,i],]))-
                           suppressWarnings(as.numeric(matr$matrix[pairs[2,i],]))))
    #correct values for ordered multistate characters
    if (any(matr$ordering=='ord')){
      m.o<-which(matr$ordering=='ord')
      sim.temp[m.o]<-abs(suppressWarnings(as.numeric(matr$matrix[pairs[1,i],][m.o]))-
                           suppressWarnings(as.numeric(matr$matrix[pairs[2,i],][m.o])))
    }
    #correct values for unordered multistate characters
    if (any(matr$ordering=='unord'&matr$max.vals>1)){
      m.u<-which(matr$ordering=='unord'&matr$max.vals>1)
      sim.temp[m.u]<-replace(sim.temp[m.u],which(sim.temp[m.u]>1),1)
    }
    #correct polymorphic characters
    if (length(grep("&", unique(c(matr$matrix[pairs[1,i],], matr$matrix[pairs[2,i],])))) > 0){
      polym.states <- sort(c(grep("&", matr$matrix[pairs[1,i],]), grep("&", matr$matrix[pairs[2,i],])))
      for (v in 1:length(polym.states)) {
        char.state1<-strsplit(matr$matrix[pairs[1,i],][polym.states[v]],"&")[[1]]
        char.state2<-strsplit(matr$matrix[pairs[2,i],][polym.states[v]],"&")[[1]]
        int.value <- intersect(char.state1, char.state2)
        if (length(int.value)>0){
          sim.temp[polym.states[v]]<-0
        }
        if (length(int.value)==0 & anyNA(c(char.state1,char.state2))==FALSE){
          if (matr$ordering[polym.states[v]]=='unord'){
            sim.temp[polym.states[v]]<-1
          }
          if (matr$ordering[polym.states[v]]=='ord'){
            pairs.poly<-matrix(0,nrow=length(char.state1),ncol=length(char.state2))
            for (m in 1:length(char.state1)){
              for (n in 1:length(char.state2)){
                pairs.poly[m,n]<-abs(as.numeric(char.state1[m])-as.numeric(char.state2[n]))
              }
            }
            sim.temp[polym.states[v]]<-min(pairs.poly)
          }
        }
      }
    }
    
    #replace missing values
    Sij<-mean(na.omit(sim.temp))
    sim.temp[which(is.na(sim.temp)==TRUE)]<-Sij
    
    #calculate total dissimilarity
    #wt.comp.char<-sum(na.omit(cbind(sim.temp,matr$weights))[,2])
    GED[pairs[1,i],pairs[2,i]]<-GED[pairs[2,i],pairs[1,i]]<-sqrt(sum((sim.temp*matr$weights)^2))
  }
  diag(GED)<-0
  return(GED)
}



###############
##Implementation of Gower's coefficient of dissimilarity
##matr = character matrix in Claddis object format (see "ReadMorphNexus")
#Note: this script and functions in Claddis do not standardize ordered multistate by range (as if they were quantitative; this follows Wills 1998)
#If weights are according to P&K (part of the Claddis object format), this returns P&K weighting scheme

gower<-function(matr){
  pairs<-combn(nrow(matr$matrix),2)
  gower<-matrix(NA,nrow=nrow(matr$matrix),ncol=nrow(matr$matrix))
  for (i in 1:ncol(pairs)){
    sim.temp<-matrix(abs(suppressWarnings(as.numeric(matr$matrix[pairs[1,i],]))-
                           suppressWarnings(as.numeric(matr$matrix[pairs[2,i],]))))
    #correct values for ordered multistate characters
    if (any(matr$ordering=='ord')){
      m.o<-which(matr$ordering=='ord')
      sim.temp[m.o]<-abs(suppressWarnings(as.numeric(matr$matrix[pairs[1,i],][m.o]))-
                           suppressWarnings(as.numeric(matr$matrix[pairs[2,i],][m.o])))
    }
    #correct values for unordered multistate characters
    if (any(matr$ordering=='unord'&matr$max.vals>1)){
      m.u<-which(matr$ordering=='unord'&matr$max.vals>1)
      sim.temp[m.u]<-replace(sim.temp[m.u],which(sim.temp[m.u]>1),1)
    }
    #correct polymorphic characters
    if (length(grep("&", unique(c(matr$matrix[pairs[1,i],], matr$matrix[pairs[2,i],])))) > 0){
      polym.states <- sort(c(grep("&", matr$matrix[pairs[1,i],]), grep("&", matr$matrix[pairs[2,i],])))
      for (v in 1:length(polym.states)) {
        char.state1<-strsplit(matr$matrix[pairs[1,i],][polym.states[v]],"&")[[1]]
        char.state2<-strsplit(matr$matrix[pairs[2,i],][polym.states[v]],"&")[[1]]
        int.value <- intersect(char.state1, char.state2)
        if (length(int.value)>0){
          sim.temp[polym.states[v]]<-0
        }
        if (length(int.value)==0 & anyNA(c(char.state1,char.state2))==FALSE){
          if (matr$ordering[polym.states[v]]=='unord'){
            sim.temp[polym.states[v]]<-1
          }
          if (matr$ordering[polym.states[v]]=='ord'){
            pairs.poly<-matrix(0,nrow=length(char.state1),ncol=length(char.state2))
            for (m in 1:length(char.state1)){
              for (n in 1:length(char.state2)){
                pairs.poly[m,n]<-abs(as.numeric(char.state1[m])-as.numeric(char.state2[n]))
              }
            }
            sim.temp[polym.states[v]]<-min(pairs.poly)
          }
        }
      }
    }
    #calculate total dissimilarity
    wt.comp.char<-sum(na.omit(cbind(sim.temp,matr$weights))[,2])
    gower[pairs[1,i],pairs[2,i]]<-gower[pairs[2,i],pairs[1,i]]<-sum(na.omit(sim.temp*matr$weights))/wt.comp.char
  }
  diag(gower)<-0
  return(gower)
}