Hans-Joerg Bibiko's function levenshtein() would help; see below for an example (the pairwise distances are filled in rather clumsily with two nested loops, but you can tidy that up with the apply family of functions). HTH, STG
########
# levenshtein algorithm in R
#
# Author  : Hans-Joerg Bibiko
# Date    : 29/06/2006
# Contact : bib...@eva.mpg.de
########
# Computes the Levenshtein (edit) distance between two strings: the minimum
# number of single-character insertions, deletions and substitutions needed
# to turn one string into the other.
#
# string1, string2 := strings to compare (only the first element of each is
#                     used; NA or zero-length input is an error)
# case = TRUE      := case sensitivity; case = FALSE := case insensitivity
# map              := character vector of
#                     c(regexp1, replacement1, regexp2, replacement2, ...)
#                     The rules are applied to both strings, in order, with
#                     gsub() before the distance is computed. Because they are
#                     applied sequentially, a later rule also sees the
#                     replacements made by earlier rules.
# returns          := the edit distance as a single number
#
# example:
#   map <- c("[aeiou]", "V", "[^aeiouV]", "C")
#     := replaces all vowels with V and all other characters with C
#        (the V is excluded from the second pattern so that the vowels
#        replaced by the first rule are not turned into C as well)
#   levenshtein("Bank", "Bond", map = map) => 0
########
levenshtein <- function(string1, string2, case = TRUE, map = NULL) {
  # Case folding is a no-op for case-sensitive comparison.
  fold <- function(x) if (case) x else tolower(x)

  s <- c(fold(as.character(string1)[1L]), fold(as.character(string2)[1L]))
  if (anyNA(s)) {
    stop("'string1' and 'string2' must be non-missing strings", call. = FALSE)
  }

  if (!is.null(map)) {
    rules <- matrix(map, ncol = 2, byrow = TRUE)
    # seq_len() keeps an empty map from iterating over 1:0.
    for (r in seq_len(nrow(rules))) {
      s <- gsub(rules[r, 1], rules[r, 2], s)
    }
    # Replacement text is subject to case folding too.
    s <- fold(s)
  }

  if (s[1L] == s[2L]) {
    return(0)
  }

  # A leading blank is prepended so that index 1 stands for the empty prefix;
  # row/column k of the table then corresponds to the first k - 1 characters.
  s1 <- strsplit(paste0(" ", s[1L]), NULL)[[1L]]
  s2 <- strsplit(paste0(" ", s[2L]), NULL)[[1L]]
  l1 <- length(s1)
  l2 <- length(s2)

  d <- matrix(0, nrow = l1, ncol = l2)
  d[, 1] <- seq_len(l1) - 1
  d[1, ] <- seq_len(l2) - 1

  # seq_len(n)[-1L] is empty when n == 1 (i.e. an empty input string),
  # whereas 2:n would count backwards and index outside the table.
  for (i in seq_len(l1)[-1L]) {
    for (j in seq_len(l2)[-1L]) {
      cost <- if (s1[i] == s2[j]) 0 else 1
      d[i, j] <- min(
        d[i - 1, j] + 1,        # deletion
        d[i, j - 1] + 1,        # insertion
        d[i - 1, j - 1] + cost  # match / substitution
      )
    }
  }

  d[l1, l2]
}
# end of function Hans-Joerg Bibiko's levenshtein

# generate names
set.seed(1)
n.names <- 10
all.names <- character(n.names)
for (i in seq_len(n.names)) {
  all.names[i] <- paste(
    sample(letters, sample(4:10, 1), replace = TRUE),
    collapse = ""
  )
}
all.names

# generate matrix
sims <- matrix(
  0,
  nrow = n.names, ncol = n.names,
  dimnames = list(all.names, all.names)
)

# fill matrix (clumsy): the distance is symmetric, so only the upper triangle
# is computed and each value is mirrored into the lower triangle
for (j in seq_len(n.names - 1L)) {
  for (k in (j + 1L):n.names) {
    sims[j, k] <- sims[k, j] <- levenshtein(all.names[j], all.names[k])
  }
}

plot(hclust(as.dist(sims)))

# ______________________________________________
# R-help@r-project.org mailing list
# https://stat.ethz.ch/mailman/listinfo/r-help
# PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
# and provide commented, minimal, self-contained, reproducible code.