Here's chainsaw(). It also requires sourcing a few other
functions. Please cite me if you use it :-).
=========================
chainsaw <- function(tr, timepoint=10)
{
# Take a tree and saw it off evenly across a certain timepoint.
# This removes any tips above the timepoint, and replaces them
# with a single tip representing the lineage crossing
# the timepoint (with a new tip name).
# Get the tree in a table
tr_table = prt(tr, printflag=FALSE)
# Find the tips that are less than 10 my old and drop them
TF_exists_more_recently_than_10mya = tr_table$time_bp <
timepoint
# Get the corresponding labels
labels_for_tips_existing_more_recently_than_10mya =
tr_table$label[ TF_exists_more_recently_than_10mya == TRUE ]
###########################################
# Draft chainsaw function
###########################################
# loop through the branches that cross 10 mya
# get a list of the edge start/stops in the phylogeny's edges
edge_times_bp = get_edge_times_before_present(tr)
# which of these branches cross 10 mya?
edges_start_earlier_than_10mya = edge_times_bp[, 1] > timepoint
edges_end_later_than_10mya = edge_times_bp[, 2] <= timepoint
edges_to_chainsaw = edges_start_earlier_than_10mya +
edges_end_later_than_10mya == 2
# then, for each of these edges, figure out how many tips
exist descending from it
nodes_to_chainsaw = tr$edge[, 2][edges_to_chainsaw]
nodes_to_chainsaw = nodes_to_chainsaw[nodes_to_chainsaw >
length(tr$tip.label)]
# create a copy of the tree to chainsaw
tree_to_chainsaw = tr
for (i in 1:length(nodes_to_chainsaw))
{
tmp_subtree = extract.clade(tr, nodes_to_chainsaw[i])
#print(tmp_subtree$tip.label)
tmp_number_of_tips = length(tmp_subtree$tip.label)
#print(tmp_number_of_tips)
# number of tips to drop = (numtips -1)
numtips_to_drop = tmp_number_of_tips - 1
# tips_to_drop
tmp_labels = tmp_subtree$tip.label
labels_to_drop = tmp_labels[1:numtips_to_drop]
# new label
label_kept_num = length(tmp_labels)
label_kept = tmp_labels[label_kept_num]
new_label = paste("CA_", label_kept, "+", numtips_to_drop,
"_tips", sep="")
tree_to_chainsaw$tip.label[tree_to_chainsaw$tip.label ==
label_kept] = new_label
# chop off e.g. 2 of the 3 tips
tree_to_chainsaw = drop.tip(tree_to_chainsaw, labels_to_drop)
}
#plot(tree_to_chainsaw)
#axisPhylo()
tree_to_chainsaw_table = prt(tree_to_chainsaw, printflag=FALSE)
tree_to_chainsaw_table_tips_TF_time_bp_LT_10my =
tree_to_chainsaw_table$time_bp < timepoint
tmp_edge_lengths =
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
times_bp_for_edges_to_chainsaw =
tree_to_chainsaw_table$time_bp[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
adjustment = times_bp_for_edges_to_chainsaw - timepoint
revised_tmp_edge_lengths = tmp_edge_lengths + adjustment
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
= revised_tmp_edge_lengths
# revised
ordered_nodenames = get_nodenums(tree_to_chainsaw)
parent_branches =
get_indices_where_list1_occurs_in_list2(ordered_nodenames,
tree_to_chainsaw$edge[,2])
NA_false = is.not.na(tree_to_chainsaw_table$edge.length)
tree_to_chainsaw$edge.length[parent_branches[NA_false]] =
tree_to_chainsaw_table$edge.length[NA_false]
return(tree_to_chainsaw)
}
# print tree in hierarchical format
prt <- function(t, printflag=TRUE, relabel_nodes = FALSE,
time_bp_digits=7)
{
# assemble beginning table
# check if internal node labels exist
if ("node.label" %in% attributes(t)$names == FALSE)
{
rootnum = get_nodenum_structural_root(t)
new_node_labels = paste("inNode",
rootnum:(rootnum+t$Nnode-1), sep="")
t$node.label = new_node_labels
}
# or manually relabel the internal nodes, if desired
if (relabel_nodes == TRUE)
{
rootnum = get_nodenum_structural_root(t)
new_node_labels = paste("inNode",
rootnum:(rootnum+t$Nnode-1), sep="")
t$node.label = new_node_labels
}
labels = c(t$tip.label, t$node.label)
ordered_nodenames = get_nodenums(t)
#nodenums = 1:length(labels)
node.types1 = rep("tip", length(t$tip.label))
node.types2 = rep("internal", length(t$node.label))
node.types2[1] = "root"
node.types = c(node.types1, node.types2)
# These are the index numbers of the edges below each node
parent_branches =
get_indices_where_list1_occurs_in_list2(ordered_nodenames,
t$edge[,2])
#parent_edges = parent_branches
brlen_to_parent = t$edge.length[parent_branches]
parent_nodes = t$edge[,1][parent_branches]
daughter_nodes = lapply(ordered_nodenames, get_daughters, t)
# print out the structural root, if desired
root_nodenum = get_nodenum_structural_root(t)
tmpstr = paste("prt(t): root=", root_nodenum, "\n", sep="")
prflag(tmpstr, printflag=printflag)
levels_for_nodes = unlist(lapply(ordered_nodenames,
get_level, t))
#tmplevel = get_level(23, t)
#print(tmplevel)
#height above root
hts_at_end_of_branches_aka_at_nodes = t$edge.length
hts_at_end_of_branches_aka_at_nodes = get_all_node_ages(t)
h = hts_at_end_of_branches_aka_at_nodes
# times before present, below (ultrametric!) tips
# numbers are positive, i.e. in millions of years before
present
# i.e. mybp, Ma
times_before_present = get_max_height_tree(t) - h
# fill in the ages of each node for the edges
edge_ages = t$edge
edge_ages[,1] = h[t$edge[,1]] # bottom of branch
edge_ages[,2] = h[t$edge[,2]] # top of branch
# fill in the times before present of each node for the edges
edge_times_bp = t$edge
edge_times_bp[,1] = times_before_present[t$edge[,1]] #
bottom of branch
edge_times_bp[,2] = times_before_present[t$edge[,2]] # top
of branch
tmpdtf = cbind(1:length(ordered_nodenames),
ordered_nodenames, levels_for_nodes, node.types,
parent_branches, brlen_to_parent, parent_nodes,
daughter_nodes, h, round(times_before_present,
digits=time_bp_digits), labels)
dtf = as.data.frame(tmpdtf, row.names=NULL)
# nd = node
# edge.length is the same as brlen_2_parent
names(dtf) = c("node", "ord_ndname", "node_lvl",
"node.type", "parent_br", "edge.length", "ancestor",
"daughter_nds", "node_ht", "time_bp", "label")
# convert the cols from class "list" to some natural class
dtf = unlist_dtf_cols(dtf, printflag=FALSE)
# print if desired
prflag(dtf, printflag=printflag)
#tree_strings = c()
#root_str = get_node_info(root_nodenum, t)
return(dtf)
}
get_edge_times_before_present <- function(t)
{
#height above root
hts_at_end_of_branches_aka_at_nodes = t$edge.length
hts_at_end_of_branches_aka_at_nodes = get_all_node_ages(t)
h = hts_at_end_of_branches_aka_at_nodes
# times before present, below (ultrametric!) tips
# numbers are positive, i.e. in millions of years before
present
# i.e. mybp, Ma
times_before_present = get_max_height_tree(t) - h
# fill in the ages of each node for the edges
edge_ages = t$edge
edge_ages[,1] = h[t$edge[,1]] # bottom of branch
edge_ages[,2] = h[t$edge[,2]] # top of branch
# fill in the times before present of each node for the edges
edge_times_bp = t$edge
edge_times_bp[,1] = times_before_present[t$edge[,1]] #
bottom of branch
edge_times_bp[,2] = times_before_present[t$edge[,2]] # top
of branch
return(edge_times_bp)
}
extract.clade <- function(phy, node, root.edge = 0,
interactive = FALSE)
{
Ntip <- length(phy$tip.label)
ROOT <- Ntip + 1
Nedge <- dim(phy$edge)[1]
wbl <- !is.null(phy$edge.length)
if (interactive) node <- identify(phy)$nodes else {
if (length(node) > 1) {
node <- node[1]
warning("only the first value of 'node' has
been considered")
}
if (is.character(node)) {
if (is.null(phy$node.label))
stop("the tree has no node labels")
node <- which(phy$node.label %in% node) + Ntip
}
if (node <= Ntip)
stop("node number must be greater than the
number of tips")
}
if (node == ROOT) return(phy)
phy <- reorder(phy) # insure it is in cladewise order
root.node <- which(phy$edge[, 2] == node)
start <- root.node + 1 # start of the clade looked for
anc <- phy$edge[root.node, 1] # the ancestor of 'node'
next.anc <- which(phy$edge[-(1:start), 1] <= anc) #
find the next occurence of 'anc' or an 'older' node
keep <- if (length(next.anc)) start + 0:(next.anc[1] -
1) else start:Nedge
if (root.edge) {
NewRootEdge <- phy$edge.length[root.node]
root.edge <- root.edge - 1
while (root.edge) {
if (anc == ROOT) break
i <- which(phy$edge[, 2] == anc)
NewRootEdge <- NewRootEdge + phy$edge.length[i]
root.edge <- root.edge - 1
anc <- phy$edge[i, 1]
}
if (root.edge && !is.null(phy$root.edge))
NewRootEdge <- NewRootEdge + phy$root.edge
phy$root.edge <- NewRootEdge
}
phy$edge <- phy$edge[keep, ]
if (wbl) phy$edge.length <- phy$edge.length[keep]
TIPS <- phy$edge[, 2] <= Ntip
tip <- phy$edge[TIPS, 2]
phy$tip.label <- phy$tip.label[sort(tip)] # <- added
sort to avoid shuffling of tip labels (2010-07-21)
## keep the ordering so no need to reorder tip.label:
phy$edge[TIPS, 2] <- order(tip)
if (!is.null(phy$node.label))
phy$node.label <-
phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]
Ntip <- length(phy$tip.label)
phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L
## The block below renumbers the nodes so that they conform
## to the "phylo" format -- same as in root()
newNb <- integer(Ntip + phy$Nnode)
newNb[node] <- Ntip + 1L
sndcol <- phy$edge[, 2] > Ntip
## executed from right to left, so newNb is modified
before phy$edge:
phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
(Ntip + 2):(Ntip + phy$Nnode)
phy$edge[, 1] <- newNb[phy$edge[, 1]]
phy
}
# this returns the NUMBERS identifying each node
get_nodenums <- function(t)
{
# get just the unique node numbers from the edge list (left
column: start node; right column: end node):
nodenames = unique(c(t$edge))
ordered_nodenames = nodenames[order(nodenames)]
return(ordered_nodenames)
}
# NOTE!!! THESE MATCH FUNCTIONS JUST RETURN THE *FIRST*
MATCH, *NOT* ALL MATCHES
# (argh)
# return indices in 2nd list matching the first list
# It WILL return one match for each item in the list, though...
get_indices_where_list1_occurs_in_list2 <- function(list1,
list2)
{
match_indices = match(list1, list2)
return(match_indices)
}
is.not.na <- function(x)
{
return(is.na(x) == FALSE)
}
=========================
On 7/12/11 10:06 PM, David Bapst wrote:
Do you want a function that gives you a tree of taxa observed in a time bin
(like, in the fossil record, as in Ruta et al., 2007)? Or gives you the tree
of the lineages extant at point in time based on some global phylogeny? If
that is what you want, the following function I wrote for doing time-slices
on trees should do that. My most recent version (not posted) has some
additions so that the same time-slice can be taken across multiple trees
that may have the root in different place. (These modifications use the
earliest fossil species as an anchor time.)
This version of the code seems to work okay, for example try:
tree1<-rtree(500)
timeslice.phy(tree1,slice.time=4,plot=T)
Let me know if this works alright,
-Dave, UChicago
timeslice.phy<-function(ttree,slice.time,plot=T){
#take a phylogeny and produce a phylogenetic 'slice' at time X
#makes an ultrametric tree, as if made of lineages extant at time X
#where X is a point in time after root time = 0
require(ape)
tslice<-slice.time #time from root to slice time
#first let's drop all edges that branch later than the slice
#make them all single lineages by dropping all but one taxon
dnode<-dist.nodes(ttree)[,Ntip(ttree)+1]
#identify the ancestor nodes of edges which cross the tslice
cedge<-which(sapply(1:Nedge(ttree),function(x)
any(ttree$edge[x,1]==which(dnode<tslice))
& any(ttree$edge[x,2]==which(dnode>=tslice))))
droppers<-numeric()
for(i in 1:length(cedge)){
desc<-ttree$edge[cedge[i],2]
if(desc>Ntip(ttree)){ #if an internal edge that goes past the
tslice
desctip<-unlist(prop.part(ttree)[desc-Ntip(ttree)]) #drop all
but one tip
droppers<-c(droppers,desctip[-1])
}}
stree<-drop.tip(ttree,droppers)
#which edges cross over tslice?
dnode<-dist.nodes(stree)[,Ntip(stree)+1]
cedge<-sapply(1:Nedge(stree),function(x)
any(stree$edge[x,2]==which(dnode>=tslice)))
cnode_depth<-dnode[stree$edge[cedge,1]]
stree$edge.length[cedge]<-tslice-cnode_depth
#last, let's drop all terminal taxa that are less than the tslice
dnode<-dist.nodes(stree)[,Ntip(stree)+1]
droppers<-which((dnode[1:Ntip(stree)]+0.1)<tslice)
stree<-drop.tip(stree,droppers)
if(plot){layout(matrix(1:2,,2));plot(ladderize(ttree),show.tip.label=F);axisPhylo();plot(stree,show.tip.label=F)}
stree
}
On Tue, Jul 12, 2011 at 3:19 PM,<ppi...@uniroma3.it> wrote:
Hi all,
someone knows a smart coding to cut a tree in order to
retain only taxa present in a given time interval bin?
Thanks in advance for any suggestion
best
paolo
_______________________________________________
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo
--
====================================================
Nicholas J. Matzke
Ph.D. Candidate, Graduate Student Researcher
Huelsenbeck Lab
Center for Theoretical Evolutionary Genomics
4151 VLSB (Valley Life Sciences Building)
Department of Integrative Biology
University of California, Berkeley
Graduate Student Instructor, IB200B
Principles of Phylogenetics: Ecology and Evolution
http://ib.berkeley.edu/courses/ib200b/
http://phylo.wikidot.com/
Lab websites:
http://ib.berkeley.edu/people/lab_detail.php?lab=54
http://fisher.berkeley.edu/cteg/hlab.html
Dept. personal page:
http://ib.berkeley.edu/people/students/person_detail.php?person=370
Lab personal page:
http://fisher.berkeley.edu/cteg/members/matzke.html
Lab phone: 510-643-6299
Dept. fax: 510-643-6264
Cell phone: 510-301-0179
Email: mat...@berkeley.edu
Mailing address:
Department of Integrative Biology
3060 VLSB #3140
Berkeley, CA 94720-3140
-----------------------------------------------------
"[W]hen people thought the earth was flat, they were wrong.
When people thought the earth was spherical, they were
wrong. But if you think that thinking the earth is spherical
is just as wrong as thinking the earth is flat, then your
view is wronger than both of them put together."
Isaac Asimov (1989). "The Relativity of Wrong." The
Skeptical Inquirer, 14(1), 35-44. Fall 1989.
http://chem.tufts.edu/AnswersInScience/RelativityofWrong.htm
_______________________________________________
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo