Source code for phylogeny.models.cfn

"""
Module implementing the Cavender-Farris-Neymann stochastic tree model.

The model is based on the description by the book:
    
    Computational Phylogenetics.
      An introduction to designing methods for phylogeny estimation.
      -- by Tandy Warnow

The CFN model in words from the book:

"The Cavender-Farris-Neyman (CFN) model describes how a trait (which 
 can either be present or absent) evolves down a tree (Cavender, 
 1978; Farris, 1973; Neyman, 1971).

 ...a CFN model has a rooted binary tree T (i.e., a tree in which 
 every node is either a leaf or has two children) with numerical 
 parameters that describe the evolutionary process of a trait. Under 
 the CFN model, the probability of absence (0) or presence (1) is the 
 same at the root, but the state can change on the edges (also called 
 branches) of the tree. Thus, we associate a parameter p(e) to every 
 edge e in the tree, where p(e) denotes the probability that the 
 endpoints of the edge e have different states. In other words, p(e)
 is the probability of changing state (from 1 to 0, or vice-versa)
 ... we require 0 < p(e) < 0.5.

 Under  the  CFN  model,  a  trait  (which  is  also  called  a  
 “character”)  evolves  down  the tree under this random process, and 
 hence attains a state at every node in the tree, and in particular 
 at the leaves of the tree. You could write a computer program for a 
 CFN model tree that would generate 0s and 1s at the leaves of the 
 tree; thus, CFN is a generative model.
 
 Each time you ran the program you would get another pattern of 0s and 
 1s at the leaves of the tree. Thus, if you repeated the process 10 
 times, each time independently generating a new trait down the tree, 
 you would produce sequences of length 10 at the leaves of the tree."
    -- from the book.
"""

import math
import random as rnd
import itertools as itr
import numpy as np
from collections import defaultdict
from ..core import Tree


[docs]def swap(binary): if binary: return 0 else: return 1
# ---
[docs]def random_test(probability): 'Return True with a given probability, otherwise return False.' return (rnd.random() < probability)
# ---
[docs]class CFN_Tree(Tree): """A Cavender-Farris-Neymann stochastic tree model. Usage:: # Create a new empty tree. >>> cfn = CFN_Tree() # Branch randomly until you have 5 leaves. >>> cfn.populate(5) >>> print(cfn) Node: 0, node probability: 0 Node: 1, node probability: 0.2192846999188683 Node: 2, node probability: 0.06144844447962278 Node: 3, node probability: 0.14342505932071808 Node: 4, node probability: 0.1370117188846906 Node: 5, node probability: 0.44060196062669255 Node: 6, node probability: 0.009555798385131098 Node: 7, node probability: 0.48946332859444935 Node: 8, node probability: 0.39505550345399304 /-3 /1| | | /-7 | \4| -0| \-8 | | /-5 \2| \-6 # Evolve 5 traits through the tree >>> sequences = cfn.evolve_traits([1,1,1,1,1]) >>> sequences { 3: [0, 0, 0, 1, 1], 7: [1, 0, 1, 0, 1], 8: [1, 0, 0, 1, 1], 5: [1, 1, 1, 1, 1], 6: [1, 1, 1, 1, 1] } """
[docs] @staticmethod def cfn_metric(probability): "Return the length of an edge with the given probability." return -math.log(1 - 2*probability) / 2
# --- def __str__(self): node_strings = [f"Node: {node.name}, substitution probability: {node.probability}" for node in self.traverse()] return ( '\n'.join(node_strings) + '\n' + self.get_ascii(show_internal=True) ) # ---
[docs] def populate(self, n): 'Populate the tree with nodes and change probabilities.' # Populate the usual way super().populate(n) # Change names and add probabilities for i,node in enumerate(self.traverse()): node.name = i if i > 0: # Add a change probability < 0.5 p = rnd.random() / 2 node.add_feature('probability', p) # The probability in the model is associated to the edge # (branch), in this implementation, we associate the # probability to the node downstream of the edge. # Add the distance from the previous node node.dist = self.cfn_metric(p) # The 'CNF model distance' is an additive # metric representing the average n else: # No probability of change before this node.add_feature('probability', 0)
# ---
[docs] def evolve_traits(self, traits): "Evolve the binary traits through the tree." # The sequences generated sequences = defaultdict(list) for leaf in self.iter_leaves(): path_from_root = list(reversed(leaf.get_ancestors())) path_from_root.append(leaf) # Evolve each trait for trait in traits: # Follow path stochastically final_trait = self.trait_traverse(path_from_root, trait) # Save character's final state sequences[leaf.name].append(final_trait) return dict(sequences)
# ---
[docs] def trait_traverse(self, path_from_root, init): # Follow path stochastically trait = init for node in path_from_root: probability = node.probability if random_test(probability): trait = swap(trait) # Return character's final state return trait
# --- # --- CFN_Tree