Commit fcfe8344 authored by Guilherme Telles's avatar Guilherme Telles
Browse files

sources

parent d8423bdb
// Guilherme P. Telles, 2017.
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <float.h>
#include <errno.h>
#include <string.h>
#include "graph.h"
/**
\brief Live Neighbor-Joining.
\param D An order n strictly lower triangular matrix with distances among
OTUs (as described in arrays.h). Its contents will not be preserved, but it
won't be reallocated.
\param n The number of OTUs.
\returns An undirected acyclic graph where the OTUs are the vertices labeled
from 0 to n-1 and hypothetical nodes are vertices labeled from n to
graph->n-1 (vertices that correspond to OTUs also have their flag field set
to 1), and the flag field in the graph is set to the number of live ancestors.
On failure it returns NULL and sets errno to ENOMEM.
**/
graph* lnj(double** D, int n) {
int i,j,k;
graph* T = 0;
double Sum;
double* sum = 0;
int* root = 0;
int* internal = 0;
if (n < 3) return 0;
// Evaluate sum and Sum:
Sum = 0.0; // the sum of distances among pairs of nodes
sum = malloc(n*sizeof(double)); // the sum of distances from each node
if (!sum) goto NOMEMH;
for (k=0; k<n; k++) {
sum[k] = 0;
for (i=0; i<k; i++)
sum[k] += D[k][i];
for (i=k+1; i<n; i++)
sum[k] += D[i][k];
Sum += sum[k];
}
// The tree:
T = gr_alloc('u',2*n-2);
if (!T) goto NOMEMH;
// Store the label of the node that roots each subtree in an array:
root = (int*) malloc(n*sizeof(int));
if (!root) goto NOMEMH;
for (i=0; i<n; i++) {
root[i] = i;
T->V[i].flag = 1;
}
// An array for recording OTUs that became internal nodes:
internal = calloc(2*n-2,sizeof(int));
if (!internal) goto NOMEMH;
// The next hypo node:
int hyp = n;
while (n > 3) {
// Eval S[i][j] and get minimum:
double smin = DBL_MAX;
int imin = 0, jmin = 0;
for (i=1; i<n; i++) {
for (j=0; j<i; j++) {
double s = (sum[i] + sum[j] - 2*D[i][j])/((double)(2*(n-2)))
+ D[i][j]/2.0
+ (Sum/2.0 - sum[i] - sum[j] + D[i][j])/((double)(n-2));
if (s<smin) {
smin = s;
imin = i;
jmin = j;
}
}
}
// Eval T[i][j][k] and get minimum:
double tmin = DBL_MAX;
int itmin = 0, jtmin = 0, ktmin = 0;
for (i=1; i<n; i++) {
for (j=0; j<i; j++) {
for (k=0; k<n; k++) {
if (k==i || k==j || internal[k]) // An OTU is not taken as an internal node twice.
continue;
double dik = i>k ? D[i][k] : D[k][i];
double djk = j>k ? D[j][k] : D[k][j];
double t = dik + djk + (Sum/2.0 - sum[i] - sum[j] + D[i][j])/((double)(n-3));
if (t < tmin) {
tmin = t;
itmin = i;
jtmin = j;
ktmin = k;
}
}
}
}
//for (i=0; i<n; i++) {
// for (j=0; j<i; j++) {
// printf("%.2lf ",D[i][j]);
// }
// printf("\n");
//}
//
//printf("Sum: %lf\nsum: ",Sum);
//
//for (i=0; i<n; i++)
// printf("%.2lf ",sum[i]);
//printf("\n");
//
//printf("smin %.2lf imin %d id %d jmin %d id %d\n",smin,imin,root[imin],jmin,root[jmin]);
//printf("tmin %.2lf itmin %d id %d jtmin %d id %d ktmin %d id %d\n",
// tmin,itmin,root[itmin],jtmin,root[jtmin],ktmin,root[ktmin]);
if (smin < tmin) {
//printf("join-2 %d and %d by %d\n",root[imin],root[jmin],hyp);
// Evaluate branch lengths:
double dikmin = (sum[imin] - D[imin][jmin])/((double)(n-2));
double djkmin = (sum[jmin] - D[imin][jmin])/((double)(n-2));
double lik = (D[imin][jmin]+dikmin-djkmin)/2.0;
double ljk = (D[imin][jmin]+djkmin-dikmin)/2.0;
// Remove imin and jmin from sum and from Sum:
for (k=0; k<jmin; k++) {
Sum -= 2*D[jmin][k];
sum[k] -= D[jmin][k];
}
for (k=jmin+1; k<n; k++) {
Sum -= 2*D[k][jmin];
sum[k] -= D[k][jmin];
}
for (k=0; k<imin; k++) {
Sum -= 2*D[imin][k];
sum[k] -= D[imin][k];
}
for (k=imin+1; k<n; k++) {
Sum -= 2*D[k][imin];
sum[k] -= D[k][imin];
}
Sum += 2*D[imin][jmin];
// The new hypo node goes in jmin:
if (!gr_add_edge(T,root[imin],hyp,lik) || !gr_add_edge(T,root[jmin],hyp,ljk))
goto NOMEMH;
root[jmin] = hyp;
internal[jmin] = 1;
hyp++;
sum[jmin] = 0;
for (k=0; k<jmin; k++)
if (k != imin) {
D[jmin][k] = (D[jmin][k] + (k<imin ? D[imin][k] : D[k][imin]) - D[imin][jmin]) / 2.0;
Sum += 2*D[jmin][k];
sum[k] += D[jmin][k];
sum[jmin] += D[jmin][k];
}
for (k=jmin+1; k<n; k++)
if (k != imin) {
D[k][jmin] = (D[k][jmin] + (k<imin ? D[imin][k] : D[k][imin]) - D[imin][jmin]) / 2.0;
Sum += 2*D[k][jmin];
sum[k] += D[k][jmin];
sum[jmin] += D[k][jmin];
}
// Move n-1 to imin:
if (n-1 != imin) {
for (k=0; k<imin; k++)
D[imin][k] = D[n-1][k];
for (k=imin+1; k<n-1; k++)
D[k][imin] = D[n-1][k];
sum[imin] = sum[n-1];
root[imin] = root[n-1];
internal[imin] = internal[n-1];
}
n--;
}
else {
//printf("join-3 %d and %d by %d\n",root[itmin],root[jtmin],root[ktmin]);
// Eval branch lengths Lik and Ljk:
double lik = itmin>ktmin ? D[itmin][ktmin] : D[ktmin][itmin];
double ljk = jtmin>ktmin ? D[jtmin][ktmin] : D[ktmin][jtmin];
//if (lik<0) lik = 0;
//if (ljk<0) ljk = 0;
internal[ktmin] = 1;
T->flag++;
// Update tree:
if (!gr_add_edge(T,root[itmin],root[ktmin],lik) ||
!gr_add_edge(T,root[jtmin],root[ktmin],ljk))
goto NOMEMH;
// Remove imin and jmin from sum and from Sum:
for (k=0; k<jtmin; k++) {
Sum -= 2*D[jtmin][k];
sum[k] -= D[jtmin][k];
}
for (k=jtmin+1; k<n; k++) {
Sum -= 2*D[k][jtmin];
sum[k] -= D[k][jtmin];
}
for (k=0; k<itmin; k++) {
Sum -= 2*D[itmin][k];
sum[k] -= D[itmin][k];
}
for (k=itmin+1; k<n; k++) {
Sum -= 2*D[k][itmin];
sum[k] -= D[k][itmin];
}
Sum += 2*D[itmin][jtmin];
// Move n-1 to itmin:
if (n-1 != itmin) {
for (k=0; k<itmin; k++)
D[itmin][k] = D[n-1][k];
for (k=itmin+1; k<n-1; k++)
D[k][itmin] = D[n-1][k];
root[itmin] = root[n-1];
sum[itmin] = sum[n-1];
internal[itmin] = internal[n-1];
}
// Move n-2 to jtmin:
if (n-2 != jtmin) {
for (k=0; k<jtmin; k++)
D[jtmin][k] = D[n-2][k];
for (k=jtmin+1; k<n-2; k++)
D[k][jtmin] = D[n-2][k];
root[jtmin] = root[n-2];
sum[jtmin] = sum[n-2];
internal[jtmin] = internal[n-2];
}
n -= 2;
}
}
if (n == 3) { // 3 points:
//printf("join %d %d %d by %d\n",root[0],root[1],root[2],hyp);
double x = (D[1][0]+D[2][0]-D[2][1])/2.0;
double y = (D[1][0]+D[2][1]-D[2][0])/2.0;
double z = (D[2][0]+D[2][1]-D[1][0])/2.0;
if (!gr_add_edge(T,root[0],hyp,x) ||
!gr_add_edge(T,root[1],hyp,y) ||
!gr_add_edge(T,root[2],hyp,z))
goto NOMEMH;
}
else { // 2 points
//printf("join %d %d\n",root[0],root[1]);
if (!gr_add_edge(T,root[0],root[1],D[1][0]))
goto NOMEMH;
}
free(internal);
free(root);
free(sum);
n = (T->n+2)/2;
T->n = (n-T->flag)*2-2;
return T;
NOMEMH:
free(internal);
free(sum);
free(root);
gr_free(T);
errno = ENOMEM;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment