Commit 363199cf authored by Guilherme Telles's avatar Guilherme Telles
Browse files

sources

parent d5fd94f8
#CC = /usr/local/bin/gcc
CFLAGS = -O3 -Wall -Wno-char-subscripts -Wno-unused-function
LFLAGS = -lm
all: live-neighbor-joining
arrays.o:
distance.o:
dynarray.o:
graph.o:
live-nj.o:
nj.o:
utils.o:
live-neighbor-joining: live-neighbor-joining.c graph.o distance.o utils.o dynarray.o arrays.o live-nj.o
$(CC) $(CFLAGS) $(LFLAGS) -o live-neighbor-joining live-neighbor-joining.c graph.o distance.o utils.o dynarray.o arrays.o live-nj.o
neighbor-joining: neighbor-joining.c graph.o distance.o utils.o dynarray.o arrays.o nj.o
$(CC) $(CFLAGS) $(LFLAGS) -o neighbor-joining neighbor-joining.c graph.o distance.o utils.o dynarray.o arrays.o nj.o
clean:
-rm -f *.o neighbor-joining live-neighbor-joining
// Guilherme P. Telles, 2010-2017.
#include <stdlib.h>
#include <errno.h>
#include "arrays.h"
/**
\brief Allocates a lower triangular matrix.
This function allocates an order n lower triangular matrix of a given type.
\param type The type of matrix elements.
Implemented types are 'i' (int) 'd' (double).
\return On success it returns the address of the new ltm. If the matrix could
not be created then returns 0 and errno remains set by malloc() on failure.
**/
void** ltm_alloc(char type, unsigned n) {
void** A;
if (type == 'i')
A = malloc(n*sizeof(int*));
else if (type == 'd')
A = malloc(n*sizeof(double*));
else
A = 0;
if (!A)
return 0;
A[0] = 0;
int i;
for (i=1; i<n; i++) {
if (type == 'i')
A[i] = malloc(i*sizeof(int));
else if (type == 'd')
A[i] = malloc(i*sizeof(double));
if (!A[i]) {
int err = errno;
while (--i)
free(A[i]);
free(A);
errno = err;
return 0;
}
}
return A;
}
/**
\brief Release a lower triangular matrix.
This function releases a lower triangular matrix with n rows.
**/
void ltm_free(void** M, unsigned n) {
m_free(M,n);
}
/**
\brief Free a ragged array.
m_free() frees a ragged array with n rows.
**/
void m_free(void** M, unsigned n) {
if (!M) return;
int i;
for (i=0; i<n; i++)
free(M[i]);
free(M);
}
/**
\file arrays.h
\brief Functions for arrays and ragged arrays.
Throughout the functions in this file a lower triangular matrix (ltm) of
order n is a ragged array with the following form:
<pre>
null
M[1][0]
M[2][0] M[2][1]
M[3][0] M[3][1] M[3][2]
...
M[n-1][0] M[n-1][1] M[n-1][2] ... M[n-1][n-2]
</pre>
As illustrated, this representation does not allocate the main diagonal and
has a null row 0, preserving standard indexing.
\internal Guilherme P. Telles, 2010-2017.
**/
#ifndef ARRAYSH
#define ARRAYSH
void** ltm_alloc(char type, unsigned n);
void ltm_free(void** M, unsigned n);
void m_free(void** M, unsigned n);
#endif
// Guilherme P. Telles, June 2017.
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include "distance.h"
#include "arrays.h"
/**
\brief Initializes a new dmat structure.
Creates a new dmat structure and initializes its fields as follows: n is set
to the homonym argument value, M is set to an order n lower triangular matrix
(see arrays.h), and labels is set to n null pointers.
\return On success it returns the address of a new dmat structure. On failure
returns NULL and errno remains set as bu malloc() on failure.
**/
dmat* dmat_alloc(unsigned n) {
int err;
dmat* R = malloc(sizeof(dmat));
if (!R) goto ENOMEMH;
R->n = n;
R->labels = (char**) calloc(n,sizeof(char*));
if (!R->labels) goto ENOMEMH;
R->M = (double**) ltm_alloc('d',n);
if (!R->M) goto ENOMEMH;
return R;
ENOMEMH:
err = errno;
dmat_free(R);
errno = err;
return 0;
}
/**
\brief Releases a dmat structure and its fields.
**/
void dmat_free(dmat* T) {
if (!T) return;
m_free((void**)T->labels,T->n);
if (T->M) ltm_free((void**)T->M,T->n);
free(T);
}
/**
\brief Load distances from a file.
This function loads distances from a file having the following format:
1st line: the number of objects, n.
2nd line: n labels (strings), each followed by a semicolon.
The remaining lines contain a lower triangular distance matrix without the
diagonal (n(n − 1)/2 distances for n objects), with the values followed by
semicolons, that is
<pre>
D(1,0);
D(2,0);D(2,1);
D(3,0);D(3,1);D(3,2);
....
D(n-1,0);D(n-1,1);D(n-1,2);...;D(n-1,n-2);
</pre>
\return On success it returns a new dmat structure address. On failure it
returns NULL and either errno remains set as by fopen() or malloc() on failure
or errno is set to EILSEQ to indicated that parsing the file failed. File
format error checking is not comprehensive, and it is possible that a
malformed distance file will be completely parsed and an odd dmat structure is
returned.
**/
dmat* dmat_load(char* filename) {
FILE* f = fopen(filename,"r");
if (!f) return 0;
int i,j,err;
dmat* R = 0;
// n:
int n;
if (fscanf(f,"%d ",&n) != 1) goto EILSEQH;
R = dmat_alloc(n);
if (!R) goto ENOMEMH;
// Labels:
char c;
int w = 0;
char* word = malloc(1024*sizeof(char));
if (!R) goto ENOMEMH;
i = 0;
while ((c = fgetc(f))) {
if (c == ';') {
R->labels[i] = malloc((w+1)*sizeof(char));
if (!R->labels[i]) goto ENOMEMH;
strncpy(R->labels[i],word,w);
R->labels[i][w] = '\0';
i++;
w = 0;
}
else if (c == '\n') {
break;
}
else {
word[w++] = c;
}
}
// Distances:
for (i=0; i<n; i++)
for (j=0; j<i; j++)
if (fscanf(f,"%lf;",&(R->M[i][j])) != 1) goto EILSEQH;
return R;
ENOMEMH: err = errno; goto STDH;
EILSEQH: err = EILSEQ; goto STDH;
STDH:
dmat_free(R);
errno = err;
return 0;
}
/**
\file distance.h
\brief Functions for distances.
\internal Guilherme P. Telles, June 2017.
**/
#ifndef DISTANCEH
#define DISTANCEH
/**
\brief A distance matrix.
**/
struct dmat {
unsigned n; ///<\brief The matrix order.
double** M; ///<\brief The order n strictly lower triangular matrix.
char** labels; ///<\brief The object labels, with length n.
};
typedef struct dmat dmat;
dmat* dmat_alloc(unsigned n);
void dmat_free(dmat* T);
dmat* dmat_load(char* filename);
#endif
// Guilherme P. Telles, 2015-2017.
#include <stdarg.h>
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
#include "dynarray.h"
/**
\brief Allocate a dynamic array.
\param type The type of data in the dynamic array. Must be either 'i' (int)
or 'v' (void*).
\param capacity The initial capacity. It is also the minimum capacity.
\param shrink If true then the dynamic array will shrink. If false it may
grow but will never shrink.
\return The address of a new dynamic array. On failure it returns NULL and
errno remains as set by malloc.
**/
dynarray* da_alloc(char type, unsigned capacity, short shrink) {
if (sizeof(char) != 1) {
errno = EINVAL;
return 0;
}
dynarray* A = malloc(sizeof(dynarray));
if (!A) return 0;
if (type == 'i')
A->data = malloc(capacity*sizeof(int));
else if (type == 'v')
A->data = malloc(capacity*sizeof(void*));
else
return 0;
if (!A->data) { free(A); return 0; }
A->first = 0;
A->cap = capacity;
A->size = 0;
A->type = type;
if (shrink)
A->min_cap = capacity;
else
A->min_cap = UINT_MAX;
return A;
}
/**
\brief Release a dynamic array.
**/
void da_free(dynarray* A) {
free(A->data);
free(A);
}
/**
\brief The number of items currently in the dynamic array.
**/
inline int da_size(dynarray* A) {
return A->size;
}
/*
Resize A to new_cap.
Return 1 on success, 0 on malloc failure.
The first array element will be at A->data[0] after resize.
*/
int da_resize(dynarray* A, int new_cap) {
int s = 0;
if (A->type == 'i')
s = sizeof(int);
else if (A->type == 'v')
s = sizeof(void*);
void* T = malloc(new_cap*s);
if (!T) return 0;
if (A->first+A->size >= A->cap) { // wraps:
int n = A->cap - A->first;
memcpy(T,((char*)A->data)+(A->first*s),n*s);
memcpy(((char*)T)+(n*s),A->data,(A->size-n)*s);
//if (A->type == 'i') {
// memcpy(T,((int*)A->data)+A->first,n*s);
// memcpy(((int*)T)+n,A->data,(A->size-n)*s);
//}
//else if (A->type == 'v') {
// memcpy(T,((void**)A->data)+A->first,n*s);
// memcpy(((void**)T)+n,((void**)A->data),(A->size-n)*s);
//}
}
else // doesn't wrap:
memcpy(T,((char*)A->data)+(A->first*s),A->size*s);
//if (A->type == 'i')
// memcpy(T,((int*)A->data)+A->first,A->size*s);
//else if (A->type == 'v')
// memcpy(T,((void**)A->data)+A->first,A->size*s);
free(A->data);
A->data = T;
A->cap = new_cap;
A->first = 0;
return 1;
}
/**
\brief Add a single data item to the end of the array.
If the dynamic array is full, tries to double its capacity and then adds data.
\param A A dynamic array.
\param ... A single data item to be added, whose type is equal to the type
specified to da_alloc().
\return On success returns 1. Whenever resizing the array is not possible it
fails to push, returns 0 and leaves errno as set by malloc().
**/
int da_push(dynarray* A, ...) {
if (A->size == A->cap)
if (!da_resize(A,2*A->cap))
return 0;
va_list va;
va_start(va,A);
int p = A->first+A->size;
if (p >= A->cap)
p -= A->cap;
if (A->type == 'i')
*(((int*)A->data)+p) = va_arg(va,int);
else if (A->type == 'v')
*(((void**)A->data)+p) = va_arg(va,void*);
va_end(va);
A->size++;
return 1;
}
/**
\brief Remove a single data item from the end of the array.
If the dynamic array is 1/4 full, this function halves its capacity and then
removes data. The capacity will never be smaller than the initial capacity.
\param A A dynamic array.
\param ... The address where the removed value will be placed, whose type is
pointer to the type specified to da_alloc().
\return On success it returns 1. If the dynamic array is empty it returns 0.
**/
int da_pop(dynarray* A, ...) {
if (!A->size) return 0;
if (A->cap > A->min_cap && A->size <= A->cap/4)
da_resize(A,A->cap/2);
va_list va;
va_start(va,A);
A->size--;
int p = A->first+A->size;
if (p >= A->cap)
p -= A->cap;
if (A->type == 'i')
*(va_arg(va,int*)) = *(((int*)A->data)+p);
else if (A->type == 'v')
*(va_arg(va,void**)) = *(((void**)A->data)+p);
va_end(va);
return 1;
}
/**
\brief \brief Add a single data item to the beginning of the array.
If the dynamic array is full, it tries to double its capacity and then adds
data.
\param A The dynamic array.
\param ... A single data item to be added, whose type should be pointer to
the type specified at dynamic array creation.
\return On success returns 1. When resizing the array is not possible it
fails to unshift data, returns 0 and leaves errno as set by malloc().
**/
int da_unshift(dynarray* A, ...) {
if (A->size == A->cap)
if (!da_resize(A,2*A->cap))
return 0;
if (A->first == 0)
A->first = A->cap-1;
else
A->first--;
va_list va;
va_start(va,A);
if (A->type == 'i')
*(((int*)A->data)+A->first) = va_arg(va,int);
else if (A->type == 'v')
*(((void**)A->data)+A->first) = va_arg(va,void*);
va_end(va);
A->size++;
return 1;
}
/**
\brief Remove a data item from the beginning of the array.
If the dynamic array is 1/4 full, it halves its capacity and then removes data.
The capacity will never be smaller than the initial capacity.
\param A The dynamic array.
\param ... The address where the removed value will be placed, whose type is
pointer to the type specified to da_alloc().
\return On success it returns 1. If the dynamic array is empty it returns 0.
**/
int da_shift(dynarray* A, ...) {
if (!A->size) return 0;
if (A->cap > A->min_cap && A->size == A->cap/4)
da_resize(A,A->cap/2);
va_list va;
va_start(va,A);
if (A->type == 'i')
*(va_arg(va,int*)) = *(((int*)A->data)+A->first);
else if (A->type == 'v')
*(va_arg(va,void**)) = *(((void**)A->data)+A->first);
va_end(va);
A->first++;
if (A->first == A->cap)
A->first = 0;
A->size--;
return 1;
}
/**
\brief Set A[i].
If i is outside A, it tries to swell the array first.
\param A A dynamic array.
\param i The index.
\param ... A single data item to set, whose type is equal to the type
specified to da_alloc().
\return On success returns 1. Whenever resizing the array is not possible it
leaves errno as set by malloc(), A remains unchanged and it returns 0.
**/
int da_set(dynarray* A, size_t i, ...) {
if (i >= A->cap) {
size_t k = A->cap;
while (i >= k)
k *= 2;
if (!da_resize(A,k))
return 0;
}
if (i >= A->size)
A->size = i+1;
size_t p = (A->first+i);
if (p >= A->cap)