// Do not remove // Program by Liam J. Revell (2008) // Source code to calculate C matrix // Supplementary for "Size-correction and principal components for interspecific comparative studies" // Takes as input Newick format, fully resolved trees, with numbers as taxa // Returns output file with C matrix in MATLAB format // Do not remove #include #include #include // definitions // for read_tree() #define MAXCHAR 10000 // end definitions // structures struct node { struct node *left_ptr; // Left daughter struct node *right_ptr; // Right daughter struct node *parent_ptr; // Parent node double branch_length; // Branch length leading to parent branch double depth; // Depth in tree from root int tot_species; // Total number of species as of this point in tree char name[40]; // Name of tip or interior node (not used) int tip_number; char node_name[101]; int side; int *descendants; // set of descendants int numdesc; // number of descendants int star; }; // end structures // function declarations FILE *fp_read(char *, int); FILE *fp_append(char *, int); void init_node(struct node *newnode); int read_file_line(FILE *input_file, char *file_ch); int read_tree(char *filechar, int n, struct node *newnode, struct node *currnode); int *mem_1d_int(int); double **mem_alloc(int, int); void print_matrix(char *header, int str_length, double **matrix, int r, int c, FILE *fp); void find_descendants(struct node *top); void calc_C(struct node *top, double **Cmatrix); double **calc_D(int n, double **C); void zero_array(double **array, int width, int length); void free_matrix(double **matrix, int n, int m); void free_tree(struct node *top); // end function declarations // this function frees the memory allocated to the tree (LJH) void free_tree(struct node *top){ if (top==NULL) return; free_tree(top->left_ptr); free_tree(top->right_ptr); free(top); } // free memory allocated to a double matrix void free_matrix(double **matrix, int n, int m){ int i; for(i=0;inumdesc;i++) for(j=0;jnumdesc;j++) Cmatrix[currnode->descendants[i]][currnode->descendants[j]]+=currnode->branch_length; // call on daughters calc_C(currnode->left_ptr,Cmatrix); calc_C(currnode->right_ptr,Cmatrix); } } // this function assigns the set of all descedants to each node void find_descendants(struct node *top){ int i; struct node *currnode; currnode=top; // go to numdesc >=1 if(currnode->numdesc>0) return; else { // find node while((currnode->left_ptr->numdesc<1)||(currnode->right_ptr->numdesc<1)){ // go left while(currnode->left_ptr->numdesc<1){ currnode=currnode->left_ptr; } // go right while(currnode->right_ptr->numdesc<1){ currnode=currnode->right_ptr; } } // picked node currnode->numdesc=currnode->left_ptr->numdesc+currnode->right_ptr->numdesc; currnode->descendants=mem_1d_int(currnode->numdesc); for(i=0;ileft_ptr->numdesc;i++) currnode->descendants[i]=currnode->left_ptr->descendants[i]; for(i=currnode->left_ptr->numdesc;inumdesc;i++) currnode->descendants[i]=currnode->right_ptr->descendants[i-currnode->left_ptr->numdesc]; // call on parent if(currnode->parent_ptr!=NULL) find_descendants(currnode->parent_ptr); } } // print matrix void print_matrix(char *header, int str_length, double **matrix, int r, int c, FILE *fp){ int i, j; for(i=0;iparent_ptr=currnode; newnode->side=0; currnode->left_ptr=newnode; currnode=newnode; i++; } else if (filechar[i]==','){ // Create new right node newnode=malloc(sizeof(struct node)); if (newnode==NULL) printf ("memory error\n"); init_node(newnode); newnode->parent_ptr=currnode->parent_ptr; newnode->side=1; currnode->parent_ptr->right_ptr=newnode; currnode=newnode; i++; } else if (filechar[i]==':'){ // Read branch length j=0; i++; scinote=0; while((filechar[i]>='0' && filechar[i]<='9')||(filechar[i]=='.' || filechar[i]=='e' || filechar[i]=='-')){ if(filechar[i]=='e') scinote=1; temp[j]=filechar[i]; i++; j++; } temp[j]='\0'; if(scinote==0){ sscanf(temp, "%lf", &floater); } if(scinote==1){ j=0; while(j<5){ temp_pre_e[j]=temp[j]; j++; } temp_pre_e[6]='\0'; sscanf(temp_pre_e,"%f",&floater); if(temp[6]=='-') sign=-1; if(temp[6]=='+') sign=1; k=0; for(j=7;j<10;j++){ if(temp[j]=='0'); if(temp[j]>='1' && temp[j]<='9'){ temp_post_e[k]=temp[j]; k++; } } k++; temp_post_e[k]='\0'; sscanf(temp_post_e,"%d",&exponent); exponent*=sign; floater=floater*pow(10,exponent); } // assign estimated branch length currnode->branch_length = floater; // already at next character } else if (filechar[i]==')'){ // Back down into tree currnode=currnode->parent_ptr; i++; } else if (filechar[i]==';'){ break; // End tree reading } else { // Read species name j=0; while(filechar[i]!=':'){ temp[j]=filechar[i]; // currnode->name[j]=filechar[i]; i++; j++; } temp[j]='\0'; sscanf(temp,"%d",&currnode->tip_number); // also assign numdesc=1 to node currnode->numdesc=1; currnode->descendants=mem_1d_int(currnode->numdesc); currnode->descendants[0]=currnode->tip_number; // currnode->name[j]='\0'; } } } // this function reads a line of text from the input file (LJH) int read_file_line(FILE *input_file, char *file_ch){ int i=0; while(1){ file_ch[i]=fgetc(input_file); if (file_ch[i]=='\n' || file_ch[i]==EOF) break; i++; } return(i-1); } // this function initiliazes a new node with 0s and NULLs (LJH/LJR) void init_node(struct node *newnode){ newnode->left_ptr=NULL; newnode->right_ptr=NULL; newnode->parent_ptr=NULL; newnode->name[0]='\0'; newnode->branch_length=0; newnode->depth=0; newnode->tot_species=0; newnode->tip_number=0; newnode->numdesc=0; newnode->star=1; } // function returns file pointer "a" FILE *fp_append(char *filename, int str_size){ FILE *fp; filename[str_size-1]='\0'; fp=fopen(filename,"a"); if(!fp){ printf("Failed to open %s.\nExiting.\n",filename); exit(1); } return fp; } // function returns file pointer "r" FILE *fp_read(char *filename, int str_size){ FILE *fp; filename[str_size-1]='\0'; fp=fopen(filename,"r"); if(!fp){ printf("Failed to open %s.\nExiting.\n",filename); exit(1); } return fp; } // main function int main(){ char filename[40]; FILE *treefp, *datafp, *outfp; int i, j; struct node *root=NULL; int nchars; char file_char[MAXCHAR]; struct node *new_node; char temp[10]; int *tipnums; double **D; int numsp; double **C; char carrot; // get filenames printf("Enter filename of tree file. \n> "); fgets(filename,sizeof(filename),stdin); treefp=fp_read(filename,strlen(filename)); printf("Enter filename for output file. \n> "); fgets(filename,sizeof(filename),stdin); outfp=fp_append(filename,strlen(filename)); // filenames read, file pointers assigned // read tree into memory root=(struct node *)malloc(sizeof(struct node)); init_node(root); nchars=read_file_line(treefp, file_char); read_tree(file_char, nchars, new_node, root); // set all descendants at each node find_descendants(root); numsp=root->numdesc; // calculate coancestry matrix C=mem_alloc(numsp+1,numsp+1); zero_array(C,numsp+1,numsp+1); calc_C(root,C); print_matrix("C",strlen("C"),C,numsp,numsp,outfp); // free memory free_matrix(C,numsp+1,numsp+1); free_tree(root); free(tipnums); printf("Printed matrix 'C' to output file. Press a key to exit."); carrot=fgetc(stdin); return 0; }