diff --git a/column.c b/column.c index 7e74ad2..46f585a 100644 --- a/column.c +++ b/column.c @@ -16,6 +16,7 @@ * external functions declaration */ extern int compare_row_list(const void *p1, const void *p2); +extern int split_ip(char ***tab, char *s, char *delim); /* * global variables declaration @@ -77,6 +78,160 @@ void print_row(FILE *output, GTF_ROW *r, char delim, int add_chr) { fprintf(output, "\n"); } +/* + * prints a list of comma (or other sep) separated attribute values from a GTF row. + * The values are followed by more_info (preceded by sep) when it is not empty, + * and the printed string is terminated by delim. + * Parameters: + * row: the GTF row from which to print attributes + * output: where to print (a file or stdout) + * keys: a comma separated list of keys to be selected (column names such as chrom, source, feature, start, end, score, strand, phase, or attribute keys); a ? is printed for a key that is not found. + * sep: the separator to use (e.g '|', ','...). + */ +void print_attr_with_sep(GTF_ROW *row, FILE *output, char delim, char *keys, char *sep, char *more_info) { + int n, i, j; + int found = 0; + char **token; + char *keys_concat = (char *)calloc(5000, sizeof(char)); + + + n = split_ip(&token, strdup(keys), ","); + + for(i=0; ifield[0]); + if (i < (n-1)) strcat(keys_concat, sep); + found = 1; + } + else if(strcmp("chrom", token[i]) == 0) + { + strcat(keys_concat, row->field[0]); + if (i < (n-1)) strcat(keys_concat, sep); + found = 1; + } + else if(strcmp("source", token[i]) == 0) + { + strcat(keys_concat, row->field[1]); + if (i < (n-1)) strcat(keys_concat, sep); + found = 1; + } + else if(strcmp("feature", token[i]) == 0) + { + strcat(keys_concat, row->field[2]); + if (i < (n-1)) strcat(keys_concat, sep); + found = 1; + } + else if(strcmp("start", token[i]) == 0) + { + strcat(keys_concat, row->field[3]); + if (i < (n-1)) strcat(keys_concat, sep); + found = 1; + } + else if(strcmp("end", token[i]) == 0) + { + strcat(keys_concat, row->field[4]); + if (i < (n-1)) strcat(keys_concat, sep); + found = 1; + } + else if(strcmp("score", token[i]) == 0) + { + strcat(keys_concat, row->field[5]); + if (i < (n-1)) strcat(keys_concat, sep); + found = 1; + } + else if(strcmp("strand", token[i]) == 0) + { + 
strcat(keys_concat, row->field[6]); + if (i < (n-1)) strcat(keys_concat, sep); + found = 1; + } + else if(strcmp("phase", token[i]) == 0) + { + strcat(keys_concat, row->field[7]); + if (i < (n-1)) strcat(keys_concat, sep); + found = 1; + } + + if(found == 0) + { + if (row->attributes.nb != -1) + { + for(j=0; j< row->attributes.nb; j++) + { + + if(strcmp(row->attributes.attr[j]->key, token[i]) == 0) + { + strcat(keys_concat, row->attributes.attr[j]->value); + if (i < (n-1)) strcat(keys_concat, sep); + found = 1; + + } + } + } + } + + if(found == 0){ + if(found == 0){ + strcat(keys_concat, "?"); + if (i < (n-1)) strcat(keys_concat, sep); + } + } + } + + //keys_concat = (char *)realloc(keys_concat, (strlen(keys_concat) + 1) * sizeof(char)); + + + if(strlen(more_info)){ + strcat(keys_concat, sep); + strcat(keys_concat, more_info); + } + + if(keys_concat){ + fprintf(output, "%s", keys_concat); + } + else + { + fprintf(output, ""); + } + + fprintf(output, "%c", delim); + + free(keys_concat); +} + +/* + * prints a GTF row in bed format to output with the given character delimiter + * The output columns are: sequence name, start - 1, end, name, score, strand. + * Parameters: + * output: where to print + * r: the row to print + * delim: the delimiter character + * add_chr: boolean; if true(1), add "chr" at the beginning of the row + * keys: a comma separated list of keys whose values will appear in the name + * (4th) column of the bed. + * sep: The separator to use for names (4th column); more_info, when not empty, is appended to the name after sep. 
+ */ +void print_row_bed(FILE *output, GTF_ROW *r, char delim, int add_chr, char *keys, char *sep, char *more_info){ + + + if (add_chr) fprintf(output, "chr"); + // print chr, start, end + print_string(r->field[0], output, column[0], delim); + fprintf(output,"%d%c", atoi(r->field[3]) - 1, delim); + print_string(r->field[4], output, column[4], delim); + // print the name column (requested values joined by sep) + print_attr_with_sep(r, output, delim, keys, sep, more_info); + // print score strand + print_string(r->field[5], output, column[5], delim); + fprintf(output,"%s", r->field[6]); + fprintf(output, "\n"); +} + + /* * Creates an empty index and add/link it in the index list of the column * in index_id. diff --git a/command/add_attr_to_pos.c b/command/add_attr_to_pos.c index 0b0c3f6..6060da2 100644 --- a/command/add_attr_to_pos.c +++ b/command/add_attr_to_pos.c @@ -18,7 +18,6 @@ extern GTF_DATA *clone_gtf_data(GTF_DATA *gtf_data); extern void add_attribute(GTF_ROW *row, char *key, char *value); extern int split_ip(char ***tab, char *s, char *delim); -extern char *trim_ip(char *); /* * global variables declaration diff --git a/command/convert_to_ensembl.c b/command/convert_to_ensembl.c index c817563..5812b14 100644 --- a/command/convert_to_ensembl.c +++ b/command/convert_to_ensembl.c @@ -95,9 +95,9 @@ static void action_transcript(const void *nodep, const VISIT which, const int de nbrow++; } } - asprintf(&(tr_row->field[3]), "%d", start); - asprintf(&(tr_row->field[4]), "%d", end); - if (ok) { + if (asprintf(&(tr_row->field[3]), "%d", start) > 0 && + asprintf(&(tr_row->field[4]), "%d", end) > 0 && + ok) { if (!strcmp(gtf_d->data[datap->row[0]]->field[2], "gene")) { tr_row->next = gtf_d->data[datap->row[0]]->next; gtf_d->data[datap->row[0]]->next = tr_row; @@ -186,14 +186,14 @@ static void action_gene(const void *nodep, const VISIT which, const int depth) { } } } - asprintf(&(g_row->field[3]), "%d", start); - asprintf(&(g_row->field[4]), "%d", end); - - g_row->next = 
gtf_d->data[datap->row[0]]; - if (datap->row[0] != 0) - gtf_d->data[datap->row[0] - 1]->next = g_row; - else - gtf_d0 = g_row; + if (asprintf(&(g_row->field[3]), "%d", start) > 0 && + asprintf(&(g_row->field[4]), "%d", end) > 0) { + g_row->next = gtf_d->data[datap->row[0]]; + if (datap->row[0] != 0) + gtf_d->data[datap->row[0] - 1]->next = g_row; + else + gtf_d0 = g_row; + } } break; case endorder: diff --git a/command/get_sequences.c b/command/get_sequences.c index 6c2e061..fd6bb34 100644 --- a/command/get_sequences.c +++ b/command/get_sequences.c @@ -84,7 +84,7 @@ void get_chunk(char *ret, FILE *fasta_file, long seqpos, int L, int N, int p, ch fseek(fasta_file, -1, SEEK_CUR); reste_row_file = L; } - } while (reste_row & !eof); + } while (reste_row && !eof); } } @@ -188,8 +188,9 @@ FILE *get_fasta_file_index(FILE *fasta_file, char *index) { FILE *ffi = NULL; long pfasta; char *buffer = NULL, *p_end_dir = NULL; - int maxLineSize = 0; + size_t maxLineSize = 0; size_t size = 0; + int n; unsigned long old_crc, crc; @@ -215,7 +216,7 @@ FILE *get_fasta_file_index(FILE *fasta_file, char *index) { free(buffer); buffer = NULL; } - fprintf(ffi, "%d\n", maxLineSize - 1); + fprintf(ffi, "%lu\n", maxLineSize - 1); fprintf(ffi, "%lx\n", crc); fflush(ffi); rewind(ffi); @@ -249,7 +250,7 @@ FILE *get_fasta_file_index(FILE *fasta_file, char *index) { free(buffer); buffer = NULL; } - fprintf(ffi, "%d\n", maxLineSize - 1); + fprintf(ffi, "%lu\n", maxLineSize - 1); fprintf(ffi, "%lx\n", crc); fflush(ffi); } @@ -260,14 +261,15 @@ FILE *get_fasta_file_index(FILE *fasta_file, char *index) { } void print_fasta_sequence(SEQUENCE *seq) { - unsigned int k; + size_t k; + int l; FEATURE *feat; fprintf(stdout, "%s\n", seq->header); for (k = 0; k < strlen(seq->sequence); k += 60) fprintf(stdout, "%.60s\n", seq->sequence + k); - for (k = 0; k < seq->features->nb; k++) { - feat = seq->features->feature[k]; + for (l = 0; l < seq->features->nb; l++) { + feat = seq->features->feature[l]; 
fprintf(stdout, " %s : %d-%d (%d-%d)\n", feat->name, feat->start, feat->end, feat->tr_start, feat->tr_end); } } diff --git a/command/print_bed.c b/command/print_bed.c new file mode 100644 index 0000000..c0bca13 --- /dev/null +++ b/command/print_bed.c @@ -0,0 +1,30 @@ +/* + * print_bed.c + * + * Created on: December 7, 2018 + * Author: puthier (based on Fafa code...) + * Objective: print a gtf obj in bed format + */ + +#include "libgtftk.h" + +extern void print_row_bed(FILE *output, GTF_ROW *r, char delim, int add_chr, char *keys, char *sep, char *more_info); + +__attribute__ ((visibility ("default"))) +void *print_bed(GTF_DATA *gtf_data, char *output, int add_chr, char *keys, char *sep, char *more_info) { + int i; + FILE *out = stdout; + + if (gtf_data != NULL) { + if (*output != '-') out = fopen(output, "w"); + if (out == NULL) out = stdout; + for (i = 0; i < gtf_data->size; i++){ + print_row_bed(out, gtf_data->data[i], '\t', add_chr, keys, sep, more_info); + } + if (out != stdout) { + fflush(out); + fclose(out); + } + } + return 0; +} diff --git a/config.h b/config.h new file mode 100644 index 0000000..11d97a7 --- /dev/null +++ b/config.h @@ -0,0 +1,98 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the <limits.h> header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if your system has a GNU libc compatible `malloc' function, and + to 0 otherwise. */ +#define HAVE_MALLOC 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `mkdir' function. */ +#define HAVE_MKDIR 1 + +/* Define to 1 if your system has a GNU libc compatible `realloc' function, + and to 0 otherwise. */ +#define HAVE_REALLOC 1 + +/* Define to 1 if you have the <stdint.h> header file. 
*/ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strchr' function. */ +#define HAVE_STRCHR 1 + +/* Define to 1 if you have the `strdup' function. */ +#define HAVE_STRDUP 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strrchr' function. */ +#define HAVE_STRRCHR 1 + +/* Define to 1 if you have the `strstr' function. */ +#define HAVE_STRSTR 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "libgtftk" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "fabrice.lopez@inserm.fr" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "libgtftk" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "libgtftk 0.9.3" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "libgtftk" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "0.9.3" + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Version number of package */ +#define VERSION "0.9.3" + +/* Define to rpl_malloc if the replacement function should be used. */ +/* #undef malloc */ + +/* Define to rpl_realloc if the replacement function should be used. */ +/* #undef realloc */ + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +/* #undef size_t */