From 377a5fc0361c9cf93352f03c56289152162d4fe4 Mon Sep 17 00:00:00 2001 From: David Jones Date: Fri, 25 Oct 2019 17:17:51 +0100 Subject: [PATCH] Changes to ignore_reg_access_get_ign_regs_covered from ignore_reg_access_get_ign_reg_contained. Changed logic in ignore_reg_access_resolve_ignores_to_analysis_sections --- src/ignore_reg_access.c | 291 ++++++++++++++++++---------------------- 1 file changed, 134 insertions(+), 157 deletions(-) diff --git a/src/ignore_reg_access.c b/src/ignore_reg_access.c index ea928ef..ed36449 100644 --- a/src/ignore_reg_access.c +++ b/src/ignore_reg_access.c @@ -40,189 +40,166 @@ #include int ignore_reg_access_get_ign_reg_count_for_chr(char *ign_file, char *chr){ - assert(ign_file != NULL); - assert(chr != NULL); - FILE *file = fopen(ign_file,"r"); - check(file != NULL,"Couldn't open ignored region file: %s.",ign_file); - //Read all lines, only including the ones that are of the correct chromosome in the count - int entry_count = 0; - //read and count - char rd[200]; - while(fgets(rd, 200, file) != NULL){ - check(rd != NULL,"Invalid line read in ignored region file."); - char *chr_nom = malloc(sizeof(char)*50); - check_mem(chr_nom); - int chk = sscanf(rd,"%s",chr_nom); - check(chk == 1,"Incorrect line read.\n"); - if(strcmp(chr_nom,chr) == 0){ - entry_count++; - } - free(chr_nom); - } - check(fclose(file)==0,"Error closing ignored region file '%s'.",ign_file); - return entry_count; + assert(ign_file != NULL); + assert(chr != NULL); + FILE *file = fopen(ign_file,"r"); + check(file != NULL,"Couldn't open ignored region file: %s.",ign_file); + //Read all lines, only including the ones that are of the correct chromosome in the count + int entry_count = 0; + //read and count + char rd[200]; + while(fgets(rd, 200, file) != NULL){ + check(rd != NULL,"Invalid line read in ignored region file."); + char *chr_nom = malloc(sizeof(char)*50); + check_mem(chr_nom); + int chk = sscanf(rd,"%s",chr_nom); + check(chk == 1,"Incorrect line read.\n"); + if(strcmp(chr_nom,chr) == 0){ + entry_count++; + } + free(chr_nom); + } + check(fclose(file)==0,"Error closing ignored region file '%s'.",ign_file); + return entry_count; error: - if(file) fclose(file); - return -1; -} - -seq_region_t *ignore_reg_access_get_ign_reg_inside(int pos, struct seq_region_t **regions, int entry_count){ - int i=0; - for(i=0; ibeg < pos && regions[i]->end > pos){ - seq_region_t *reg_copy = malloc(sizeof(struct seq_region_t)); - check_mem(reg_copy); - reg_copy->beg = regions[i]->beg; - reg_copy->end = regions[i]->end; - return reg_copy; - } - } -error: - return NULL; + if(file) fclose(file); + return -1; } seq_region_t *ignore_reg_access_get_ign_reg_overlap(int pos, struct seq_region_t **regions, int entry_count){ - int i=0; - for(i=0; ibeg <= pos && regions[i]->end >= pos){ - seq_region_t *reg_copy = malloc(sizeof(struct seq_region_t)); - check_mem(reg_copy); - reg_copy->beg = regions[i]->beg; - reg_copy->end = regions[i]->end; - return reg_copy; - } - } + int i=0; + for(i=0; ibeg <= pos && regions[i]->end >= pos){ + seq_region_t *reg_copy = malloc(sizeof(struct seq_region_t)); + check_mem(reg_copy); + reg_copy->beg = regions[i]->beg; + reg_copy->end = regions[i]->end; + return reg_copy; + } + } error: - return NULL; + return NULL; } int ignore_reg_access_get_ign_reg_for_chr(char *ign_file,char *chr, int entry_count, struct seq_region_t **regions){ - assert(ign_file != NULL); - assert(chr != NULL); - assert(entry_count >= 0); - assert(sizeof(regions)>0); - int is_bed = 0; - if(entry_count == 0){ - return 0; - } - //assign the right size to the array - //then reread so we can parse the actual lines. - //Check for bed extension - const char *ext = strrchr(ign_file, '.'); - if(ext && ext != ign_file && strcmp(ext+1,"bed")==0){ - is_bed = 1; - } + assert(ign_file != NULL); + assert(chr != NULL); + assert(entry_count >= 0); + assert(sizeof(regions)>0); + int is_bed = 0; + if(entry_count == 0){ + return 0; + } + //assign the right size to the array + //then reread so we can parse the actual lines. + //Check for bed extension + const char *ext = strrchr(ign_file, '.'); + if(ext && ext != ign_file && strcmp(ext+1,"bed")==0){ + is_bed = 1; + } - FILE *file = fopen(ign_file,"r"); - check(file != NULL,"Couldn't open ignored region file: %s.",ign_file); - int found_count = 0; + FILE *file = fopen(ign_file,"r"); + check(file != NULL,"Couldn't open ignored region file: %s.",ign_file); + int found_count = 0; - char rd[200]; - while(fgets(rd, 200, file) != NULL){ - check(rd != NULL,"Invalid line read in ignored region file."); - char *chr_nom = malloc(sizeof(char)*250); - check_mem(chr_nom); - int beg,end; - int chk = sscanf(rd,"%s\t%d\t%d",chr_nom,&beg,&end); - if(chk==3){ - if(strcmp(chr_nom,chr) == 0){ - regions[found_count] = malloc(sizeof(struct seq_region_t)); - check_mem(regions[found_count]); - regions[found_count]->beg = beg + is_bed; - regions[found_count]->end = end; - found_count++; - } - }else if(1==sscanf(rd,"%s",chr_nom)){//Check for just a chromosome. - if(strcmp(chr_nom,chr) == 0){ - regions[found_count] = malloc(sizeof(struct seq_region_t)); - check_mem(regions[found_count]); - regions[found_count]->beg = 1; - regions[found_count]->end = INT_MAX; - found_count++; - } - }else{ - free(chr_nom); - sentinel("Incorrect line read from ignore file %s.",rd); - } - free(chr_nom); - } - check(entry_count == found_count,"Wrong number of lines found %d for chr: %s. Expected %d.",found_count,chr,entry_count); - check(fclose(file)==0,"Error closing ignored region file '%s'.",ign_file); - return 0; + char rd[200]; + while(fgets(rd, 200, file) != NULL){ + check(rd != NULL,"Invalid line read in ignored region file."); + char *chr_nom = malloc(sizeof(char)*250); + check_mem(chr_nom); + int beg,end; + int chk = sscanf(rd,"%s\t%d\t%d",chr_nom,&beg,&end); + if(chk==3){ + if(strcmp(chr_nom,chr) == 0){ + regions[found_count] = malloc(sizeof(struct seq_region_t)); + check_mem(regions[found_count]); + regions[found_count]->beg = beg + is_bed; + regions[found_count]->end = end; + found_count++; + } + }else if(1==sscanf(rd,"%s",chr_nom)){//Check for just a chromosome. + if(strcmp(chr_nom,chr) == 0){ + regions[found_count] = malloc(sizeof(struct seq_region_t)); + check_mem(regions[found_count]); + regions[found_count]->beg = 1; + regions[found_count]->end = INT_MAX; + found_count++; + } + }else{ + free(chr_nom); + sentinel("Incorrect line read from ignore file %s.",rd); + } + free(chr_nom); + } + check(entry_count == found_count,"Wrong number of lines found %d for chr: %s. Expected %d.",found_count,chr,entry_count); + check(fclose(file)==0,"Error closing ignored region file '%s'.",ign_file); + return 0; error: - if(file) fclose(file); - if(regions) ignore_reg_access_destroy_seq_region_t_arr(entry_count, regions); - return -1; + if(file) fclose(file); + if(regions) ignore_reg_access_destroy_seq_region_t_arr(entry_count, regions); + return -1; } -List *ignore_reg_access_get_ign_reg_contained(int from, int to, struct seq_region_t **regions, int entry_count){ - List *li = List_create(); - int i=0; - for(i=0; ibeg >= from && regions[i]->end <= to){ - //Make a copy of this region and put in the list - seq_region_t *reg_copy = malloc(sizeof(struct seq_region_t)); - check_mem(reg_copy); - reg_copy->beg = regions[i]->beg; - reg_copy->end = regions[i]->end; - List_push(li,reg_copy); - } - } - return li; +List *ignore_reg_access_get_ign_regs_covered(int from, int to, struct seq_region_t **regions, int entry_count){ + List *li = List_create(); + int i=0; + for(i=0; i= regions[i]->beg && from <= regions[i]->end) || + (to >= regions[i]->beg && to <= regions[i]->end) || + from <= regions[i]->beg && to >= regions[i]->end){ + seq_region_t *reg_copy = malloc(sizeof(struct seq_region_t)); + check_mem(reg_copy); + reg_copy->beg = regions[i]->beg; + reg_copy->end = regions[i]->end; + List_push(li,reg_copy); + } //End of checking if this ignore region overlaps + } + return li; error: - return NULL; + return NULL; } List *ignore_reg_access_resolve_ignores_to_analysis_sections(int start, int end, struct seq_region_t **regions, int entry_count){ - List *li = ignore_reg_access_get_ign_reg_contained(start,end,regions,entry_count); + List *li = ignore_reg_access_get_ign_regs_covered(start,end,regions,entry_count); check(li != NULL,"Error fetching contained ignore regions."); - // Test for start overlap - seq_region_t *start_overlap = ignore_reg_access_get_ign_reg_inside(start, regions, entry_count); - - //Test for end overlap - seq_region_t *stop_overlap = ignore_reg_access_get_ign_reg_inside(end, regions, entry_count); - List *reg_for_analysis = List_create(); - seq_region_t *range = malloc(sizeof(struct seq_region_t)); - if(start_overlap != NULL){ - range->beg = start_overlap->end+1; - }else{ - range->beg = start; - } + List *reg_for_analysis = List_create(); + seq_region_t *range = malloc(sizeof(struct seq_region_t)); - LIST_FOREACH(li, first, next, cur){ - range->end = ((seq_region_t *) cur->value)->beg - 1; - List_push(reg_for_analysis,range); - range = malloc(sizeof(struct seq_region_t)); - range->beg = ((seq_region_t *) cur->value)->end + 1; - } - - if(stop_overlap != NULL){ - if(stop_overlap->end+1 > end){ - sentinel("Error in resolving ignored regions. End %d was lower than the end of the region %d\n",end,stop_overlap->end+1); + range->beg = start; + if(List_count(li) > 0){ + if(start >= ((seq_region_t *) li->first->value)->beg){ + range->beg = ((seq_region_t *) li->first->value)->end+1; + List_unshift(li); } - range->end = stop_overlap->end+1; - }else{ - range->end = end; } - List_push(reg_for_analysis,range); - List_clear_destroy(li); - return reg_for_analysis; + LIST_FOREACH(li, first, next, cur){ + range->end = ((seq_region_t *) cur->value)->beg - 1; + List_push(reg_for_analysis,range); + range = malloc(sizeof(struct seq_region_t)); + range->beg = ((seq_region_t *) cur->value)->end + 1; + } + + range->end = end; + List_push(reg_for_analysis,range); + List_clear_destroy(li); + return reg_for_analysis; error: - List_clear_destroy(li); - return NULL; + List_clear_destroy(li); + return NULL; } void ignore_reg_access_destroy_seq_region_t_arr(int entry_count, seq_region_t **regions){ - if(sizeof(regions) > 0){ - int i=0; - for(i=0;i 0){ + int i=0; + for(i=0;i