Changeset 1586:4e44d29377e3
- Timestamp:
- 10/30/08 14:46:57
(2 months ago)
- Author:
- Dan Blankenberg <dan@bx.psu.edu>
- branch:
- default
- Message:
Annotation profiler: Better handling of the case where regions extend beyond known chromosome boundaries.
-
Files:
-
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
| r1387 |
r1586 |
|
| 123 | 123 | self.table_regions_overlaped_count = {} #total number of table regions overlaping user's input intervals (non unique) |
|---|
| 124 | 124 | self.interval_table_overlap_count = {} #total number of user input intervals which overlap table |
|---|
| | 125 | self.region_size_errors = {} #dictionary of lists of invalid ranges by chromosome |
|---|
| 125 | 126 | def add_region( self, chrom, start, end ): |
|---|
| 126 | | self.total_interval_size += ( end - start ) |
|---|
| | 127 | chrom_length = self.chrom_lengths.get( chrom ) |
|---|
| | 128 | region_start = min( start, chrom_length ) |
|---|
| | 129 | region_end = min( end, chrom_length ) |
|---|
| | 130 | region_length = region_end - region_start |
|---|
| | 131 | |
|---|
| | 132 | if region_length < 1 or region_start != start or region_end != end: |
|---|
| | 133 | if chrom not in self.region_size_errors: |
|---|
| | 134 | self.region_size_errors[chrom] = [] |
|---|
| | 135 | self.region_size_errors[chrom].append( ( start, end ) ) |
|---|
| | 136 | if region_length < 1: return |
|---|
| | 137 | |
|---|
| | 138 | self.total_interval_size += region_length |
|---|
| 127 | 139 | self.total_interval_count += 1 |
|---|
| 128 | 140 | if chrom not in self.chromosome_coverage: |
|---|
| 129 | | self.chromosome_coverage[chrom] = bx.bitset.BitSet( self.chrom_lengths.get( chrom ) ) |
|---|
| 130 | | self.chromosome_coverage[chrom].set_range( start, end - start ) |
|---|
| 131 | | for table_name, coverage, regions in self.coverage_reader.iter_table_coverage_regions_by_region( chrom, start, end ): |
|---|
| | 141 | self.chromosome_coverage[chrom] = bx.bitset.BitSet( chrom_length ) |
|---|
| | 142 | |
|---|
| | 143 | self.chromosome_coverage[chrom].set_range( region_start, region_length ) |
|---|
| | 144 | for table_name, coverage, regions in self.coverage_reader.iter_table_coverage_regions_by_region( chrom, region_start, region_end ): |
|---|
| 132 | 145 | if table_name not in self.table_coverage: |
|---|
| 133 | 146 | self.table_coverage[table_name] = 0 |
|---|
| … | … | |
| 215 | 228 | out.write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( table_name, table_chromosome_size, table_chromosome_count, table_region_coverage, table_region_count, total_interval_count, total_interval_size, total_coverage, table_regions_overlaped_count, interval_region_overlap_count, nr_interval_count, nr_interval_size, nr_coverage, nr_table_regions_overlaped_count, nr_interval_table_overlap_count ) ) |
|---|
| 216 | 229 | out.close() |
|---|
| | 230 | |
|---|
| | 231 | #report chrom size errors as needed: |
|---|
| | 232 | if table_coverage_summary.region_size_errors: |
|---|
| | 233 | print "Regions provided extended beyond known chromosome lengths, and have been truncated as necessary, for the following intervals:" |
|---|
| | 234 | for chrom, regions in table_coverage_summary.region_size_errors.items(): |
|---|
| | 235 | if len( regions ) > 3: |
|---|
| | 236 | extra_region_info = ", ... " |
|---|
| | 237 | else: |
|---|
| | 238 | extra_region_info = "" |
|---|
| | 239 | print "%s has max length of %s, exceeded by %s%s." % ( chrom, chrom_lengths.get( chrom ), ", ".join( map( str, regions[:3] ) ), extra_region_info ) |
|---|
| 217 | 240 | |
|---|
| 218 | 241 | class ChromosomeLengths: |
|---|