Changeset 863:f28df3d85602

Show
Ignore:
Timestamp:
01/16/08 13:01:09 (1 year ago)
Author:
Dan Blankenberg <dan@bx.psu.edu>
branch:
default
convert_revision:
svn:9bcadc22-80f8-0310-8a53-c8f022958886/galaxy/trunk@2217
Message:

Fix memory issues when dealing with MAF tools.

Add new bx-python eggs.

Reorganize all MAF tools into a unified directory, and update tool_conf.

Remember to update tool_conf.xml from one of the samples (tool_conf.xml.sample or tool_conf.xml.main).

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • tool_conf.xml.main

    r796 r863  
    2222  </section> 
    2323  <section name="ENCODE Tools" id="EncodeTools"> 
    24 <!--    <tool file="extract/interval2maf.xml" /> 
     24<!--    <tool file="maf/interval2maf.xml" /> 
    2525    <tool file="extract/phastOdds/phastOdds_tool.xml" /> 
    2626    <tool file="stats/aggregate_binned_scores_in_intervals.xml" /> --> 
     
    5353  </section> 
    5454  <section name="Convert Formats" id="convert"> 
    55     <tool file="filters/maf/maf_to_fasta.xml" /> 
    56     <tool file="filters/maf/maf_to_bed.xml" /> 
     55    <tool file="maf/maf_to_fasta.xml" /> 
     56    <tool file="maf/maf_to_bed.xml" /> 
    5757    <tool file="filters/gff2bed.xml" /> 
    5858    <tool file="filters/bed2gff.xml" /> 
     
    7474  </section> 
    7575  <section name="Fetch Alignments" id="fetchAlign"> 
    76     <tool file="extract/interval2maf_pairwise.xml" /> 
    77     <tool file="extract/interval2maf.xml" /> 
    78     <tool file="extract/interval_maf_to_merged_fasta.xml" /> 
    79 <!--    <tool file="extract/genebed_maf_to_fasta.xml"/> 
    80     <tool file="filters/maf/maf_stats.xml"/> --> 
    81     <tool file="filters/maf/maf_limit_to_species.xml"/> 
    82     <tool file="filters/maf/maf_limit_size.xml"/> 
    83     <tool file="filters/maf/maf_by_block_number.xml"/> 
     76    <tool file="maf/interval2maf_pairwise.xml" /> 
     77    <tool file="maf/interval2maf.xml" /> 
     78    <tool file="maf/interval_maf_to_merged_fasta.xml" /> 
     79    <tool file="maf/genebed_maf_to_fasta.xml"/> 
     80    <tool file="maf/maf_stats.xml"/> 
     81    <tool file="maf/maf_thread_for_species.xml"/> 
     82    <tool file="maf/maf_limit_to_species.xml"/> 
     83    <tool file="maf/maf_limit_size.xml"/> 
     84    <tool file="maf/maf_by_block_number.xml"/> 
     85<!--    <tool file="maf/maf_reverse_complement.xml"/> 
     86    <tool file="maf/maf_filter.xml"/> --> 
    8487  </section> 
    8588  <section name="Get Genomic Scores" id="scores"> 
  • tool_conf.xml.sample

    r848 r863  
    2222  </section> 
    2323  <section name="ENCODE Tools" id="EncodeTools"> 
    24 <!--    <tool file="extract/interval2maf.xml" /> 
     24<!--    <tool file="maf/interval2maf.xml" /> 
    2525    <tool file="extract/phastOdds/phastOdds_tool.xml" /> 
    2626    <tool file="stats/aggregate_binned_scores_in_intervals.xml" /> --> 
     
    5353  </section> 
    5454  <section name="Convert Formats" id="convert"> 
    55     <tool file="filters/maf/maf_to_fasta.xml" /> 
    56     <tool file="filters/maf/maf_to_bed.xml" /> 
     55    <tool file="maf/maf_to_fasta.xml" /> 
     56    <tool file="maf/maf_to_bed.xml" /> 
    5757    <tool file="filters/gff2bed.xml" /> 
    5858    <tool file="filters/bed2gff.xml" /> 
     
    7474  </section> 
    7575  <section name="Fetch Alignments" id="fetchAlign"> 
    76     <tool file="extract/interval2maf_pairwise.xml" /> 
    77     <tool file="extract/interval2maf.xml" /> 
    78     <tool file="extract/interval_maf_to_merged_fasta.xml" /> 
    79     <tool file="extract/genebed_maf_to_fasta.xml"/> 
    80     <tool file="filters/maf/maf_stats.xml"/> 
    81     <tool file="filters/maf/maf_thread_for_species.xml"/> 
    82     <tool file="filters/maf/maf_limit_to_species.xml"/> 
    83     <tool file="filters/maf/maf_limit_size.xml"/> 
    84     <tool file="filters/maf/maf_by_block_number.xml"/> 
    85     <tool file="filters/maf/maf_reverse_complement.xml"/> 
    86     <tool file="filters/maf/maf_filter.xml"/> 
     76    <tool file="maf/interval2maf_pairwise.xml" /> 
     77    <tool file="maf/interval2maf.xml" /> 
     78    <tool file="maf/interval_maf_to_merged_fasta.xml" /> 
     79    <tool file="maf/genebed_maf_to_fasta.xml"/> 
     80    <tool file="maf/maf_stats.xml"/> 
     81    <tool file="maf/maf_thread_for_species.xml"/> 
     82    <tool file="maf/maf_limit_to_species.xml"/> 
     83    <tool file="maf/maf_limit_size.xml"/> 
     84    <tool file="maf/maf_by_block_number.xml"/> 
     85    <tool file="maf/maf_reverse_complement.xml"/> 
     86    <tool file="maf/maf_filter.xml"/> 
    8787  </section> 
    8888  <section name="Get Genomic Scores" id="scores"> 
  • tools/maf/genebed_maf_to_fasta.xml

    r765 r863  
    11<tool id="GeneBed_Maf_Fasta2" name="Stitch Gene blocks"> 
    22  <description>given a set of coding exon intervals</description> 
    3   <command interpreter="python2.4">#if $maf_source_type.maf_source == "user":#genebed_maf_to_fasta.py $dbkey $maf_source_type.species $maf_source_type.maf_file $input1 $out_file1 $maf_source_type.maf_source 
    4 #else:#genebed_maf_to_fasta.py $dbkey $maf_source_type.species $maf_source_type.maf_identifier $input1 $out_file1 $maf_source_type.maf_source 
     3  <command interpreter="python2.4">#if $maf_source_type.maf_source == "user":#interval_maf_to_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source --geneBED 
     4#else:#interval_maf_to_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source  --geneBED 
    55#end if 
    66  </command> 
  • tools/maf/interval2maf.py

    r749 r863  
    2020   -o, --output_file=o:      Output MAF file 
    2121   -p, --species=p: Species to include in output 
     22   -l, --indexLocation=l: Override default maf_index.loc file 
    2223""" 
    2324 
     
    2728import bx.align.maf 
    2829import bx.intervals.io 
    29 import bx.interval_index_file 
    30 import sys, os, tempfile 
    31  
    32 MAF_LOCATION_FILE = "/depot/data2/galaxy/maf_index.loc" 
    33  
    34 def maf_index_by_uid( maf_uid ): 
    35     for line in open( MAF_LOCATION_FILE ): 
    36         try: 
    37             #read each line, if not enough fields, go to next line 
    38             if line[0:1] == "#" : continue 
    39             fields = line.split('\t') 
    40             if maf_uid == fields[1]: 
    41                 try: 
    42                     maf_files = fields[3].replace( "\n", "" ).replace( "\r", "" ).split( "," ) 
    43                     return bx.align.maf.MultiIndexed( maf_files, keep_open = True, parse_e_rows = True ) 
    44                 except Exception, e: 
    45                     raise 'MAF UID (%s) found, but configuration appears to be malformed: %s' % ( maf_uid, e ) 
    46         except: 
    47             pass 
    48     return None 
    49  
    50 #builds and returns (index, index_filename) for specified maf_file 
    51 def build_maf_index( maf_file, species = None ): 
    52     indexes = bx.interval_index_file.Indexes() 
    53     try: 
    54         maf_reader = bx.align.maf.Reader( open( maf_file ) ) 
    55         # Need to be a bit tricky in our iteration here to get the 'tells' right 
    56         while True: 
    57             pos = maf_reader.file.tell() 
    58             block = maf_reader.next() 
    59             if block is None: break 
    60             for c in block.components: 
    61                 if species is not None and c.src.split( "." )[0] not in species: 
    62                     continue 
    63                 indexes.add( c.src, c.forward_strand_start, c.forward_strand_end, pos ) 
    64         fd, index_filename = tempfile.mkstemp() 
    65         out = os.fdopen( fd, 'w' ) 
    66         indexes.write( out ) 
    67         out.close() 
    68         return ( bx.align.maf.Indexed( maf_file, index_filename = index_filename, keep_open = True, parse_e_rows = True ), index_filename ) 
    69     except: 
    70         return ( None, None ) 
     30import maf_utilities 
     31import sys 
    7132 
    7233def __main__(): 
     
    7435    mincols = 0 
    7536     
    76     # Parse Command Line 
     37    #Parse Command Line 
    7738    options, args = doc_optparse.parse( __doc__ ) 
    7839     
     
    11778        print >>sys.stderr, "Output file has not been specified." 
    11879        sys.exit() 
     80    #Finish parsing command line 
    11981     
    12082    #Open indexed access to MAFs 
    12183    if options.mafType: 
    122         index = maf_index_by_uid( options.mafType ) 
     84        if options.indexLocation: 
     85            index = maf_utilities.maf_index_by_uid( options.mafType, options.indexLocation ) 
     86        else: 
     87            index = maf_utilities.maf_index_by_uid( options.mafType ) 
    12388        if index is None: 
    12489            print >> sys.stderr, "The MAF source specified (%s) appears to be invalid." % ( options.mafType ) 
    12590            sys.exit() 
    12691    elif options.mafFile: 
    127         index, index_filename = build_maf_index( options.mafFile, species = [dbkey] ) 
     92        index, index_filename = maf_utilities.build_maf_index( options.mafFile, species = [dbkey] ) 
    12893        if index is None: 
    12994            print >> sys.stderr, "Your MAF file appears to be malformed." 
     
    13398        sys.exit() 
    13499     
     100    #Create MAF writter 
    135101    out = bx.align.maf.Writer( open(output_file, "w") ) 
    136102     
    137     # Iterate over input regions  
     103    #Iterate over input regions  
    138104    num_blocks = 0 
    139     num_lines = 0 
    140     for num_lines, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( interval_file, 'r' ), chrom_col = chromCol, start_col = startCol, end_col = endCol, strand_col = strandCol, fix_strand = True, return_header = False, return_comments = False ) ): 
    141         try: 
    142             src = "%s.%s" % ( dbkey, region.chrom ) 
    143              
    144             blocks = index.get( src, region.start, region.end ) 
    145              
    146             for block in blocks:  
    147                 ref = block.get_component_by_src( src ) 
    148                 #We want our block coordinates to be from positive strand 
    149                 if ref.strand == "-": 
    150                     block = block.reverse_complement() 
    151                     ref = block.get_component_by_src( src ) 
    152                  
    153                 #save old score here for later use 
    154                 old_score =  block.score 
    155                 slice_start = max( region.start, ref.start ) 
    156                 slice_end = min( region.end, ref.end ) 
    157                  
    158                 #when interval is out-of-range (not in maf index), fail silently: else could create tons of scroll 
    159                 try: 
    160                     block = block.slice_by_component( ref, slice_start, slice_end )  
    161                 except: 
    162                     continue 
    163                      
    164                 if block.text_size > mincols: 
    165                     if region.strand != ref.strand: block = block.reverse_complement() 
    166                     # restore old score, may not be accurate, but it is better than 0 for everything 
    167                     block.score = old_score 
    168                     if species is not None: 
    169                         block = block.limit_to_species( species ) 
    170                         block.remove_all_gap_columns() 
    171                     out.write( block ) 
    172                     num_blocks += 1 
    173         except Exception, e: 
    174             print "Error found on input line %s: %s." % ( num_lines, e ) 
    175             continue 
     105    num_regions = None 
     106    for num_regions, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( interval_file, 'r' ), chrom_col = chromCol, start_col = startCol, end_col = endCol, strand_col = strandCol, fix_strand = True, return_header = False, return_comments = False ) ): 
     107        src = "%s.%s" % ( dbkey, region.chrom ) 
     108        for block in maf_utilities.get_chopped_blocks_for_region( index, src, region, species, mincols ): 
     109            out.write( block ) 
     110            num_blocks += 1 
    176111     
    177     # Close output MAF 
     112    #Close output MAF 
    178113    out.close() 
    179114     
    180115    #remove index file if created during run 
    181     if index_filename is not None: 
    182         os.unlink( index_filename ) 
     116    maf_utilities.remove_temp_index_file( index_filename ) 
    183117     
    184     print "%s MAF blocks extracted." % num_blocks 
     118    if num_blocks: 
     119        print "%i MAF blocks extracted for %i regions." % ( num_blocks, ( num_regions + 1 ) ) 
     120    elif num_regions is not None: 
     121        print "No MAF blocks could be extracted for %i regions." % ( num_regions + 1 ) 
     122    else: 
     123        print "No valid regions have been provided." 
    185124     
    186125if __name__ == "__main__": __main__() 
  • tools/maf/interval2maf_pairwise.xml

    r760 r863  
    11<tool id="Interval2Maf_pairwise1" name="Extract Pairwise MAF blocks"> 
    22  <description>given a set of genomic intervals</description> 
    3   <command interpreter="python2.4">interval2maf_pairwise.py --dbkey=$dbkey --chromCol=$input1_chromCol --startCol=$input1_startCol --endCol=$input1_endCol --strandCol=$input1_strandCol --mafType=$mafType --interval_file=$input1 --output_file=$out_file1</command> 
     3  <command interpreter="python2.4">interval2maf.py --dbkey=$input1_dbkey --chromCol=$input1_chromCol --startCol=$input1_startCol --endCol=$input1_endCol --strandCol=$input1_strandCol --mafType=$mafType --interval_file=$input1 --output_file=$out_file1 --indexLocation=/depot/data2/galaxy/maf_pairwise.loc</command> 
    44  <inputs> 
    55    <param name="input1" type="data" format="interval" label="Interval File"/> 
  • tools/maf/interval_maf_to_merged_fasta.xml

    r765 r863  
    11<tool id="Interval_Maf_Merged_Fasta2" name="Stitch MAF blocks"> 
    22  <description>given a set of genomic intervals</description> 
    3   <command interpreter="python2.4">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py $dbkey $maf_source_type.species $maf_source_type.maf_file $input1 $out_file1 $input1_chromCol $input1_startCol $input1_endCol $input1_strandCol $maf_source_type.maf_source 
    4 #else:#interval_maf_to_merged_fasta.py $dbkey $maf_source_type.species $maf_source_type.maf_identifier $input1 $out_file1 $input1_chromCol $input1_startCol $input1_endCol $input1_strandCol $maf_source_type.maf_source 
     3  <command interpreter="python2.4">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --interval_file=$input1 --output_file=$out_file1 --chromCol=$input1_chromCol --startCol=$input1_startCol --endCol=$input1_endCol --strandCol=$input1_strandCol --mafSourceType=$maf_source_type.maf_source 
     4#else:#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --chromCol=$input1_chromCol --startCol=$input1_startCol --endCol=$input1_endCol --strandCol=$input1_strandCol --mafSourceType=$maf_source_type.maf_source 
    55#end if 
    66  </command> 
  • tools/maf/maf_by_block_number.py

    r740 r863  
    3030            continue 
    3131        try: 
    32             for count, m in enumerate( bx.align.maf.Reader( open( input_maf_filename, 'r' ) ) ): 
     32            for count, block in enumerate( bx.align.maf.Reader( open( input_maf_filename, 'r' ) ) ): 
    3333                if count == block_wanted: 
    34                     maf_writer.write( m
     34                    maf_writer.write( block
    3535                    break 
    3636        except: 
  • tools/maf/maf_stats.py

    r742 r863  
    55""" 
    66 
    7 import sys, tempfile, os 
     7import sys 
    88import pkg_resources; pkg_resources.require( "bx-python" ) 
    9 import bx.align.maf 
    109import bx.intervals.io 
    11 import bx.interval_index_file 
    12 import psyco_full 
    1310from numpy import zeros 
    14  
    15 MAF_LOCATION_FILE = "/depot/data2/galaxy/maf_index.loc" 
    16  
    17 def maf_index_by_uid( maf_uid ): 
    18     for line in open( MAF_LOCATION_FILE ): 
    19         try: 
    20             #read each line, if not enough fields, go to next line 
    21             if line[0:1] == "#" : continue 
    22             fields = line.split('\t') 
    23             if maf_uid == fields[1]: 
    24                 try: 
    25                     maf_files = fields[3].replace( "\n", "" ).replace( "\r", "" ).split( "," ) 
    26                     return bx.align.maf.MultiIndexed( maf_files, keep_open = True, parse_e_rows = False ) 
    27                 except Exception, e: 
    28                     raise 'MAF UID (%s) found, but configuration appears to be malformed: %s' % ( maf_uid, e ) 
    29         except: 
    30             pass 
    31     return None 
    32  
    33 #builds and returns (index, index_filename) for specified maf_file 
    34 def build_maf_index( maf_file, species = None ): 
    35     indexes = bx.interval_index_file.Indexes() 
    36     try: 
    37         maf_reader = bx.align.maf.Reader( open( maf_file ) ) 
    38         # Need to be a bit tricky in our iteration here to get the 'tells' right 
    39         while True: 
    40             pos = maf_reader.file.tell() 
    41             block = maf_reader.next() 
    42             if block is None: break 
    43             for c in block.components: 
    44                 if species is not None and c.src.split( "." )[0] not in species: 
    45                     continue 
    46                 indexes.add( c.src, c.forward_strand_start, c.forward_strand_end, pos ) 
    47         fd, index_filename = tempfile.mkstemp() 
    48         out = os.fdopen( fd, 'w' ) 
    49         indexes.write( out ) 
    50         out.close() 
    51         return ( bx.align.maf.Indexed( maf_file, index_filename = index_filename, keep_open = True, parse_e_rows = False ), index_filename ) 
    52     except: 
    53         return ( None, None ) 
    54  
     11import maf_utilities 
    5512 
    5613def __main__(): 
     
    7431    if maf_source_type == "user": 
    7532        #index maf for use here 
    76         index, index_filename = build_maf_index( input_maf_filename, species = [dbkey] ) 
     33        index, index_filename = maf_utilities.build_maf_index( input_maf_filename, species = [dbkey] ) 
    7734        if index is None: 
    7835            print >>sys.stderr, "Your MAF file appears to be malformed." 
     
    8037    elif maf_source_type == "cached": 
    8138        #access existing indexes 
    82         index = maf_index_by_uid( input_maf_filename ) 
     39        index = maf_utilities.maf_index_by_uid( input_maf_filename ) 
    8340        if index is None: 
    8441            print >> sys.stderr, "The MAF source specified (%s) appears to be invalid." % ( input_maf_filename ) 
     
    9956        coverage = { dbkey: zeros( region.end - region.start, dtype = bool ) } 
    10057         
    101         blocks = index.get( src, region.start, region.end ) 
    102         for maf in blocks: 
     58        for block in maf_utilities.get_chopped_blocks_for_region( index, src, region, force_strand='+' ): 
    10359            #make sure all species are known 
    104             for c in maf.components: 
     60            for c in block.components: 
    10561                spec = c.src.split( '.' )[0] 
    10662                if spec not in coverage: coverage[spec] = zeros( region.end - region.start, dtype = bool ) 
    107             #slice maf by start and end 
    108             ref = maf.get_component_by_src( src ) 
    109             # If the reference component is on the '-' strand we should complement the interval 
    110             if ref.strand == '-': 
    111                 maf = maf.reverse_complement() 
    112                 ref = maf.get_component_by_src( src ) 
    113             slice_start = max( region.start, ref.start ) 
    114             slice_end = min( region.end, ref.end ) 
    115             try: 
    116                 maf = maf.slice_by_component( ref, slice_start, slice_end ) 
    117             except: 
    118                 continue 
    119             ref = maf.get_component_by_src( ref.src ) 
    120              
     63            ref = block.get_component_by_src( src ) 
    12164            #skip gap locations due to insertions in secondary species relative to primary species 
    122             start_offset = slice_start - region.start 
     65            start_offset = ref.start - region.start 
    12366            num_gaps = 0 
    12467            for i in range( len( ref.text.rstrip().rstrip( "-" ) ) ): 
     
    12770                    continue 
    12871                #Toggle base if covered 
    129                 for comp in maf.components: 
     72                for comp in block.components: 
    13073                    spec = comp.src.split( '.' )[0] 
    13174                    if comp.text and comp.text[i] not in ['-']: 
     
    15093    out.close() 
    15194    print "%i regions were processed with a total length of %i." % ( num_region, total_length ) 
    152     if index_filename is not None: 
    153         os.unlink( index_filename ) 
     95    maf_utilities.remove_temp_index_file( index_filename ) 
     96 
    15497if __name__ == "__main__": __main__() 
  • tools/maf/maf_thread_for_species.py

    r761 r863  
    4242                m.score = 0.0  
    4343                maf_writer.write( m ) 
    44     except
    45         print >> sys.stderr, "Error steping through MAF File" 
     44    except Exception, e
     45        print >> sys.stderr, "Error steping through MAF File: %s" % e 
    4646        sys.exit() 
    4747    maf_reader.close() 
  • tools/maf/maf_to_bed.py

    r740 r863  
    44Read a maf and output intervals for specified list of species. 
    55""" 
    6  
    7 from __future__ import division 
    8  
    9 import textwrap 
    10 import sys, tempfile, os 
     6import sys, os 
    117import pkg_resources; pkg_resources.require( "bx-python" ) 
    128from bx.align import maf 
  • tools/maf/maf_to_fasta_concat.py

    r740 r863  
    77""" 
    88#Dan Blankenberg 
    9 from __future__ import division 
    10  
    11 import textwrap 
    129import sys 
    1310import pkg_resources; pkg_resources.require( "bx-python" ) 
    1411from bx.align import maf 
     12import maf_utilities 
    1513 
    1614def __main__(): 
     
    1816     
    1917    texts = {} 
    20  
     18     
    2119    input_filename = sys.argv[2] 
    2220    output_filename = sys.argv[3] 
     
    2422     
    2523    if "None" in species: 
    26         species = get_species( input_filename ) 
     24        species = maf_utilities.get_species_in_maf( input_filename ) 
    2725     
    2826    file_out = open( output_filename, 'w' ) 
     
    3028        file_out.write( ">" + spec + "\n" ) 
    3129        try: 
    32             for m in maf.Reader( open( input_filename, 'r' ) ): 
    33                 c = m.get_component_by_src_start( spec )  
    34                 if c: file_out.write( c.text ) 
     30            for block in maf.Reader( open( input_filename, 'r' ) ): 
     31                component = block.get_component_by_src_start( spec ) 
     32                if component: file_out.write( component.text ) 
    3533                else: file_out.write( "-" * m.text_size ) 
    3634        except: 
     
    4038    file_out.close() 
    4139 
    42 def get_species( maf_filename ): 
    43         try: 
    44             species={} 
    45              
    46             file_in = open( maf_filename, 'r' ) 
    47             maf_reader = maf.Reader( file_in ) 
    48              
    49             for i, m in enumerate( maf_reader ): 
    50                 l = m.components 
    51                 for c in l: 
    52                     spec, chrom = maf.src_split( c.src ) 
    53                     if not spec or not chrom: 
    54                         spec = chrom = c.src 
    55                     species[spec] = spec 
    56              
    57             file_in.close() 
    58              
    59             species = species.keys() 
    60             species.sort() 
    61             return species 
    62         except: 
    63             return [] 
    6440 
    6541if __name__ == "__main__": __main__() 
  • tools/maf/maf_to_fasta_multiple_sets.py

    r740 r863  
    55""" 
    66#Dan Blankenberg 
    7 from __future__ import division 
    8  
    9 import textwrap 
    107import sys 
    118import pkg_resources; pkg_resources.require( "bx-python" ) 
     
    1411def __main__(): 
    1512    print "Restricted to species:", sys.argv[3] 
    16          
     13     
    1714    input_filename = sys.argv[1] 
    1815    output_filename = sys.argv[2] 
     
    2724        file_out = open( output_filename, 'w' ) 
    2825         
    29         block_num = -1 
    30          
    31         for i, m in enumerate( maf_reader ): 
    32             block_num += 1 
     26        for block_num, block in enumerate( maf_reader ): 
    3327            if "None" not in species: 
    34                 m = m.limit_to_species( species ) 
    35             l = m.components 
    36             if len(l) < num_species and partial == "partial_disallowed": continue 
    37             for c in l: 
    38                 spec, chrom = maf.src_split( c.src ) 
     28                block = block.limit_to_species( species ) 
     29            if len( block.components ) < num_species and partial == "partial_disallowed": continue 
     30            for component in block.components: 
     31                spec, chrom = maf.src_split( component.src ) 
    3932                if not spec or not chrom: 
    40                         spec = chrom = c.src 
    41                 file_out.write( ">" + c.src + "(" + c.strand + "):" + str( c.start ) + "-" + str( c.end ) + "|" + spec + "_" + str( block_num ) + "\n" ) 
    42                 file_out.write( c.text + "\n" ) 
     33                        spec = chrom = component.src 
     34                file_out.write( ">" + component.src + "(" + component.strand + "):" + str( component.start ) + "-" + str( component.end ) + "|" + spec + "_" + str( block_num ) + "\n" ) 
     35                file_out.write( component.text + "\n" ) 
    4336            file_out.write( "\n" ) 
    4437        file_in.close()