Changeset 511:635e3f5fc1c6

Show
Ignore:
Timestamp:
08/03/07 12:37:44 (1 year ago)
Author:
Dan Blankenberg <dan@bx.psu.edu>
branch:
default
convert_revision:
svn:9bcadc22-80f8-0310-8a53-c8f022958886/galaxy/trunk@1832
Message:

Add the ability for secondary (child) and new primary datasets to be collected automatically. Datasets can now be set to not be visible, which will prevent their display in a user's history.

To have a child dataset harvested automatically, simply name the file:
child_PARENTID_DESIGNATION_VISIBILITY_EXT (To make a new primary dataset, simply use the form: primary_ASSOCIATEDWITHDATASETID_DESIGNATION_VISIBILITY_EXT)
and place this file in the directory specified by $new_file_path

For example:
You define the command in the tool XML as:
<command interpreter="python2.4">some_command.py $input1 $out_file1 $out_file1.id $new_file_path</command>

Suppose the input dataset is 1 and the output dataset is 2, the commandline becomes:
python2.4 some_command.py ./database/files/dataset_1.dat ./database/files/dataset_2.dat 2 ./database/tmp

In addition to the primary file (an HTML file), this program creates files in the ./database/tmp directory named:
child_2_SomeImage_invisible_jpg
child_2_SomeText_visible_text

These files are discovered and added as children to the appropriate dataset. The text file will appear in the user's history, but the jpg will not.

All files can be viewed, however, by making links in the primary (HTML) history item like:
<img src="display_child?parent_id=2&designation=SomeImage" alt="Some Image"/>
<a href="display_child?parent_id=2&designation=SomeText">Some Text</a>

Designations need to be unique for each originally declared output file; simply using a counter can work well.

These new files can be accessed in the exec_after_process hook as param_dict['__collected_datasets__'], a dictionary with the keys 'children' and 'primary'.

Note the update to the universe_wsgi.ini.sample file.

Some database changes are required:
ALTER TABLE dataset ADD visible boolean;
UPDATE dataset SET visible = true WHERE visible is NULL;

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • lib/galaxy/config.py

    r475 r511  
    2626        self.database_connection =  kwargs.get( "database_connection", False ) 
    2727        self.file_path = resolve_path( kwargs.get( "file_path", "database/files" ), self.root ) 
     28        self.new_file_path = resolve_path( kwargs.get( "new_file_path", "database/tmp" ), self.root ) 
    2829        self.tool_path = resolve_path( kwargs.get( "tool_path", "tools" ), self.root ) 
    2930        self.test_conf = resolve_path( kwargs.get( "test_conf", "" ), self.root ) 
  • lib/galaxy/jobs/__init__.py

    r472 r511  
    287287        param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] ) 
    288288        param_dict = self.tool.params_from_strings( param_dict, self.app ) 
     289        # Create generated output children and primary datasets and add to param_dict 
     290        collected_datasets = {'children':self.tool.collect_child_datasets(out_data),'primary':self.tool.collect_primary_datasets(out_data)} 
     291        param_dict.update({'__collected_datasets__':collected_datasets}) 
     292        # Call 'exec_after_process' hook 
    289293        self.tool.call_hook( 'exec_after_process', self.queue.app, inp_data=inp_data,  
    290294                             out_data=out_data, param_dict=param_dict,  
  • lib/galaxy/model/__init__.py

    r483 r511  
    144144    def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,  
    145145                  dbkey=None, state=None, metadata=None, history=None, parent_id=None, designation=None, 
    146                   validation_errors=None ): 
     146                  validation_errors=None, visible=True ): 
    147147        self.name = name or "Unnamed dataset" 
    148148        self.id = id 
     
    159159        self.deleted = False 
    160160        self.purged = False 
     161        self.visible = visible 
    161162        # Relationships 
    162163        self.history = history 
     
    268269        #             if designation == data.designation: 
    269270        #                 return data 
    270         for child_assocation in self.children: 
     271        for child_association in self.children: 
    271272            if child_association.designation == designation: 
    272                 return child 
     273                return child_association.child 
    273274        return None 
    274275    def purge( self ): 
  • lib/galaxy/model/mapping.py

    r485 r511  
    7474    Column( "deleted", Boolean ), 
    7575    Column( "purged", Boolean ), 
     76    Column( "visible", Boolean ), 
    7677    ForeignKeyConstraint(['parent_id'],['dataset.id'], ondelete="CASCADE") ) 
    7778 
  • lib/galaxy/tools/__init__.py

    r483 r511  
    77pkg_resources.require( "simplejson" ) 
    88 
    9 import logging, os, string, sys, tempfile 
     9import logging, os, string, sys, tempfile, glob, shutil 
    1010import simplejson 
    1111import sha, hmac, binascii 
     
    783783                key = "_CHILD___%s___%s" % ( name, child.designation )  
    784784                param_dict[ key ] = DatasetFilenameWrapper( child ) 
    785         # Return the dictionary of parameters         
     785        # We add access to app here, this allows access to app.config, etc 
     786        param_dict['__app__'] = RawObjectWrapper( self.app ) 
     787        param_dict['__new_file_path__'] = self.app.config.new_file_path #More convienent access to this value; we don't need to wrap a string 
     788        # Return the dictionary of parameters 
    786789        return param_dict 
    787790     
     
    860863            raise  
    861864         
    def collect_child_datasets( self, output ):
        """
        Collect tool-generated child (secondary) datasets.

        For every declared output dataset in `output` (a dict mapping output
        name to dataset), look in `self.app.config.new_file_path` for files
        named 'child_PARENTID_DESIGNATION_VISIBILITY_EXT', where PARENTID is
        the id of that output dataset.  Each such file is turned into a new
        Dataset, moved to that dataset's `file_name`, and attached to the
        output dataset through a DatasetChildAssociation.

        VISIBILITY is compared case-insensitively with 'visible'; any other
        value makes the child invisible.  EXT is lower-cased and used as the
        dataset extension.

        File names with fewer than the five expected '_'-separated fields are
        skipped with a warning instead of raising IndexError.

        Returns a dict of the form { output_name: { designation: child } }.
        Output names for which nothing was collected are absent.
        """
        children = {}
        for name, outdata in output.items():
            pattern = os.path.join( self.app.config.new_file_path, "child_%i_*" % outdata.id )
            for filename in glob.glob( pattern ):
                fields = os.path.basename( filename ).split( "_" )
                if len( fields ) < 5:
                    # Not in the documented form; leave the file where it is.
                    logging.getLogger( __name__ ).warning(
                        "Skipping malformed child dataset file name: %s", filename )
                    continue
                # fields[0] is the literal 'child' prefix matched by the glob
                parent_id = int( fields[1] )
                designation = fields[2]
                visible = fields[3].lower() == "visible"
                ext = fields[4].lower()
                # Create new child dataset
                child_data = self.app.model.Dataset( extension=ext, parent_id=parent_id,
                                                     designation=designation, visible=visible,
                                                     dbkey=outdata.dbkey )
                # Flushed before the move; presumably file_name is only
                # meaningful once the dataset has been stored -- TODO confirm
                child_data.flush()
                # Move data from temp location to dataset location
                shutil.move( filename, child_data.file_name )
                child_data.name = "Secondary Dataset (%s)" % ( designation )
                child_data.state = child_data.states.OK
                child_data.init_meta()
                child_data.set_peek()
                child_data.flush()
                # Add to child association table
                assoc = self.app.model.DatasetChildAssociation()
                assoc.child = child_data
                assoc.designation = child_data.designation
                outdata.children.append( assoc )
                # Add child to return dict
                if name not in children:
                    children[name] = {}
                children[name][designation] = child_data
        return children
     898         
    def collect_primary_datasets( self, output ):
        """
        Collect tool-generated additional primary datasets.

        For every declared output dataset in `output` (a dict mapping output
        name to dataset), look in `self.app.config.new_file_path` for files
        named 'primary_ASSOCIATEDWITHDATASETID_DESIGNATION_VISIBILITY_EXT',
        where the id is that of the output dataset.  Each such file is turned
        into a new Dataset that is added to the same history as the output
        dataset, takes over its name, info and dbkey, and has its metadata
        initialised from it (`init_meta( copy_from=outdata )`).

        VISIBILITY is compared case-insensitively with 'visible'; any other
        value makes the dataset invisible.  EXT is lower-cased and used as the
        dataset extension.

        File names with fewer than the five expected '_'-separated fields are
        skipped with a warning instead of raising IndexError.

        Returns a dict of the form { output_name: { designation: dataset } }.
        Output names for which nothing was collected are absent.
        """
        primary_datasets = {}
        for name, outdata in output.items():
            pattern = os.path.join( self.app.config.new_file_path, "primary_%i_*" % outdata.id )
            for filename in glob.glob( pattern ):
                fields = os.path.basename( filename ).split( "_" )
                if len( fields ) < 5:
                    # Not in the documented form; leave the file where it is.
                    logging.getLogger( __name__ ).warning(
                        "Skipping malformed primary dataset file name: %s", filename )
                    continue
                # fields[0] is the 'primary' prefix and fields[1] the id of the
                # associated dataset; both were already matched by the glob.
                designation = fields[2]
                visible = fields[3].lower() == "visible"
                ext = fields[4].lower()
                # Create new primary dataset
                primary_data = self.app.model.Dataset( extension=ext, designation=designation,
                                                       visible=visible, dbkey=outdata.dbkey )
                primary_data.flush()
                self.app.model.History.get( outdata.history_id ).add_dataset( primary_data )
                # Move data from temp location to dataset location
                shutil.move( filename, primary_data.file_name )
                primary_data.name = outdata.name
                primary_data.info = outdata.info
                primary_data.state = primary_data.states.OK
                primary_data.init_meta( copy_from=outdata )
                primary_data.set_peek()
                primary_data.flush()
                # Add dataset to return dict
                if name not in primary_datasets:
                    primary_datasets[name] = {}
                primary_datasets[name][designation] = primary_data
        return primary_datasets
     929 
     930         
    862931# ---- Utility classes to be factored out ----------------------------------- 
    863932         
     
    866935        self.value = value 
    867936         
     937 
     938class RawObjectWrapper( object ): 
     939    """ 
     940    Wraps an object so that __str__ returns module_name:class_name. 
     941    """ 
     942    def __init__( self, obj ): 
     943        self.obj = obj 
     944    def __str__( self ): 
     945        return "%s:%s" % (self.obj.__module__, self.obj.__class__.__name__) 
     946    def __getattr__( self, key ): 
     947        return getattr( self.obj, key ) 
     948 
    868949class InputValueWrapper( object ): 
    869950    """ 
  • lib/galaxy/web/controllers/root.py

    r490 r511  
    122122        else: 
    123123            return "No data with id=%d" % id 
     124 
    @web.expose
    def display_child( self, trans, parent_id=None, designation=None, tofile=None, toext=".txt" ):
        """
        Returns child data directly into the browser, based upon parent_id and designation.

        The parent dataset is loaded by `parent_id` and its child with the
        given `designation` is handed to `self.display`, together with
        `tofile` and `toext`.  If the parent or the child cannot be found, or
        anything raises along the way, a plain-text "could not be found"
        message is returned instead; the failure is logged rather than
        silently discarded.
        """
        # Imported locally so this method does not rely on module-level names.
        import logging
        try:
            data = self.app.model.Dataset.get( parent_id )
            if data:
                child = data.get_child_by_designation( designation )
                if child:
                    return self.display( trans, id=child.id, tofile=tofile, toext=toext )
        except Exception:
            # Best effort: still answer with the not-found message below, but
            # keep a trace of what actually went wrong.
            logging.getLogger( __name__ ).exception(
                "display_child failed for parent_id=%r designation=%r", parent_id, designation )
        # NOTE(review): designation and parent_id come from the request and are
        # echoed back unescaped -- confirm whether the framework escapes this.
        return "A child named %s could not be found for data %s" % ( designation, parent_id )
    124139 
    125140    @web.expose 
  • templates/history.tmpl

    r416 r511  
    258258                    #if $data.has_data: 
    259259                        <a href="display?id=$data.id&tofile=yes&toext=$data.ext" target="_blank">save</a> 
    260                        #for $display_app in $data.datatype.get_display_types(): 
     260                        #for $display_app in $data.datatype.get_display_types(): 
    261261                            #set $display_links = $data.datatype.get_display_links($data, $display_app, $app, $request.base) 
    262262                            #if $len($display_links) > 0: 
     
    277277            ## Child datasets 
    278278            ##  
    279                                
     279             
    280280            #if $len( $data.children ) > 0: 
    281                 <div> 
    282                     There are ${len( $data.children )} secondary datasets. 
    283                     #for $idx, $child_assoc in $enumerate($data.children) 
    284                                                         #set $child = $child_assoc.child 
    285                                                         $render_dataset( $child, $idx + 1 ) 
    286                                         #end for 
    287                                 </div> 
    288                         #end if 
    289                                                              
    290             </div> 
     281                #set $children = [] 
     282                #for $child_assoc in $data.children: 
     283                    #if $child_assoc.child.visible: 
     284                        $children.append($child_assoc.child) 
     285                    #end if 
     286                #end for 
     287                #if $len( $children ) > 0: 
     288                    <div> 
     289                        There are ${len( $children )} secondary datasets. 
     290                        #for $idx, $child in $enumerate($children) 
     291                            $render_dataset( $child, $idx + 1 ) 
     292                        #end for 
     293                    </div> 
     294                #end if 
     295            #end if 
     296                                 
     297        </div> 
    291298    </div> 
    292299 
     
    300307    ## Render all active (not deleted) datasets, ordered from newest to oldest 
    301308    #for $data in reversed( $history.active_datasets ) 
    302         $render_dataset( $data, $data.hid ) 
     309        #if $data.visible: 
     310            $render_dataset( $data, $data.hid ) 
     311        #end if 
    303312    #end for 
    304313#end if 
  • universe_wsgi.ini.sample

    r488 r511  
    3030# Where dataset files are saved 
    3131file_path = database/files 
     32# Temporary storage for additional datasets, this should be shared through the cluster 
     33new_file_path = database/tmp 
    3234 
    3335# Tools