pax_global_header00006660000000000000000000000064140372406610014515gustar00rootroot0000000000000052 comment=16a5356e4bd8326180b8f0dea23d4da99cbc671f CTDConverter-3.0a1/000077500000000000000000000000001403724066100141035ustar00rootroot00000000000000CTDConverter-3.0a1/.gitignore000066400000000000000000000005241403724066100160740ustar00rootroot00000000000000*.py[cod] # C extensions *.so # Packages *.egg *.egg-info build eggs parts bin var sdist develop-eggs .installed.cfg lib lib64 __pycache__ # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox nosetests.xml # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject # ignore PyCharm stuff .idea CTDConverter-3.0a1/.settings/000077500000000000000000000000001403724066100160215ustar00rootroot00000000000000CTDConverter-3.0a1/.settings/org.eclipse.core.resources.prefs000066400000000000000000000001211403724066100242260ustar00rootroot00000000000000eclipse.preferences.version=1 encoding//galaxyconfiggenerator/generator.py=utf-8 CTDConverter-3.0a1/.travis.yml000066400000000000000000000014541403724066100162200ustar00rootroot00000000000000language: python branches: only: - master python: - "3.6" - "3.7" - "3.8" # - "3.9" # not yet, due to Galaxy cache: pip addons: apt: update: true before_install: - sudo apt -y install libhdf5-dev install: - pip install git+https://github.com/WorkflowConversion/CTDopts - pip install lxml ruamel.yaml planemo - pip install flake8 flake8-import-order - cd $TRAVIS_BUILD_DIR - make install install-dep script: - flake8 - make diff_pydocstyle_report - make test - planemo l tests/test-data/ # planemo test content of tests/test-data (this is OK, because the previous # tests ensure equality of the xmls that are generated and those in the package) - export PATH=$(pwd)/tests/test-data:$PATH && planemo t tests/test-data/ # after_script: # - python -m codecov 
CTDConverter-3.0a1/LICENSE000066400000000000000000000020511403724066100151060ustar00rootroot00000000000000Copyright 2019, WorkflowConversion group Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. CTDConverter-3.0a1/Makefile000066400000000000000000000111241403724066100155420ustar00rootroot00000000000000# This file is part of CTDConverter, # https://github.com/WorkflowConversion/CTDConverter/, and is # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
MODULE=ctdconverter

# `SHELL=bash` doesn't work for some, so don't use BASH-isms like
# `[[` conditional expressions.

# Simply-expanded (`:=`) so that `find`/`grep` run once at parse time instead
# of once per reference.
PYSOURCES:=$(wildcard tests/*.py) setup.py $(shell find ${MODULE} -name "*.py")
DEVPKGS=diff_cover black pylint coverage pep257 pydocstyle flake8 mypy\
        pytest-xdist isort wheel autoflake
DEBDEVPKGS=pep8 python-autopep8 pylint python-coverage pydocstyle sloccount \
           python-flake8 python-mock shellcheck
# `$$3` is required: a bare `$3` is expanded by make (to nothing) before awk
# sees it, which made awk print the whole `__version__ = '...'` line.
# `tr` strips the Python string quotes so that only the version remains.
VERSION:=$(shell grep __version__ ${MODULE}/__init__.py | awk '{ print $$3 }' | tr -d "'\"")

# None of these names is a file produced by its recipe.
.PHONY: all help install-dep install-dependencies install-deb-dep install dev \
        dist clean sort_imports remove_unused_imports pep257 pydocstyle \
        diff_pydocstyle_report format pylint diff_pylint_report coverage \
        coverage.html coverage-report diff-cover test testcov sloccount \
        list-author-emails mypy shellcheck FORCE

## all : default task
all: install

## help : print this help message and exit
help: Makefile
	@sed -n 's/^##//p' $< | sed 's/$${MODULE}/${MODULE}/g'

## install-dep : install most of the development dependencies via pip
install-dep: install-dependencies

install-dependencies:
	pip install --upgrade $(DEVPKGS)
#pip install -r requirements.txt

## install-deb-dep: install most of the dev dependencies via apt-get
install-deb-dep:
	sudo apt-get install $(DEBDEVPKGS)

## install : install the ${MODULE} module and any scripts
install: FORCE
	pip install .

## dev : install the ${MODULE} module in dev mode
dev: install-dep
	pip install -e .

## dist : create a module package for distribution
dist: dist/${MODULE}-$(VERSION).tar.gz

# Depends on the Python sources (the previously referenced $(SOURCES) was
# never defined, so the package was never considered out of date).
dist/${MODULE}-$(VERSION).tar.gz: $(PYSOURCES)
	./setup.py sdist bdist_wheel

# ## docs : make the docs
# docs: FORCE
# 	cd docs && $(MAKE) html

## clean : clean up all temporary / machine-generated files
clean: FORCE
	rm -f ${MODULE}/*.pyc tests/*.pyc
	./setup.py clean --all || true
	rm -Rf .coverage
	rm -f diff-cover.html

# Linting and code style related targets
## sort_imports : sort imports using https://github.com/timothycrosley/isort
sort_imports:
	isort ${MODULE}/*.py tests/*.py setup.py

remove_unused_imports: $(PYSOURCES)
	autoflake --in-place --remove-all-unused-imports $^

pep257: pydocstyle
## pydocstyle : check Python code style
pydocstyle: $(PYSOURCES)
	pydocstyle --add-ignore=D100,D101,D102,D103 $^ || true

pydocstyle_report.txt: $(filter-out tests/%,${PYSOURCES})
	pydocstyle setup.py $^ > $@ 2>&1 || true

diff_pydocstyle_report: pydocstyle_report.txt
	diff-quality --compare-branch=master --violations=pydocstyle --fail-under=100 $^

## format : check/fix all code indentation and formatting (runs black)
format: $(PYSOURCES)
	black $^

## pylint : run static code analysis on Python code
pylint: $(PYSOURCES)
	pylint --msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \
		$^ -j0|| true

pylint_report.txt: ${PYSOURCES}
	pylint --msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \
		$^ -j0> $@ || true

diff_pylint_report: pylint_report.txt
	diff-quality --violations=pylint pylint_report.txt

.coverage: testcov

coverage: .coverage
	coverage report

coverage.xml: .coverage
	coverage xml

coverage.html: htmlcov/index.html

htmlcov/index.html: .coverage
	coverage html
	@echo Test coverage of the Python code is now in htmlcov/index.html

coverage-report: .coverage
	coverage report

diff-cover: coverage.xml
	diff-cover $^

diff-cover.html: coverage.xml
	diff-cover $^ --html-report $@

## test : run the ${MODULE} test suite
test: $(PYSOURCES)
	python setup.py test

## testcov : run the ${MODULE} test suite and collect coverage
testcov: $(PYSOURCES)
	python setup.py test --addopts "--cov ${MODULE} -n auto --dist=loadfile"

sloccount.sc: ${PYSOURCES} Makefile
	sloccount --duplicates --wide --details $^ > $@

## sloccount : count lines of code
sloccount: ${PYSOURCES} Makefile
	sloccount $^

list-author-emails:
	@echo 'name, E-Mail Address'
	@git log --format='%aN,%aE' | sort -u | grep -v 'root'

mypy: $(filter-out setup.py,${PYSOURCES})
	mypy --disallow-untyped-calls \
		 --warn-redundant-casts \
		 $^

# The script shipped with the project is named with underscores.
shellcheck: create_galaxy_tests.sh
	shellcheck $^

FORCE:

# Use this to print the value of a Makefile variable
# Example `make print-VERSION`
# From https://www.cmcrossroads.com/article/printing-value-makefile-variable
print-% : ; @echo $* = $($*)
CTDConverter-3.0a1/README.md000066400000000000000000000306151403724066100153670ustar00rootroot00000000000000# CTDConverter Given one or more CTD files, `CTDConverter` generates the needed wrappers to include them in workflow engines, such as Galaxy and CWL. ## Dependencies `CTDConverter` has the following python dependencies: - [CTDopts] - `lxml` - `ruamel.yaml` - Python 3.6+ Python 2.x is no longer supported as of release 3.0.0 ### Installing Dependencies We recommend the use of `conda` to manage all dependencies. If you're not sure what `conda` is, make sure to read about the [conda project](https://conda.io/docs/index.html). The easiest way to get you started with CTD conversion is to create a `conda` environment on which you'll install all dependencies. Using environments in `conda` allows you to have parallel, independent python environments, thus avoiding conflicts between libraries. If you haven't installed `conda`, check [conda-install](conda's installation guide). See [this guide](https://conda.io/docs/user-guide/tasks/manage-environments.html) if you need help creating a `conda` environment. 
Once you've installed `conda`, create an environment named `ctd-converter`, like so: ```sh $ conda create --name ctd-converter ``` You will now need to *activate* the environment by executing the following command: ```sh $ source activate ctd-converter ``` Install the required dependencies as follows (the order of execution **is actually important**, due to transitive dependencies): ```sh $ conda install --channel workflowconversion ctdopts $ conda install lxml $ conda install --channel conda-forge ruamel.yaml $ conda install libxml2=2.9.2 ``` `lxml` depends on `libxml2`. When you install `lxml` you'll get the latest version of `libxml2` (2.9.4) by default. You would usually want the latest version, but there is, however, a bug in validating XML files against a schema in this version of `libxml2`. If you require validation of input CTDs against a schema (which we recommend), you will need to downgrade to the latest known version of `libxml2` that works, namely, 2.9.2. You could just download dependencies manually and make them available through your `PYTHONPATH` environment variable, if you're into that. To get more information about how to install python modules without using `conda`, visit: https://docs.python.org/2/install/. ## How to install `CTDConverter` `CTDConverter` is not a python module, rather, a series of scripts, so installing it is as easy as downloading the source code from https://github.com/genericworkflownodes/CTDConverter. Once you've installed all dependencies, downloaded `CTDConverter` and activated your `conda` environment, you're good to go. ## Usage The first thing that you need to tell `CTDConverter` is the output format of the converted wrappers. `CTDConverter` supports conversion of CTDs into Galaxy and CWL. Invoking it is as simple as follows: $ python convert.py [FORMAT] [ADDITIONAL_PARAMETERS ...] Here `[FORMAT]` can be any of the supported formats (i.e., `cwl`, `galaxy`). 
`CTDConverter` offers a series of format-specific scripts and we've designed these scripts to behave *somewhat* similarly. All converter scripts have the same core functionality, that is, read CTD files, parse them using [CTDopts], validate against a schema, etc. Of course, each converter script might add extra functionality that is not present in other engines. Only the Galaxy converter script supports generation of a `tool_conf.xml` file, for instance. The following sections in this file describe the parameters that all converter scripts share. Please refer to the detailed documentation for each of the converters for more information: - [Generation of Galaxy ToolConfig files](galaxy/README.md) - [Generation of CWL CommandLineTool descriptions](ctdconverter/cwl/README.md) ## Fail Policy while processing several Files `CTDConverter` can parse several CTDs and convert them. However, the process will be interrupted and an error code will be returned at the first encountered error (e.g., a CTD is not valid, there are missing support files, etc.). ## Converting a single CTD In its simplest form, the converter takes an input CTD file and generates an output file. The following usage of `CTDConverter`: $ python convert.py [FORMAT] -i /data/sample_input.ctd -o /data/sample_output.xml will parse `/data/sample_input.ctd` and generate an appropriate converted file under `/data/sample_output.xml`. The generated file can be added to your workflow engine as usual. ## Converting several CTDs When converting several CTDs, the expected value for the `-o`/`--output-destination` parameter is a folder. For example: $ python convert.py [FORMAT] -i /data/ctds/one.ctd /data/ctds/two.ctd -o /data/converted-files will convert `/data/ctds/one.ctd` into `/data/converted-files/one.[EXT]` and `/data/ctds/two.ctd` into `/data/converted-files/two.[EXT]`. Each converter has a preferred extension, here shown as a variable (`[EXT]`). Galaxy prefers `xml`, while CWL prefers `cwl`. 
You can use wildcard expansion, as supported by most modern operating systems: $ python convert.py [FORMAT] -i /data/ctds/*.ctd -o /data/converted-files ## Common Parameters ### Input File(s) * Purpose: Provide input CTD file(s) to convert. * Short/long version: `-i` / `--input` * Required: yes. * Taken values: a list of input CTD files. Examples: Any of the following invocations will convert `/data/input_one.ctd` and `/data/input_two.ctd`: $ python convert.py [FORMAT] -i /data/input_one.ctd -i /data/input_two.ctd -o /data/generated $ python convert.py [FORMAT] -i /data/input_one.ctd /data/input_two.ctd -o /data/generated $ python convert.py [FORMAT] --input /data/input_one.ctd /data/input_two.ctd -o /data/generated $ python convert.py [FORMAT] --input /data/input_one.ctd --input /data/input_two.ctd -o /data/generated The following invocation will convert `/data/input.ctd` into `/data/output.xml`: $ python convert.py [FORMAT] -i /data/input.ctd -o /data/output.xml Of course, you can also use wildcards, which will be automatically expanded by any modern operating system. This is extremely useful if you want to convert several files at a time. Let's assume that the folder `/data/ctds` contains three files: `input_one.ctd`, `input_two.ctd` and `input_three.ctd`. The following two invocations will produce the same output in the `/data/wrappers` folder: $ python convert.py [FORMAT] -i /data/input_one.ctd /data/input_two.ctd /data/input_three.ctd -o /data/wrappers $ python convert.py [FORMAT] -i /data/*.ctd -o /data/wrappers ### Output Destination * Purpose: Provide output destination for the converted wrapper files. * Short/long version: `-o` / `--output-destination` * Required: yes. * Taken values: if a single input file is given, then a single output file is expected. If multiple input files are given, then an existent folder in which all converted CTDs will be written is expected. 
Examples: A single input is given, and the output will be generated into `/data/output.xml`: $ python convert.py [FORMAT] -i /data/input.ctd -o /data/output.xml Several inputs are given. The output is the already existing folder, `/data/wrappers`, and at the end of the operation, the files `/data/wrappers/input_one.[EXT]` and `/data/wrappers/input_two.[EXT]` will be generated: $ python convert.py [FORMAT] -i /data/ctds/input_one.ctd /data/ctds/input_two.ctd -o /data/wrappers Please note that the output file name is **not** taken from the name of the input file, rather from the name of the tool, that is, from the `name` attribute in the `<tool>` element in its corresponding CTD. By convention, the name of the CTD file and the name of the tool match. ### Exclusion, hardcoding, and modification of Parameters * Purpose: Some parameters present in the CTD are not to be exposed on the output files (e.g. parameters such as `--help`, `--debug` that would not make much sense to expose to users in a workflow management system), other parameters should be hardcoded (i.e. parameters that should not be exposed to the user but still set to a fixed value on the generated command line), and for other parameters it might be necessary to modify attributes of the input CTD or the generated output. * Short/long version: `-p` / `--hardcoded-parameters` * Required: no. * Taken values: A JSON file defining exclusions, hardcoded values, and modifications of parameters. Example: $ python convert.py [FORMAT] ... -p JSON_FILE The JSON file defines a mapping from parameter names to a list of modifications: ``` { "parameter1": [MODIFICATION1, ...], "parameter2": [MODIFICATION1, ...] ... } ``` where each modification is a mapping as defined below. #### Hardcoding parameters If a parameter should always be set on the command line using a fixed value, i.e. 
the user cannot choose the value, this can be done as follows: `"parameter": [{"value":"HARDCODED_VALUE"}]` #### Excluding parameters In order to exclude a parameter, i.e. so that it appears neither in the generated tool nor on the generated command line, the same syntax as for hardcoding is used, but with a special reserved value: `"parameter": [{"value": "@"}]` #### Modifying parameters It's possible to modify attributes of the input CTD definition of a parameter as well as attributes of the generated Galaxy XML tags. ``` "test": [{ "CTD:type": "text", "XML:type": "hidden" }], ``` ### Restricting modifications to a subset of the tools It's possible to specify modifications to a parameter for only a subset of the tools by specifying a list of tools as follows: ``` "output_files": [{ "CTD:required": true, "tools": ["OpenSwathDIAPreScoring"] }, { "CTD:restrictions": "txt,tsv,pep.xml,pepXML,html", "tools": ["SpectraSTSearchAdapter"] }] ``` ### Schema Validation * Purpose: Provide validation of input CTDs against a schema file (i.e., an XSD file). * Short/long version: `-V` / `--validation-schema` * Required: no. * Taken values: location of the schema file (e.g., CTD.xsd). CTDs can be validated against a schema. The master version of the schema can be found on [CTDSchema]. If a schema is provided, all input CTDs will be validated against it. **NOTE:** Please make sure to read the [section on issues with schema validation](#issues-with-libxml2-and-schema-validation) if you require validation of CTDs against a schema. ### Providing a default executable Path * Purpose: Help workflow engines locate tools by providing a path. * Short/long version: `-x` / `--default-executable-path` * Required: no. * Taken values: The default executable path of the tools in the target workflow engine. CTDs can contain an `<executablePath>` element that will be used when executing the tool binary. 
If this element is missing, the value provided by this parameter will be used as a prefix when building the appropriate sections in the output files. The following invocation of the converter will use `/opt/suite/bin` as a prefix when providing the executable path in the output files for any input CTD that lacks the `<executablePath>` section: $ python convert.py [FORMAT] -x /opt/suite/bin ... ### Bump wrapper versions There are two ways to bump tool versions. - Definition of a `@GALAXY_VERSION@` token in the macros file. This can be used to bump all tools at once. Tool versions will be `@TOOL_VERSION@+galaxy@GALAXY_VERSION@`. - Use the `--bump-file` parameter to specify the wrapper version of a subset of the tools in a json file that maps tool names/ids to a wrapper version. Tool version will be set to `@TOOL_VERSION@+galaxyX`, where `X` is the version found in the json file or `0` if not found. In case of an update of the tool version, i.e. `@TOOL_VERSION@`, `@GALAXY_VERSION@` should be reset to 0 in the first case, and the dictionary in the bump file should be emptied in the second case. Rationale: the auto-generation of the tool xml files would overwrite the wrapper version when regenerated. Hence it needs to be specified externally, e.g. in the macros.xml or in the bump file. 
### Tests Tests for Galaxy tools are generated with: ``` PATH=$(pwd)/tests/test-data/:$PATH for i in tests/test-data/*ctd do b=$(basename $i .ctd) python convert.py galaxy -i tests/test-data/$b.ctd -o tests/test-data/$b.xml -m tests/test-data/macros.xml -f tests/test-data/filetypes.txt --test-test -p tests/test-data/hardcoded_params.json --tool-version 5.0.011 done ``` [CTDopts]: https://github.com/genericworkflownodes/CTDopts [CTDSchema]: https://github.com/WorkflowConversion/CTDSchema [conda-install]: https://conda.io/docs/install/quick.html [using-conda]: https://conda.io/docs/using/envs.html CTDConverter-3.0a1/create_galaxy_tests.sh000077500000000000000000000005331403724066100204750ustar00rootroot00000000000000conda create -y --quiet\ --override-channels -c iuc -c conda-forge -c bioconda -c defaults\ --name ctdopts-1.4 ctdopts=1.4 lxml CTDConverter galaxy -i tests/test-data/*.ctd -o tests/test-data/\ -m tests/test-data/macros.xml -f tests/test-data/filetypes.txt\ --test-test -p tests/test-data/hardcoded_params.json --tool-version 3.8 CTDConverter-3.0a1/ctdconverter/000077500000000000000000000000001403724066100166055ustar00rootroot00000000000000CTDConverter-3.0a1/ctdconverter/__init__.py000066400000000000000000000001111403724066100207070ustar00rootroot00000000000000__version__ = '3.0a1' __date__ = '2021-02-15' __updated__ = '2021-02-15' CTDConverter-3.0a1/ctdconverter/common/000077500000000000000000000000001403724066100200755ustar00rootroot00000000000000CTDConverter-3.0a1/ctdconverter/common/__init__.py000066400000000000000000000000001403724066100221740ustar00rootroot00000000000000CTDConverter-3.0a1/ctdconverter/common/exceptions.py000066400000000000000000000015641403724066100226360ustar00rootroot00000000000000#!/usr/bin/env python """ @author: delagarza """ from CTDopts.CTDopts import ModelError class CLIError(Exception): # Generic exception to raise and log different fatal errors. 
def __init__(self, msg): super(CLIError).__init__(type(self)) self.msg = "E: %s" % msg def __str__(self): return self.msg def __unicode__(self): return self.msg class InvalidModelException(ModelError): def __init__(self, message): super().__init__() self.message = message def __str__(self): return self.message def __repr__(self): return self.message class ApplicationException(Exception): def __init__(self, msg): super(ApplicationException).__init__(type(self)) self.msg = msg def __str__(self): return self.msg def __unicode__(self): return self.msg CTDConverter-3.0a1/ctdconverter/common/logger.py000066400000000000000000000012511403724066100217250ustar00rootroot00000000000000#!/usr/bin/env python import sys MESSAGE_INDENTATION_INCREMENT = 2 def _get_indented_text(text, indentation_level): return ("%(indentation)s%(text)s" % {"indentation": " " * (MESSAGE_INDENTATION_INCREMENT * indentation_level), "text": text}) def warning(warning_text, indentation_level=0): sys.stdout.write(_get_indented_text("WARNING: %s\n" % warning_text, indentation_level)) def error(error_text, indentation_level=0): sys.stderr.write(_get_indented_text("ERROR: %s\n" % error_text, indentation_level)) def info(info_text, indentation_level=0): sys.stdout.write(_get_indented_text("INFO: %s\n" % info_text, indentation_level)) CTDConverter-3.0a1/ctdconverter/common/utils.py000066400000000000000000000432741403724066100216210ustar00rootroot00000000000000#!/usr/bin/env python import json import ntpath import operator import os from functools import reduce from CTDopts.CTDopts import ( _InFile, _OutFile, CTDModel, ModelTypeError, Parameter, ParameterGroup, Parameters ) from lxml import etree from ..common import logger from ..common.exceptions import ApplicationException MESSAGE_INDENTATION_INCREMENT = 2 # simple struct-class containing a tuple with input/output location and the in-memory CTDModel class ParsedCTD: def __init__(self, ctd_model=None, input_file=None, suggested_output_file=None): 
self.ctd_model = ctd_model self.input_file = input_file self.suggested_output_file = suggested_output_file class ParameterHardcoder: def __init__(self): # map whose keys are the composite names of tools and parameters in the following pattern: # [ToolName][separator][ParameterName] -> HardcodedValue # if the parameter applies to all tools, then the following pattern is used: # [ParameterName] -> HardcodedValue # examples (assuming separator is '#'): # threads -> 24 # XtandemAdapter#adapter -> xtandem.exe # adapter -> adapter.exe self.separator = "!" # hard coded values self.parameter_map = {} # ctd/xml attributes to overwrite self.attribute_map = {'CTD': {}, 'XML': {}} # blacklisted parameters self.blacklist = set() def register_blacklist(self, parameter_name, tool_name): k = self.build_key(parameter_name, tool_name) self.blacklist.add(k) # the most specific value will be returned in case of overlap def get_blacklist(self, parameter_name, tool_name): # look for the value that would apply for all tools if self.build_key(parameter_name, tool_name) in self.blacklist: return True elif parameter_name in self.blacklist: return True else: return False def register_attribute(self, parameter_name, attribute, value, tool_name): tpe, attribute = attribute.split(':') if tpe not in ['CTD', 'XML']: raise Exception('Attribute hardcoder not in CTD/XML') k = self.build_key(parameter_name, tool_name) if k not in self.attribute_map[tpe]: self.attribute_map[tpe][k] = {} self.attribute_map[tpe][k][attribute] = value # the most specific value will be returned in case of overlap def get_hardcoded_attributes(self, parameter_name, tool_name, tpe): # look for the value that would apply for all tools try: return self.attribute_map[tpe][self.build_key(parameter_name, tool_name)] except KeyError: return self.attribute_map[tpe].get(parameter_name, None) # the most specific value will be returned in case of overlap def get_hardcoded_value(self, parameter_name, tool_name): # look for the value 
that would apply for all tools try: return self.parameter_map[self.build_key(parameter_name, tool_name)] except KeyError: return self.parameter_map.get(parameter_name, None) def register_parameter(self, parameter_name, parameter_value, tool_name=None): self.parameter_map[self.build_key(parameter_name, tool_name)] = parameter_value def build_key(self, parameter_name, tool_name): if tool_name is None: return parameter_name return f"{parameter_name}{self.separator}{tool_name}" def validate_path_exists(path): if not os.path.exists(path) or not os.path.isfile(os.path.realpath(path)): raise ApplicationException("The provided path (%s) does not exist or is not a valid file path." % path) def validate_argument_is_directory(args, argument_name): file_name = getattr(args, argument_name) logger.info("REALPATH %s" % os.path.realpath(file_name)) if file_name is not None and os.path.isdir(os.path.realpath(file_name)): raise ApplicationException("The provided output file name (%s) points to a directory." 
% file_name) def validate_argument_is_valid_path(args, argument_name): paths_to_check = [] # check if we are handling a single file or a list of files member_value = getattr(args, argument_name) if member_value is not None: if isinstance(member_value, list): for file_name in member_value: paths_to_check.append(str(file_name).strip()) else: paths_to_check.append(str(member_value).strip()) for path_to_check in paths_to_check: try: validate_path_exists(path_to_check) except ApplicationException: raise ApplicationException(f"Argument {argument_name}: The provided output file name ({path_to_check}) points to a directory.") # taken from # http://stackoverflow.com/questions/8384737/python-extract-file-name-from-path-no-matter-what-the-os-path-format def get_filename(path): head, tail = ntpath.split(path) return tail or ntpath.basename(head) def get_filename_without_suffix(path): root, ext = os.path.splitext(os.path.basename(path)) return root def parse_input_ctds(xsd_location, input_ctds, output_destination, output_file_extension): is_converting_multiple_ctds = len(input_ctds) > 1 parsed_ctds = [] schema = None if xsd_location is not None: try: logger.info("Loading validation schema from %s" % xsd_location, 0) schema = etree.XMLSchema(etree.parse(xsd_location)) except Exception as e: logger.error("Could not load validation schema {}. Reason: {}".format(xsd_location, str(e)), 0) else: logger.warning("Validation against a schema has not been enabled.", 0) for input_ctd in input_ctds: if schema is not None: validate_against_schema(input_ctd, schema) output_file = output_destination # if multiple inputs are being converted, we need to generate a different output_file for each input if is_converting_multiple_ctds: output_file = os.path.join(output_file, get_filename_without_suffix(input_ctd) + "." 
+ output_file_extension) logger.info("Parsing %s" % input_ctd) model = None try: model = CTDModel(from_file=input_ctd) except ModelTypeError: pass try: model = Parameters(from_file=input_ctd) except ModelTypeError: pass assert model is not None, "Could not parse %s, seems to be no CTD/PARAMS" % (input_ctd) parsed_ctds.append(ParsedCTD(model, input_ctd, output_file)) return parsed_ctds def flatten_list_of_lists(args, list_name): setattr(args, list_name, [item for sub_list in getattr(args, list_name) for item in sub_list]) def validate_against_schema(ctd_file, schema): try: parser = etree.XMLParser(schema=schema) etree.parse(ctd_file, parser=parser) except etree.XMLSyntaxError as e: raise ApplicationException("Invalid CTD file {}. Reason: {}".format(ctd_file, str(e))) def add_common_parameters(parser, version, last_updated): parser.add_argument("FORMAT", default=None, help="Output format (mandatory). Can be one of: cwl, galaxy.") parser.add_argument("-i", "--input", dest="input_files", default=[], required=True, nargs="+", action="append", help="List of CTD files to convert.") parser.add_argument("-o", "--output-destination", dest="output_destination", required=True, help="If multiple input files are given, then a folder in which all converted " "files will be generated is expected; " "if a single input file is given, then a destination file is expected.") parser.add_argument("-x", "--default-executable-path", dest="default_executable_path", help="Use this executable path when is not present in the CTD", default=None, required=False) parser.add_argument("-p", "--hardcoded-parameters", dest="hardcoded_parameters", default=None, required=False, help="File containing hardcoded values for the given parameters. Run with '-h' or '--help' " "to see a brief example on the format of this file.") parser.add_argument("-V", "--validation-schema", dest="xsd_location", default=None, required=False, help="Location of the schema to use to validate CTDs. 
If not provided, no schema validation " "will take place.") # TODO: add verbosity, maybe? program_version = "v%s" % version program_build_date = str(last_updated) program_version_message = f"%(prog)s {program_version} ({program_build_date})" parser.add_argument("-v", "--version", action="version", version=program_version_message) def parse_hardcoded_parameters(hardcoded_parameters_file): parameter_hardcoder = ParameterHardcoder() if hardcoded_parameters_file is None: return parameter_hardcoder with open(hardcoded_parameters_file) as fp: data = json.load(fp) for parameter_name in data: if parameter_name == "#": continue for el in data[parameter_name]: hardcoded_value = el.get("value", None) tool_names = el.get("tools", [None]) for tool_name in tool_names: if tool_name is not None: tool_name = tool_name.strip() # hardcoded / blacklisted: # - blacklisted: if value is @ # - hardcoded: otherwise if hardcoded_value is not None: if hardcoded_value == '@': parameter_hardcoder.register_blacklist(parameter_name, tool_name) else: parameter_hardcoder.register_parameter(parameter_name, hardcoded_value, tool_name) else: for a in el: if a in ["tools", "value"]: continue if el[a] == "output-file": el[a] = _OutFile if el[a] == "input-file": el[a] = _InFile parameter_hardcoder.register_attribute(parameter_name, a, el[a], tool_name) return parameter_hardcoder def extract_tool_help_text(ctd_model): manual = "" doc_url = None if "manual" in ctd_model.opt_attribs.keys(): manual += "%s\n\n" % ctd_model.opt_attribs["manual"] if "docurl" in ctd_model.opt_attribs.keys(): doc_url = ctd_model.opt_attribs["docurl"] help_text = "No help available" if manual is not None: help_text = manual if doc_url is not None: help_text = ("" if manual is None else manual) if doc_url != "": help_text += "\nFor more information, visit %s" % doc_url return help_text def extract_tool_executable_path(model, default_executable_path): # rules to build the executable path: # if executablePath is null, then use 
default_executable_path # if executablePath is null and executableName is null, then the name of the tool will be used # if executablePath is null and executableName is not null, then executableName will be used # if executablePath is not null and executableName is null, # then executablePath and the name of the tool will be used # if executablePath is not null and executableName is not null, then both will be used # first, check if the model has executablePath / executableName defined executable_path = model.opt_attribs.get("executablePath", None) executable_name = model.opt_attribs.get("executableName", None) # check if we need to use the default_executable_path if executable_path is None: executable_path = default_executable_path # fix the executablePath to make sure that there is a '/' in the end if executable_path is not None: executable_path = executable_path.strip() if not executable_path.endswith("/"): executable_path += "/" # assume that we have all information present command = str(executable_path) + str(executable_name) if executable_path is None: if executable_name is None: command = model.name else: command = executable_name else: if executable_name is None: command = executable_path + model.name return command def _extract_and_flatten_parameters(parameter_group, nodes=False): """ get the parameters of a OptionGroup as generator """ for parameter in parameter_group.values(): if type(parameter) is Parameter: yield parameter else: if nodes: yield parameter yield from _extract_and_flatten_parameters(parameter.parameters, nodes) def extract_and_flatten_parameters(ctd_model, nodes=False): """ get the parameters of a CTD as generator """ if type(ctd_model) is CTDModel: return _extract_and_flatten_parameters(ctd_model.parameters.parameters, nodes) else: return _extract_and_flatten_parameters(ctd_model.parameters, nodes) # names = [_.name for _ in ctd_model.parameters.values()] # if names == ["version", "1"]: # return 
_extract_and_flatten_parameters(ctd_model.parameters.parameters["1"], nodes) # else: # return _extract_and_flatten_parameters(ctd_model, nodes) # for parameter in ctd_model.parameters.parameters: # if type(parameter) is not ParameterGroup: # yield parameter # else: # for p in extract_and_flatten_parameters(parameter): # yield p # parameters = [] # if len(ctd_model.parameters.parameters) > 0: # # use this to put parameters that are to be processed # # we know that CTDModel has one parent ParameterGroup # pending = [ctd_model.parameters] # while len(pending) > 0: # # take one element from 'pending' # parameter = pending.pop() # if type(parameter) is not ParameterGroup: # parameters.append(parameter) # else: # # append the first-level children of this ParameterGroup # pending.extend(parameter.parameters.values()) # # returned the reversed list of parameters (as it is now, # # we have the last parameter in the CTD as first in the list) # return reversed(parameters) # some parameters are mapped to command line options, this method helps resolve those mappings, if any def resolve_param_mapping(param, ctd_model, fix_underscore=False): # go through all mappings and find if the given param appears as a reference name in a mapping element param_mapping = None ctd_model_cli = [] if hasattr(ctd_model, "cli"): ctd_model_cli = ctd_model.cli for cli_element in ctd_model_cli: for mapping_element in cli_element.mappings: if mapping_element.reference_name == param.name: if param_mapping is not None: logger.warning("The parameter %s has more than one mapping in the section. " "The first found mapping, %s, will be used." 
def extract_param_path(param, fix_underscore=False):
    """
    get the lineage (list of names) of a parameter
    @param param object providing ``get_lineage(name_only=True)``
    @param fix_underscore if True, one leading underscore is removed from every
        name that starts with one
    @return the list returned by ``get_lineage`` (modified in place when
        fix_underscore is set)
    """
    names = param.get_lineage(name_only=True)
    if not fix_underscore:
        return names
    for idx in range(len(names)):
        if names[idx].startswith("_"):
            # drop exactly one leading underscore
            names[idx] = names[idx][1:]
    return names
def getFromDict(dataDict, mapList):
    """
    look up a value in a nested container by following a path of keys
    @param dataDict the outermost container (anything supporting ``[]``)
    @param mapList iterable of keys / indices, applied from the outside in
    @return the object reached after applying every key; dataDict itself if
        mapList is empty
    """
    node = dataDict
    for key in mapList:
        node = node[key]
    return node
def main(argv=None):
    """
    command line entry point: select the converter for the requested target and run it
    @param argv argument vector; if None, sys.argv is used, otherwise the given
        items are appended to sys.argv (the argument parser reads sys.argv) and
        argv[1] is taken as the target name
    @return 0 on success (also after a keyboard interrupt), 1 on usage errors,
        ApplicationException or ModelError, 2 on any other exception
    """
    # NOTE(review): when argv is given, the target is read from argv[1] while the
    # parser below reads the extended sys.argv -- confirm what callers pass here
    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    # check that we have, at least, one argument provided
    # at this point we cannot parse the arguments, because each converter takes different arguments, meaning each
    # converter will register its own parameters after we've registered the basic ones... we have to do it old school
    if len(argv) < 2:
        utils.logger.error("Not enough arguments provided")
        print("\nUsage: $ CTDConverter [TARGET] [ARGUMENTS]\n\n"
              "Where:\n"
              " target: one of 'cwl' or 'galaxy'\n\n"
              "Run again using the -h/--help option to print more detailed help.\n")
        return 1

    # TODO: at some point this should look like real software engineering and use a map containing converter instances
    # whose keys would be the name of the converter (e.g., cwl, galaxy), but for the time being, only two formats
    # are supported
    target = str.lower(argv[1])
    if target == 'cwl':
        from .cwl import converter
    elif target == 'galaxy':
        from .galaxy import converter
    else:
        utils.logger.error("Unrecognized target engine. Supported targets are 'cwl' and 'galaxy'.")
        return 1

    utils.logger.info("Using %s converter" % target)

    try:
        # Setup argument parser
        parser = ArgumentParser(prog="CTDConverter", description=program_license,
                                formatter_class=RawDescriptionHelpFormatter, add_help=True)
        # add_common_parameters builds the version message itself, so it must get
        # the raw version and date (passing the pre-formatted message formatted it twice)
        utils.add_common_parameters(parser, __version__, __updated__)

        # add tool-specific arguments
        converter.add_specific_args(parser)

        # parse arguments and perform some basic, common validation
        args = parser.parse_args()
        validate_and_prepare_common_arguments(args)

        # parse the input CTD files into CTDModels
        parsed_ctds = utils.parse_input_ctds(args.xsd_location, args.input_files, args.output_destination,
                                             converter.get_preferred_file_extension())

        # let the converter do its own thing
        converter.convert_models(args, parsed_ctds)
        return 0

    except KeyboardInterrupt:
        print("Interrupted...")
        return 0

    except ApplicationException as e:
        traceback.print_exc()
        utils.logger.error("CTDConverter could not complete the requested operation.", 0)
        utils.logger.error("Reason: " + getattr(e, "msg", str(e)), 0)
        return 1

    except ModelError as e:
        traceback.print_exc()
        utils.logger.error("There seems to be a problem with one of your input CTDs.", 0)
        utils.logger.error("Reason: " + getattr(e, "msg", str(e)), 0)
        return 1

    except Exception as e:
        traceback.print_exc()
        utils.logger.error("CTDConverter could not complete the requested operation.", 0)
        # arbitrary exceptions have no 'msg' attribute; reading e.msg here raised
        # an AttributeError inside the handler and hid the real error
        utils.logger.error("Reason: " + getattr(e, "msg", str(e)), 0)
        return 2
files, we expect output to be a folder if len(args.input_files) > 1: if not os.path.isdir(args.output_destination): raise ApplicationException("If several input files are provided, output (%s) is expected to be an " "existing directory.\n" % args.output_destination) # check that the provided input files, if provided, contain a valid file path input_arguments_to_check = ["xsd_location", "input_files", "hardcoded_parameters"] for argument_name in input_arguments_to_check: utils.validate_argument_is_valid_path(args, argument_name) # add the parameter hardcoder args.parameter_hardcoder = utils.parse_hardcoded_parameters(args.hardcoded_parameters) if __name__ == "__main__": sys.exit(main()) CTDConverter-3.0a1/ctdconverter/cwl/000077500000000000000000000000001403724066100173725ustar00rootroot00000000000000CTDConverter-3.0a1/ctdconverter/cwl/README.md000066400000000000000000000002631403724066100206520ustar00rootroot00000000000000# Conversion of CTD Files to CWL ## How to use: Parameters in Detail The CWL converter has, for now, only the basic parameters described in the [top README file](../README.md). CTDConverter-3.0a1/ctdconverter/cwl/__init__.py000066400000000000000000000000001403724066100214710ustar00rootroot00000000000000CTDConverter-3.0a1/ctdconverter/cwl/converter.py000077500000000000000000000162041403724066100217610ustar00rootroot00000000000000#!/usr/bin/env python # instead of using cwlgen, we decided to use PyYAML directly # we promptly found a problem with cwlgen, namely, it is not possible to construct something like: # some_paramter: # type: ['null', string] # which kind of sucks, because this seems to be the way to state that a parameter is truly optional and has no default # since cwlgen is just "fancy classes" around the yaml.dump() method, we implemented our own generation of yaml import ruamel.yaml as yaml from CTDopts.CTDopts import ( _Choices, _InFile, _Null, _OutFile ) from .. 
def convert_models(args, parsed_ctds):
    """
    Go through each CTD model and perform the conversion.

    @param args the parsed command line arguments, handed on to convert_to_cwl
    @param parsed_ctds iterable of parsed CTDs; for each one the attributes
        ctd_model, input_file and suggested_output_file are read and one CWL
        file is written to suggested_output_file
    """
    for parsed_ctd in parsed_ctds:
        model = parsed_ctd.ctd_model
        origin_file = parsed_ctd.input_file
        output_file = parsed_ctd.suggested_output_file

        logger.info("Converting {} (source {})".format(model.name, utils.get_filename(origin_file)))
        cwl_tool = convert_to_cwl(model, args)

        logger.info("Writing to %s" % utils.get_filename(output_file), 1)

        # the context manager closes the file even if a write or the yaml dump fails
        with open(output_file, 'w') as stream:
            stream.write(CWL_SHEBANG + '\n\n')
            stream.write(f"# This CWL file was automatically generated using CTDConverter version {version}.\n")
            stream.write("# Visit https://github.com/WorkflowConversion/CTDConverter for more information.\n\n")
            yaml.dump(cwl_tool, stream, default_flow_style=False)
utils.extract_tool_executable_path(ctd_model, args.default_executable_path) # add basic properties cwl_tool = {} cwl_tool[CWL_VERSION] = CURRENT_CWL_VERSION cwl_tool[CLASS] = 'CommandLineTool' cwl_tool[LABEL] = ctd_model.opt_attribs["description"] cwl_tool[DOC] = utils.extract_tool_help_text(ctd_model) cwl_tool[BASE_COMMAND] = base_command # TODO: test with optional output files # add inputs/outputs for param in utils.extract_and_flatten_parameters(ctd_model): param_name = utils.extract_param_name(param) cwl_fixed_param_name = fix_param_name(param_name) hardcoded_value = args.parameter_hardcoder.get_hardcoded_value(param_name, ctd_model.name) param_default = str(param.default) if param.default is not _Null and param.default is not None else None if param.type is _OutFile: create_lists_if_missing(cwl_tool, [INPUTS, OUTPUTS]) # we know the only outputs are of type _OutFile # we need an input of type string that will contain the name of the output file label = "Filename for %s output file" % param_name input_name_for_output_filename = get_input_name_for_output_filename(param) input_param = {} input_param[ID] = input_name_for_output_filename input_param[DOC] = label input_param[LABEL] = label if param_default is not None: input_param[DEFAULT] = param_default input_param[TYPE] = generate_cwl_param_type(param, TYPE_STRING) insert_input_binding(ctd_model, param, hardcoded_value, input_param) output_binding = {} output_binding[GLOB] = "$(inputs.%s)" % input_name_for_output_filename output_param = {} output_param[ID] = cwl_fixed_param_name output_param[OUTPUT_BINDING] = output_binding output_param[DOC] = param.description output_param[LABEL] = param.description output_param[TYPE] = generate_cwl_param_type(param) cwl_tool[INPUTS].append(input_param) cwl_tool[OUTPUTS].append(output_param) else: create_lists_if_missing(cwl_tool, [INPUTS]) # we know that anything that is not an _OutFile is an input input_param = {} input_param[ID] = cwl_fixed_param_name input_param[DOC] = 
def create_lists_if_missing(cwl_tool, keys):
    """
    make sure that every given key is present in the tool dictionary
    @param cwl_tool the dictionary to complete (modified in place)
    @param keys the keys to check; a key that is absent is set to a new empty
        list, a key that is already present keeps its value
    """
    for name in keys:
        cwl_tool.setdefault(name, [])
CTDConverter-3.0a1/ctdconverter/galaxy/000077500000000000000000000000001403724066100200725ustar00rootroot00000000000000CTDConverter-3.0a1/ctdconverter/galaxy/README.md000066400000000000000000000170361403724066100213600ustar00rootroot00000000000000# Conversion of CTD Files to Galaxy ToolConfigs ## Generating a `tool_conf.xml` File * Purpose: Galaxy uses a file `tool_conf.xml` in which other tools can be included. `CTDConverter` can also generate this file. Categories will be extracted from the provided input CTDs and for each category, a different `
` will be generated. Any input CTD lacking a category will be sorted under the provided default category. * Short/long version: `-t` / `--tool-conf-destination` * Required: no. * Taken values: The destination of the file. $ python convert.py galaxy -i /data/ctds/*.ctd -o /data/generated-galaxy-stubs -t /data/generated-galaxy-stubs/tool_conf.xml ## Adding Parameters to the Command-line * Purpose: Galaxy *ToolConfig* files include a `` element in which the command line to invoke the tool can be given. Sometimes it is needed to invoke your tools in a certain way (i.e., passing certain parameters). For instance, some tools offer the possibility to be invoked in a verbose or quiet way or even to be invoked in a headless way (i.e., without GUI). * Short/long version: `-a` / `--add-to-command-line` * Required: no. * Taken values: The command(s) to be added to the command line. Example: $ python convert.py galaxy ... -a "--quiet --no-gui" Will generate the following `` element in the generated Galaxy *ToolConfig*: TOOL_NAME --quiet --no-gui ... ## Providing a default Category * Purpose: Input CTDs that lack a category will be sorted under the value given to this parameter. If this parameter is not provided, then the category `DEFAULT` will be used. * Short/long version: `-c` / `--default-category` * Required: no. * Taken values: The value for the default category to use for input CTDs lacking a category. Example: Suppose there is a folder containing several CTD files. Some of those CTDs don't have the optional attribute `category` and the rest belong to the `Data Processing` category. The following invocation: $ python convert.py galaxy ... -c Other will generate, for each of the categories, a different section. Additionally, CTDs lacking a category will be sorted under the given category, `Other`, as shown:
...
...
## Providing a Path for the Location of the *ToolConfig* Files * Purpose: The `tool_conf.xml` file contains references to files which in turn contain Galaxy *ToolConfig* files. Using this parameter, you can provide information about the location of your wrappers on your Galaxy instance. * Short/long version: `-g` / `--galaxy-tool-path` * Required: no. * Taken values: The path relative to your `$GALAXY_ROOT/tools` folder on which your tools are located. Example: $ python convert.py galaxy ... -g my_tools_folder Will generate `` elements in the generated `tool_conf.xml` as follows: In this example, `tool_conf.xml` refers to a file located on `$GALAXY_ROOT/tools/my_tools_folder/some_tool.xml`. ## Including additional Macros Files * Purpose: Include external macros files. * Short/long version: `-m` / `--macros` * Required: no. * Default: `macros.xml` * Taken values: List of paths of macros files to include. *ToolConfig* supports elaborate sections such as ``, ``, etc., that are identical across tools of the same suite. Macros files assist in the task of including external xml sections into *ToolConfig* files. For more information about the syntax of macros files, see: https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#Reusing_Repeated_Configuration_Elements There are some macros that are required, namely `stdio`, `requirements` and `advanced_options`. A template macro file is included in [macros.xml]. It can be edited to suit your needs and you could add extra macros or leave it as it is and include additional files. Every macro found in the provided files will be expanded. Please note that the used macros files **must** be copied to your Galaxy installation on the same location in which you place the generated *ToolConfig* files, otherwise Galaxy will not be able to parse the generated *ToolConfig* files! ## Including additional Test Macros files * Purpose: Include macros containing tests * `--test-macros` and `--test-macros-prefix` * Required: no. 
* Taken values: List of paths and corresponding prefixes. This allows you to specify macro file(s) containing tests. The macros should be named `` where `PREFIX` is the value specified with `--test-macros-prefix`. ## Generating a `datatypes_conf.xml` File * Purpose: Specify the destination of a generated `datatypes_conf.xml` file. * Short/long version: `-d` / `--datatypes-destination` * Required: no. * Taken values: The path in which `datatypes_conf.xml` will be generated. It is likely that your tools use file formats or mimetypes that have not been registered in Galaxy. The generator allows you to specify a path in which an automatically generated `datatypes_conf.xml` file will be created. Consult the next section to get information about how to register file formats and mimetypes. ## Providing Galaxy File Formats * Purpose: Register new file formats and mimetypes. * Short/long version: `-f` / `--formats-file` * Required: no. * Taken values: The path of a file describing formats. Galaxy supports the concept of file format in order to connect compatible ports, that is, input ports of a certain data format will be able to receive data from a port of the same format. This converter allows you to provide a personalized file in which you can relate the CTD data formats with supported Galaxy data formats. The format file is a simple text file, each line containing several columns separated by whitespace. The content of each column is as follows: * 1st column: file extension, this column is required. * 2nd column: data type, as listed in Galaxy, this column is optional. * 3rd column: full-named Galaxy data type, as it will appear in `datatypes_conf.xml`; this column is required if the second column is included. * 4th column: mimetype, this column is optional.
The following is an example of a valid "file formats" file: # CTD type # Galaxy type # Long Galaxy data type # Mimetype csv tabular galaxy.datatypes.data:Text fasta ini txt galaxy.datatypes.data:Text txt idxml txt galaxy.datatypes.xml:GenericXml application/xml options txt galaxy.datatypes.data:Text grid grid galaxy.datatypes.data:Grid Note that each line consists of either one, three or four columns. In the case of data types already registered in Galaxy (such as `fasta` and `txt` in the above example), only the first column is needed. In the case of data types that haven't been yet registered in Galaxy, the first three columns are needed (mimetype is optional). For information about Galaxy data types and subclasses, consult the following page: https://wiki.galaxyproject.org/Admin/Datatypes/Adding%20Datatypes [CTDopts]: https://github.com/genericworkflownodes/CTDopts [macros.xml]: https://github.com/WorkflowConversion/CTDConverter/blob/master/galaxy/macros.xml [CTDSchema]: https://github.com/genericworkflownodes/CTDSchemaCTDConverter-3.0a1/ctdconverter/galaxy/__init__.py000066400000000000000000000000001403724066100221710ustar00rootroot00000000000000CTDConverter-3.0a1/ctdconverter/galaxy/converter.py000077500000000000000000002716211403724066100224670ustar00rootroot00000000000000#!/usr/bin/env python import copy import json import os import os.path import re import sys from collections import OrderedDict from CTDopts.CTDopts import ( _Choices, _FileFormat, _InFile, _Null, _NumericRange, _OutFile, _OutPrefix, ModelError, ParameterGroup ) from lxml import etree from lxml.etree import ( CDATA, Element, ElementTree, parse, ParseError, strip_elements, SubElement ) from ..common import ( logger, utils ) from ..common.exceptions import ( ApplicationException, InvalidModelException ) # mapping to CTD types to Galaxy types TYPE_TO_GALAXY_TYPE = {int: 'integer', float: 'float', str: 'text', bool: 'boolean', _InFile: 'txt', _OutFile: 'txt', _Choices: 'select', _OutPrefix: 
def add_specific_args(parser):
    """
    add command line arguments specific for galaxy tool generation

    Only the help texts differ from the former version: adjacent string
    literals were glued together without a separating space and a few words
    were misspelled. Option names, destinations, defaults and actions are
    unchanged.

    @param parser an instance of ArgumentParser
    """
    parser.add_argument("-f", "--formats-file", dest="formats_file",
                        help="File containing the supported file formats. Run with '-h' or '--help' to see a "
                             "brief example on the layout of this file.", default=None, required=False)
    parser.add_argument("-a", "--add-to-command-line", dest="add_to_command_line",
                        help="Adds content to the command line", default="", required=False)
    parser.add_argument("-d", "--datatypes-destination", dest="data_types_destination",
                        help="Specify the location of a datatypes_conf.xml to modify and add the registered "
                             "data types. If the provided destination does not exist, a new file will be created.",
                        default=None, required=False)
    parser.add_argument("-c", "--default-category", dest="default_category", default="DEFAULT", required=False,
                        help="Default category to use for tools lacking a category when generating tool_conf.xml")
    parser.add_argument("-t", "--tool-conf-destination", dest="tool_conf_destination", default=None, required=False,
                        help="Specify the location of an existing tool_conf.xml that will be modified to include "
                             "the converted tools. If the provided destination does not exist, a new file will "
                             "be created.")
    parser.add_argument("-g", "--galaxy-tool-path", dest="galaxy_tool_path", default=None, required=False,
                        help="The path that will be prepended to the file names when generating tool_conf.xml")
    parser.add_argument("-r", "--required-tools", dest="required_tools_file", default=None, required=False,
                        help="Each line of the file will be interpreted as a tool name that needs translation. "
                             "Run with '-h' or '--help' to see a brief example on the format of this file.")
    parser.add_argument("-s", "--skip-tools", dest="skip_tools_file", default=None, required=False,
                        help="File containing a list of tools for which a Galaxy stub will not be generated. "
                             "Run with '-h' or '--help' to see a brief example on the format of this file.")
    # -m / --test-macros / --test-macros-prefix may be repeated; every occurrence
    # contributes one sub-list (action="append" combined with nargs="*")
    parser.add_argument("-m", "--macros", dest="macros_files", default=[], nargs="*",
                        action="append", required=False,
                        help="Import the additional given file(s) as macros. "
                             "The macros stdio, requirements and advanced_options are "
                             "required. Please see galaxy/macros.xml for an example of a "
                             "valid macros file. All defined macros will be imported.")
    parser.add_argument("--test-macros", dest="test_macros_files", default=[], nargs="*",
                        action="append", required=False,
                        help="Import tests from the given file(s) as macros. "
                             "The macro names must end with the id of the tools")
    parser.add_argument("--test-macros-prefix", dest="test_macros_prefix", default=[], nargs="*",
                        action="append", required=False,
                        help="The prefix of the macro name in the corresponding test macros file")
    parser.add_argument("--test-test", dest="test_test", action='store_true', default=False, required=False,
                        help="Generate a simple test for the internal unit tests.")
    parser.add_argument("--test-only", dest="test_only", action='store_true', default=False, required=False,
                        help="Generate only the test section.")
    parser.add_argument("--test-unsniffable", dest="test_unsniffable", nargs="+", default=[], required=False,
                        help="File extensions that can't be sniffed in Galaxy. "
                             "Needs to be the OpenMS extensions (1st column in --formats-file). "
                             "For testdata with such extensions ftype will be set in the test "
                             "according to the file extension")
    parser.add_argument("--tool-version", dest="tool_version", required=False, default=None,
                        help="Tool version to use (if not given it is extracted from the CTD)")
    parser.add_argument("--tool-profile", dest="tool_profile", required=False, default=None,
                        help="Tool profile version to use (if not given it is not set)")
    parser.add_argument("--bump-file", dest="bump_file", required=False, default=None,
                        help="json file defining tool versions. "
                             "tools not listed in the file default to 0. "
                             "if not given @GALAXY_VERSION@ is used")
macros_to_expand=macros_to_expand, parameter_hardcoder=args.parameter_hardcoder, test_test=args.test_test, test_only=args.test_only, test_unsniffable=args.test_unsniffable, test_macros_file_names=args.test_macros_files, test_macros_prefix=args.test_macros_prefix, tool_version=args.tool_version, tool_profile=args.tool_profile, bump=bump) def parse_bump_file(bump_file): if bump_file is None: return None with open(bump_file) as fp: return json.load(fp) def parse_tools_list_file(tools_list_file): """ """ tools_list = None if tools_list_file is not None: tools_list = [] with open(tools_list_file) as f: for line in f: if line is None or not line.strip() or line.strip().startswith("#"): continue else: tools_list.append(line.strip()) return tools_list def parse_macros_files(macros_file_names, tool_version, supported_file_types, required_macros=[], dont_expand=[]): """ """ macros_to_expand = [] for macros_file_name in macros_file_names: try: macros_file = open(macros_file_name) logger.info("Loading macros from %s" % macros_file_name, 0) root = parse(macros_file).getroot() for xml_element in root.findall("xml"): name = xml_element.attrib["name"] if name in macros_to_expand: logger.warning("Macro %s has already been found. Duplicate found in file %s." % (name, macros_file_name), 0) continue logger.info("Macro %s found" % name, 1) macros_to_expand.append(name) except ParseError as e: raise ApplicationException("The macros file " + macros_file_name + " could not be parsed. Cause: " + str(e)) except OSError as e: raise ApplicationException("The macros file " + macros_file_name + " could not be opened. 
Cause: " + str(e)) else: macros_file.close() tool_ver_tk = root.find("token[@name='@TOOL_VERSION@']") galaxy_ver_tk = root.find("token[@name='@GALAXY_VERSION@']") if tool_ver_tk is None: tool_ver_tk = add_child_node(root, "token", OrderedDict([("name", "@TOOL_VERSION@")])) tool_ver_tk.text = tool_version if galaxy_ver_tk is not None: if tool_version == tool_ver_tk.text: galaxy_ver_tk.text = str(int(galaxy_ver_tk.text)) else: tool_ver_tk.text = tool_version galaxy_ver_tk.text = "0" ext_foo = root.find("token[@name='@EXT_FOO@']") if ext_foo is None: ext_foo = add_child_node(root, "token", OrderedDict([("name", "@EXT_FOO@")])) g2o, o2g = get_fileformat_maps(supported_file_types) # make sure that the backup data type is in the map if 'txt' not in g2o: g2o['txt'] = 'txt' ext_foo.text = CDATA("""#def oms2gxyext(o) #set m={} #return m[o] #end def #def gxy2omsext(g) #set m={} #return m[g] #end def """.format(str(o2g), str(g2o))) tree = ElementTree(root) tree.write(macros_file_name, encoding="UTF-8", xml_declaration=True, pretty_print=True) # with open(macros_file_name, "w") as macros_file: # tree = ElementTree(root) # tree.write(macros_file, encoding="UTF-8", xml_declaration=True, pretty_print=True) # we depend on "stdio", "requirements" and "advanced_options" to exist on all the given macros files missing_needed_macros = [] for required_macro in required_macros: if required_macro not in macros_to_expand: missing_needed_macros.append(required_macro) if missing_needed_macros: raise ApplicationException( "The following required macro(s) were not found in any of the given macros files: %s, " "see galaxy/macros.xml for an example of a valid macros file." 
% ", ".join(missing_needed_macros)) # remove macros that should not be expanded for m in dont_expand: try: idx = macros_to_expand.index(m) del macros_to_expand[idx] except ValueError: pass return macros_to_expand def check_test_macros(test_macros_files, test_macros_prefix, parsed_ctds): tool_ids = set() for parsed_ctd in parsed_ctds: model = parsed_ctd.ctd_model tool_ids.add(model.name.replace(" ", "_")) for mf, mp in zip(test_macros_files, test_macros_prefix): macro_ids = set() try: with open(mf) as macros_file: root = parse(macros_file).getroot() for xml_element in root.findall("xml"): name = xml_element.attrib["name"] if not name.startswith(mp): logger.warning("Testmacro with invalid prefix %s." % (mp), 0) continue name = name[len(mp):] macro_ids.add(name) except ParseError as e: raise ApplicationException("The macros file " + mf + " could not be parsed. Cause: " + str(e)) except OSError as e: raise ApplicationException("The macros file " + mf + " could not be opened. Cause: " + str(e)) for t in tool_ids - macro_ids: logger.error("missing %s" % t) add_child_node(root, "xml", OrderedDict([("name", mp + t)])) if len(macro_ids - tool_ids): logger.warning("Unnecessary macros in {}: {}".format(mf, macro_ids - tool_ids)) tree = ElementTree(root) tree.write(mf, encoding="UTF-8", xml_declaration=True, pretty_print=True) def parse_file_formats(formats_file): """ """ supported_formats = [] if formats_file is not None: line_number = 0 with open(formats_file) as f: for line in f: line_number += 1 if line is None or not line.strip() or line.strip().startswith("#"): # ignore (it'd be weird to have something like: # if line is not None and not (not line.strip()) ... 
continue parsed_formats = line.strip().split() # valid lines contain either one or two columns if len(parsed_formats) == 1: supported_formats.append(DataType(parsed_formats[0], parsed_formats[0])) elif len(parsed_formats) == 2: supported_formats.append(DataType(parsed_formats[0], parsed_formats[1])) elif len(parsed_formats) == 3: composite = [tuple(x.split(":")) for x in parsed_formats[2].split(",")] supported_formats.append(DataType(parsed_formats[0], parsed_formats[1], composite)) else: logger.warning("Invalid line at line number %d of the given formats file. Line will be ignored:\n%s" % (line_number, line), 0) return supported_formats def get_fileformat_maps(supported_formats): """ convenience functions to compute dictionaries mapping Galaxy data types <-> CTD formats """ o2g = {} g2o = {} for s in supported_formats: if s.extension not in o2g: o2g[s.extension] = s.galaxy_extension if s.galaxy_extension not in g2o: g2o[s.galaxy_extension] = s.extension return g2o, o2g def validate_and_prepare_args(args, model): """ check command line arguments @param args command line arguments @return None """ # check that only one of skip_tools_file and required_tools_file has been provided if args.skip_tools_file is not None and args.required_tools_file is not None: raise ApplicationException( "You have provided both a file with tools to ignore and a file with required tools.\n" "Only one of -s/--skip-tools, -r/--required-tools can be provided.") # flatten macros_files to make sure that we have a list containing file names and not a list of lists utils.flatten_list_of_lists(args, "macros_files") utils.flatten_list_of_lists(args, "test_macros_files") utils.flatten_list_of_lists(args, "test_macros_prefix") # check that the arguments point to a valid, existing path input_variables_to_check = ["skip_tools_file", "required_tools_file", "macros_files", "formats_file"] for variable_name in input_variables_to_check: utils.validate_argument_is_valid_path(args, variable_name) # check 
that the provided output files, if provided, contain a valid file path (i.e., not a folder) output_variables_to_check = ["data_types_destination", "tool_conf_destination"] for variable_name in output_variables_to_check: file_name = getattr(args, variable_name) if file_name is not None and os.path.isdir(file_name): raise ApplicationException("The provided output file name (%s) points to a directory." % file_name) if not args.macros_files: # list is empty, provide the default value logger.warning("Using default macros from galaxy/macros.xml", 0) args.macros_files = [os.path.dirname(os.path.abspath(__file__)) + "/macros.xml"] if args.tool_version is None: args.tool_version = model.version def get_preferred_file_extension(): """ get the file extension for the output files @return "xml" """ return "xml" def _convert_internal(parsed_ctds, **kwargs): """ parse all input files into models using CTDopts (via utils) @param parsed_ctds the ctds @param kwargs skip_tools, required_tools, and additional parameters for expand_macros, create_command, create_inputs, create_outputs @return a tuple containing the model, output destination, origin file """ parameter_hardcoder = kwargs["parameter_hardcoder"] for parsed_ctd in parsed_ctds: model = parsed_ctd.ctd_model if kwargs["skip_tools"] is not None and model.name in kwargs["skip_tools"]: logger.info("Skipping tool %s" % model.name, 0) continue elif kwargs["required_tools"] is not None and model.name not in kwargs["required_tools"]: logger.info("Tool %s is not required, skipping it" % model.name, 0) continue origin_file = parsed_ctd.input_file output_file = parsed_ctd.suggested_output_file # overwrite attributes of the parsed ctd parameters as specified in hardcoded parameterd json for param in utils.extract_and_flatten_parameters(model): hardcoded_attributes = parameter_hardcoder.get_hardcoded_attributes(utils.extract_param_name(param), model.name, 'CTD') if hardcoded_attributes is not None: for a in hardcoded_attributes: if not 
hasattr(param, a): continue if a == "type": try: t = GALAXY_TYPE_TO_TYPE[hardcoded_attributes[a]] except KeyError: logger.error("Could not set hardcoded attribute {}={} for {}".format(a, hardcoded_attributes[a], param.name)) sys.exit(1) setattr(param, a, t) elif type(getattr(param, a)) is _FileFormat or (param.type in [_InFile, _OutFile, _OutPrefix] and a == "restrictions"): setattr(param, a, _FileFormat(str(hardcoded_attributes[a]))) elif type(getattr(param, a)) is _Choices: setattr(param, a, _Choices(str(hardcoded_attributes[a]))) elif type(getattr(param, a)) is _NumericRange: raise Exception("Overwriting of Numeric Range not implemented") else: setattr(param, a, hardcoded_attributes[a]) if "test_only" in kwargs and kwargs["test_only"]: test = create_test_only(parsed_ctd.ctd_model, **kwargs) tree = ElementTree(test) output_file = parsed_ctd.suggested_output_file logger.info("Writing to %s" % utils.get_filename(output_file), 1) tree.write(output_file, encoding="UTF-8", xml_declaration=False, pretty_print=True) continue logger.info("Converting {} (source {})".format(model.name, utils.get_filename(origin_file)), 0) tool = create_tool(model, kwargs.get("tool_profile", None), kwargs.get("bump", None)) write_header(tool, model) create_description(tool, model) import_macros(tool, model, **kwargs) expand_macros(tool, kwargs["macros_to_expand"]) # command, inputs, outputs = create_cio(tool, model, **kwargs) create_command(tool, model, **kwargs) create_configfiles(tool, model, **kwargs) inputs = create_inputs(tool, model, **kwargs) outputs = create_outputs(tool, model, **kwargs) if kwargs["test_test"]: create_tests(tool, inputs=copy.deepcopy(inputs), outputs=copy.deepcopy(outputs)) if kwargs["test_macros_prefix"]: create_tests(tool, test_macros_prefix=kwargs['test_macros_prefix'], name=model.name) create_help(tool, model) # citations are required to be at the end expand_macro(tool, "references") # wrap our tool element into a tree to be able to serialize it tree = 
ElementTree(tool) logger.info("Writing to %s" % utils.get_filename(output_file), 1) tree.write(output_file, encoding="UTF-8", xml_declaration=True, pretty_print=True) def write_header(tool, model): """ add comments to the tool header @param tool the tool xml @param model the ctd model """ tool.addprevious(etree.Comment( "This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). " "This file was automatically generated using CTDConverter.")) tool.addprevious(etree.Comment('Proposed Tool Section: [%s]' % model.opt_attribs.get("category", ""))) def create_tool(model, profile, bump): """ initialize the tool @param model the ctd model """ tool_id = model.name.replace(" ", "_") if bump is None: gxy_version = "@GALAXY_VERSION@" elif model.name in bump: gxy_version = str(bump[model.name]) elif tool_id in bump: gxy_version = str(bump[tool_id]) else: gxy_version = "@GALAXY_VERSION@" attrib = OrderedDict([("id", tool_id), ("name", model.name), ("version", "@TOOL_VERSION@+galaxy" + gxy_version)]) if profile is not None: attrib["profile"] = profile return Element("tool", attrib) def create_description(tool, model): """ add description to the tool @param tool the Galaxy tool @param model the ctd model """ if "description" in model.opt_attribs.keys() and model.opt_attribs["description"] is not None: description = SubElement(tool, "description") description.text = model.opt_attribs["description"] def create_configfiles(tool, model, **kwargs): """ create - - The former will create a json file containing the tool parameter values that can be accessed in cheetah with $args_json. Note that data_style="paths" (i.e. input data sets are included in the json) is set even if input files are given on the CLI. Reason is that in this way default values in the CTD can be restored for optional input files. The latter will contain hardcoded parameters. 
""" configfiles_node = add_child_node(tool, "configfiles") add_child_node(configfiles_node, "inputs", OrderedDict([("name", "args_json"), ("data_style", "paths")])) parameter_hardcoder = kwargs.get("parameter_hardcoder") hc_dict = dict() for param in utils.extract_and_flatten_parameters(model): hardcoded_value = parameter_hardcoder.get_hardcoded_value(utils.extract_param_name(param), model.name) if hardcoded_value is None: continue path = utils.extract_param_path(param) for i, v in enumerate(path[:-1]): try: utils.getFromDict(hc_dict, path[:i + 1]) except KeyError: utils.setInDict(hc_dict, path[:i + 1], {}) utils.setInDict(hc_dict, path, hardcoded_value) hc_node = add_child_node(configfiles_node, "configfile", OrderedDict([("name", "hardcoded_json")])) hc_node.text = CDATA(json.dumps(hc_dict).replace('$', r'\$')) # print(json.dumps(hc_dict)) def create_command(tool, model, **kwargs): """ @param tool the Galaxy tool @param model the ctd model @param kwargs """ # main command final_cmd = OrderedDict([('preprocessing', []), ('command', []), ('postprocessing', [])]) advanced_cmd = {'preprocessing': [], 'command': [], 'postprocessing': []} final_cmd['preprocessing'].extend(["@QUOTE_FOO@", "@EXT_FOO@", "#import re", "", "## Preprocessing"]) # - call the executable with -write_ctd to write the ctd file (with defaults) # - use fill_ctd.py to overwrite the defaults in the ctd file with the # Galaxy parameters in the JSON file (from inputs config file) # - feed the ctd file to the executable (with -ini) # note: input and output file parameters are still given on the command line # - output file parameters are not included in the JSON file # - input and output files are accessed through links / files that have the correct extension final_cmd['command'].extend(["", "## Main program call"]) final_cmd['command'].append(""" set -o pipefail && @EXECUTABLE@ -write_ctd ./ && python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' && @EXECUTABLE@ 
-ini @EXECUTABLE@.ctd""") final_cmd['command'].extend(kwargs["add_to_command_line"]) final_cmd['postprocessing'].extend(["", "## Postprocessing"]) advanced_command_start = "#if ${aon}cond.{aon}selector=='advanced':".format(aon=ADVANCED_OPTIONS_NAME) advanced_command_end = "#end if" parameter_hardcoder = kwargs["parameter_hardcoder"] supported_file_formats = kwargs["supported_file_formats"] g2o, o2g = get_fileformat_maps(supported_file_formats) for param in utils.extract_and_flatten_parameters(model): param = modify_param_for_galaxy(param) param_cmd = {'preprocessing': [], 'command': [], 'postprocessing': []} command_line_prefix = utils.extract_command_line_prefix(param, model) # TODO use utils.extract_param_name(param).replace(":", "_")? Then hardcoding ctd variables (with :) and tool variables (with _) can be distinguished if parameter_hardcoder.get_blacklist(utils.extract_param_name(param), model.name): continue hardcoded_value = parameter_hardcoder.get_hardcoded_value(utils.extract_param_name(param), model.name) if hardcoded_value is not None: pass # TODO hardcoded values should go to # param_cmd['command'].append("%s %s" % (command_line_prefix, hardcoded_value)) else: # in the else branch the parameter is neither blacklisted nor hardcoded... _actual_parameter = get_galaxy_parameter_path(param) actual_parameter = get_galaxy_parameter_path(param, fix_underscore=True) # all but bool params need the command line argument (bools have it already in the true/false value) if param.type is _OutFile or param.type is _OutPrefix or param.type is _InFile: param_cmd['command'].append(command_line_prefix) # preprocessing for file inputs: # - create a dir with name param.name # - create a link to id.ext in this directory # rationale: in the autogenerated tests the same file was used as input to multiple parameters # this leads to conflicts while linking... 
might also be better in general if param.type is _InFile: param_cmd['preprocessing'].append("mkdir %s &&" % actual_parameter) if param.is_list: param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + _actual_parameter + ") if f])} && ") param_cmd['preprocessing'].append("${' '.join([\"ln -s '%s' '" + actual_parameter + "/%s/%s.%s' && \" % (f, i, re.sub('[^\\w\\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($" + _actual_parameter + ") if f])}") param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\\w\\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($" + _actual_parameter + ") if f])}") else: param_cmd['preprocessing'].append("ln -s '$" + _actual_parameter + "' '" + actual_parameter + "/${re.sub(\"[^\\w\\-_]\", \"_\", $" + _actual_parameter + ".element_identifier)}.$gxy2omsext($" + _actual_parameter + ".ext)' &&") param_cmd['command'].append("'" + actual_parameter + "/${re.sub(\"[^\\w\\-_]\", \"_\", $" + _actual_parameter + ".element_identifier)}.$gxy2omsext($" + _actual_parameter + ".ext)'") elif param.type is _OutPrefix: param_cmd['preprocessing'].append("mkdir %s &&" % actual_parameter) param_cmd['command'].append(actual_parameter + "/") elif param.type is _OutFile: _actual_parameter = get_galaxy_parameter_path(param, separator="_") actual_parameter = get_galaxy_parameter_path(param, separator="_", fix_underscore=True) # check if there is a parameter that sets the format # if so we add an extension to the generated files which will be used to # determine the format in the output tag # in all other cases (corresponding input / there is only one allowed format) # the format will be set in the output tag formats = get_galaxy_formats(param, model, o2g, TYPE_TO_GALAXY_TYPE[param.type]) type_param = get_out_type_param(param, model, parameter_hardcoder) corresponding_input, fmt_from_corresponding = 
get_corresponding_input(param, model) # print("ci %s ffc %s" % (corresponding_input.name, fmt_from_corresponding)) # print("formats %s" % (formats)) if corresponding_input is not None: actual_input_parameter = get_galaxy_parameter_path(corresponding_input) else: actual_input_parameter = None # print(len(formats) > 1, (corresponding_input is None or not # fmt_from_corresponding)) if type_param is not None: type_param_name = get_galaxy_parameter_path(type_param) elif len(formats) > 1 and (corresponding_input is None or not fmt_from_corresponding): # and not param.is_list: type_param_name = get_galaxy_parameter_path(param, suffix="type") else: type_param_name = None # print("tp %s" % type_param_name) param_cmd['preprocessing'].append("mkdir " + actual_parameter + " &&") # if there is only one format (the outoput node sets format using the format attribute of the data/discover node) # - single file: write to temp file with oms extension and move this to the actual result file # - lists: write to files with the oms extension and remove the extension afterwards (discovery with __name__) if len(formats) == 1: fmt = formats.pop() if param.is_list: logger.info(f"1 fmt + list {param.name} -> {actual_input_parameter}", 1) param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + actual_input_parameter + ") if f])} && ") param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\\w\\-_]', '_', f.element_identifier), $gxy2omsext(\"" + fmt + "\")) for i, f in enumerate($" + actual_input_parameter + ") if f])}") param_cmd['postprocessing'].append("${' '.join([\"&& mv -n '" + actual_parameter + "/%(bn)s/%(id)s.%(gext)s' '" + _actual_parameter + "/%(bn)s/%(id)s'\"%{\"bn\": i, \"id\": re.sub('[^\\w\\-_]', '_', f.element_identifier), \"gext\": $gxy2omsext(\"" + fmt + "\")} for i, f in enumerate($" + actual_input_parameter + ") if f])}") else: logger.info("1 fmt + dataset %s" % 
param.name, 1) param_cmd['command'].append("'" + actual_parameter + "/output.${gxy2omsext(\"" + fmt + "\")}'") param_cmd['postprocessing'].append("&& mv '" + actual_parameter + "/output.${gxy2omsext(\"" + fmt + "\")}' '$" + _actual_parameter + "'") # if there is a type parameter then we use the type selected by the user # - single: write to temp file with the oms extension and mv it to the actual file output which is treated via change_format # - list: let the command create output files with the oms extensions, postprocessing renames them to the galaxy extensions, output is then discover + __name_and_ext__ elif type_param_name is not None: if param.is_list: logger.info("type + list %s" % param.name, 1) param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + actual_input_parameter + ") if f])} && ") param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\\w\\-_]', '_', f.element_identifier), $" + type_param_name + ") for i, f in enumerate($" + actual_input_parameter + ") if f])}") param_cmd['postprocessing'].append("${' '.join([\"&& mv -n '" + actual_parameter + "/%(bn)s/%(id)s.%(omsext)s' '" + actual_parameter + "/%(bn)s/%(id)s.%(gext)s'\"%{\"bn\": i, \"id\": re.sub('[^\\w\\-_]', '_', f.element_identifier), \"omsext\":$" + type_param_name + ", \"gext\": $oms2gxyext(str($" + type_param_name + "))} for i, f in enumerate($" + actual_input_parameter + ") if f])}") else: logger.info("type + dataset %s" % param.name, 1) # 1st create file with openms extension (often required by openms) # then move it to the actual place specified by the parameter # the format is then set by the tag using param_cmd['command'].append("'" + actual_parameter + "/output.${" + type_param_name + "}'") param_cmd['postprocessing'].append("&& mv '" + actual_parameter + "/output.${" + type_param_name + "}' '$" + actual_parameter + "'") elif actual_input_parameter is not None: if param.is_list: 
logger.info("actual + list %s" % param.name, 1) param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + actual_input_parameter + ") if f])} && ") param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\\w\\-_]', '_', f.element_identifier), f.ext) for i, f in enumerate($" + actual_input_parameter + ") if f])}") else: logger.info(f"actual + dataset {param.name} {actual_input_parameter} {corresponding_input.is_list}", 1) if corresponding_input.is_list: param_cmd['command'].append("'" + actual_parameter + "/output.${" + actual_input_parameter + "[0].ext}'") param_cmd['postprocessing'].append("&& mv '" + actual_parameter + "/output.${" + actual_input_parameter + "[0].ext}' '$" + _actual_parameter + "'") else: param_cmd['command'].append("'" + actual_parameter + "/output.${" + actual_input_parameter + ".ext}'") param_cmd['postprocessing'].append("&& mv '" + actual_parameter + "/output.${" + actual_input_parameter + ".ext}' '$" + _actual_parameter + "'") else: if param.is_list: raise Exception("output parameter itemlist %s without corresponding input") else: logger.info("else + dataset %s" % param.name, 1) param_cmd['command'].append("'$" + _actual_parameter + "'") # # select with multiple = true # elif is_selection_parameter(param) and param.is_list: # param_cmd['command'].append("${' '.join(['\"%s\"'%str(_) for _ in str($" + actual_parameter + ").split(',')])}") # elif param.is_list: # param_cmd['command'].append("$quote($%s" % actual_parameter + ")") # #command += "${' '.join([\"'%s'\"%str(_) for _ in $" + actual_parameter + "])}\n" # elif is_boolean_parameter(param): # param_cmd['command'].append("$%s" % actual_parameter + "") # else: # param_cmd['command'].append('"$' + actual_parameter + '"') # add if statement for optional parameters and preprocessing # - for optional outputs (param_out_x) the presence of the parameter # depends on the additional input (param_x) 
-> need no if # - real string parameters (i.e. ctd type string wo restrictions) also # need no if (otherwise the empty string could not be provided) if not (param.required or is_boolean_parameter(param) or (param.type is str and param.restrictions is None)): # and not(param.type is _InFile and param.is_list): actual_parameter = get_galaxy_parameter_path(param, suffix="FLAG", fix_underscore=True) _actual_parameter = get_galaxy_parameter_path(param, suffix="FLAG") for stage in param_cmd: if len(param_cmd[stage]) == 0: continue # special case for optional itemlists: for those if no option is selected only the parameter must be specified if is_selection_parameter(param) and param.is_list and param.required is False: param_cmd[stage] = [param_cmd[stage][0]] + ["#if $" + _actual_parameter + ":"] + utils.indent(param_cmd[stage][1:]) + ["#end if"] elif is_selection_parameter(param) or param.type is _InFile: param_cmd[stage] = ["#if $" + _actual_parameter + ":"] + utils.indent(param_cmd[stage]) + ["#end if"] elif param.type is _OutFile or param.type is _OutPrefix: param_cmd[stage] = ["#if \"" + param.name + "_FLAG\" in str($OPTIONAL_OUTPUTS).split(',')"] + utils.indent(param_cmd[stage]) + ["#end if"] else: param_cmd[stage] = ["#if str($" + _actual_parameter + "):"] + utils.indent(param_cmd[stage]) + ["#end if"] for stage in param_cmd: if len(param_cmd[stage]) == 0: continue if param.advanced and hardcoded_value is None and not (param.type is _OutFile or param.type is _OutPrefix): advanced_cmd[stage].extend(param_cmd[stage]) else: final_cmd[stage].extend(param_cmd[stage]) for stage in advanced_cmd: if len(advanced_cmd[stage]) == 0: continue advanced_cmd[stage] = [advanced_command_start] + utils.indent(advanced_cmd[stage]) + [advanced_command_end] final_cmd[stage].extend(advanced_cmd[stage]) out, optout = all_outputs(model, parameter_hardcoder) if len(optout) > 0 or len(out) + len(optout) == 0: stdout = ["| tee '$stdout'"] if len(optout) > 0: stdout = ["#if 
len(str($OPTIONAL_OUTPUTS).split(',')) == 0"] + utils.indent(stdout) + ["#end if"] final_cmd['command'].extend(stdout) ctd_out = ["#if \"ctd_out_FLAG\" in $OPTIONAL_OUTPUTS"] + utils.indent(["&& mv '@EXECUTABLE@.ctd' '$ctd_out'"]) + ["#end if"] final_cmd['postprocessing'].extend(ctd_out) command_node = add_child_node(tool, "command") command_node.attrib["detect_errors"] = "exit_code" command_node.text = CDATA("\n".join(sum(final_cmd.values(), []))) def import_macros(tool, model, **kwargs): """ creates the xml elements needed to import the needed macros files @param tool the Galaxy tool @param model the ctd model @param kwargs """ macros_node = add_child_node(tool, "macros") token_node = add_child_node(macros_node, "token") token_node.attrib["name"] = "@EXECUTABLE@" token_node.text = utils.extract_tool_executable_path(model, kwargs["default_executable_path"]) # add nodes for macro_file_name in kwargs["macros_file_names"] + kwargs["test_macros_file_names"]: macro_file = open(macro_file_name) import_node = add_child_node(macros_node, "import") # do not add the path of the file, rather, just its basename import_node.text = os.path.basename(macro_file.name) def expand_macro(node, macro, attribs=None): """Add to node.""" expand_node = add_child_node(node, "expand") expand_node.attrib["macro"] = macro if attribs: for a in attribs: expand_node.attrib[a] = attribs[a] return expand_node # and to "expand" the macros in a node def expand_macros(node, macros_to_expand): # add nodes for expand_macro in macros_to_expand: expand_node = add_child_node(node, "expand") expand_node.attrib["macro"] = expand_macro def get_galaxy_parameter_path(param, separator=".", suffix=None, fix_underscore=False): """ Get the complete path for a parameter as a string where the path components are joined by the given separator. A given suffix can be appended. 
""" p = get_galaxy_parameter_name(param, suffix, fix_underscore) path = utils.extract_param_path(param, fix_underscore) if len(path) > 1: return (separator.join(path[:-1]) + separator + p).replace("-", "_") elif param.advanced and (param.type is not _OutFile or suffix): return ADVANCED_OPTIONS_NAME + "cond." + p else: return p def get_galaxy_parameter_name(param, suffix=None, fix_underscore=False): """ get the name of the parameter used in the galaxy tool - replace : and - by _ - add suffix for output parameters if not None the idea of suffix is to be used for optional outputs (out_x) for which an additional boolean input (out_x_FLAG) exists @param param the parameter @param suffix suffix to append @return the name used for the parameter in the tool form """ p = param.name.replace("-", "_") if fix_underscore and p.startswith("_"): p = p[1:] if param.type is _OutFile and suffix is not None: return f"{p}_{suffix}" else: return "%s" % p def get_out_type_param(out_param, model, parameter_hardcoder): """ check if there is a parameter that has the same name with appended _type and return it if present, otherwise return None """ if parameter_hardcoder.get_blacklist(out_param.name + "_type", model.name): return None for param in utils.extract_and_flatten_parameters(model): if param.name == out_param.name + "_type": return param return None def is_in_type_param(param, model): return is_type_param(param, model, [_InFile]) def is_out_type_param(param, model): """ check if the parameter is output_type parameter - the name ends with _type and there is an output parameter without this suffix and return True iff this is the case """ return is_type_param(param, model, [_OutFile, _OutPrefix]) def is_type_param(param, model, tpe): """ check if the parameter is _type parameter of an in/output - the name ends with _type and there is an output parameter without this suffix and return True iff this is the case """ if not param.name.endswith("_type"): return False for out_param in 
utils.extract_and_flatten_parameters(model): if out_param.type not in tpe: continue if param.name == out_param.name + "_type": return True return False def get_corresponding_input(out_param, model): """ get the input parameter corresponding to the given output 1st try to get the input with the type (single file/list) and same format restrictions if this fails get the input that has the same type in both cases there must be only one such input return the found input parameter and True iff the 1st case applied """ c = get_input_with_same_restrictions(out_param, model, True) if c is None: return (get_input_with_same_restrictions(out_param, model, False), False) else: return (c, True) def get_input_with_same_restrictions(out_param, model, check_formats): """ get the input parameter that has the same restrictions (ctd file_formats) - input and output must both be lists of both be simple parameters """ matching = [] for allow_different_type in [False, True]: for param in utils.extract_and_flatten_parameters(model): if param.type is not _InFile: continue # logger.error("%s %s %s %s %s %s" %(out_param.name, param.name, param.is_list, out_param.is_list, param.restrictions, out_param.restrictions)) if allow_different_type or param.is_list == out_param.is_list: if check_formats: if param.restrictions is None and out_param.restrictions is None: matching.append(param) elif param.restrictions is not None and out_param.restrictions is not None and param.restrictions.formats == out_param.restrictions.formats: matching.append(param) else: matching.append(param) # logger.error("match %s "%([_.name for _ in matching])) if len(matching) > 0: break if len(matching) == 1: return matching[0] else: return None def create_inputs(tool, model, **kwargs): """ create input section of the Galaxy tool @param tool the Galaxy tool @param model the ctd model @param kwargs @return inputs node """ inputs_node = SubElement(tool, "inputs") section_nodes = dict() section_params = dict() # some suites 
(such as OpenMS) need some advanced options when handling inputs advanced_node = Element("expand", OrderedDict([("macro", ADVANCED_OPTIONS_NAME + "macro")])) parameter_hardcoder = kwargs["parameter_hardcoder"] supported_file_formats = kwargs["supported_file_formats"] g2o, o2g = get_fileformat_maps(supported_file_formats) # treat all non output-file/advanced/blacklisted/hardcoded parameters as inputs for param in utils.extract_and_flatten_parameters(model, True): if type(param) is ParameterGroup: title, help_text = generate_label_and_help(param.description) section_params[utils.extract_param_name(param)] = param section_nodes[utils.extract_param_name(param)] = Element("section", OrderedDict([("name", param.name), ("title", title), ("help", help_text), ("expanded", "false")])) continue param = modify_param_for_galaxy(param) # no need to show hardcoded parameters hardcoded_value = parameter_hardcoder.get_hardcoded_value(utils.extract_param_name(param), model.name) if hardcoded_value is not None: continue if parameter_hardcoder.get_blacklist(utils.extract_param_name(param), model.name): continue # do not output file type parameters for inputs since file types are # known by Galaxy and set automatically by extension (which comes from # the Galaxy data type which is translated to OpenMS datatype as defined # in filetypes.txt ) if is_in_type_param(param, model): continue if utils.extract_param_name(param.parent) in section_nodes: parent_node = section_nodes[utils.extract_param_name(param.parent)] elif param.advanced: parent_node = advanced_node else: parent_node = inputs_node # sometimes special inputs are needed for outfiles: if param.type is _OutFile or param.type is _OutPrefix: # if there are multiple possible output formats, but no parameter to choose the type or a # corresponding input then add a selection parameter formats = get_galaxy_formats(param, model, o2g, TYPE_TO_GALAXY_TYPE[_OutFile]) type_param = get_out_type_param(param, model, parameter_hardcoder) 
corresponding_input, fmt_from_corresponding = get_corresponding_input(param, model) if len(formats) > 1 and type_param is None and (corresponding_input is None or not fmt_from_corresponding): # and not param.is_list: fmt_select = add_child_node(parent_node, "param", OrderedDict([("name", param.name + "_type"), ("type", "select"), ("optional", "false"), ("label", f"File type of output {param.name} ({param.description})")])) g2o, o2g = get_fileformat_maps(kwargs["supported_file_formats"]) # for f in formats: # option_node = add_child_node(fmt_select, "option", OrderedDict([("value", g2o[f])]), f) for choice in param.restrictions.formats: option_node = add_child_node(fmt_select, "option", OrderedDict([("value", str(choice))])) option_node.text = o2g[str(choice)] if choice.lower() != o2g[str(choice)]: option_node.text += " (%s)" % choice continue # create the actual param node and fill the attributes param_node = add_child_node(parent_node, "param") create_param_attribute_list(param_node, param, model, kwargs["supported_file_formats"]) hardcoded_attributes = parameter_hardcoder.get_hardcoded_attributes(param.name, model.name, 'XML') if hardcoded_attributes is not None: for a in hardcoded_attributes: param_node.attrib[a] = str(hardcoded_attributes[a]) section_parents = [utils.extract_param_name(section_params[sn].parent) for sn in section_nodes] for sn in section_nodes: if len(section_nodes[sn]) == 0 and sn not in section_parents: continue if utils.extract_param_name(section_params[sn].parent) in section_nodes: section_nodes[utils.extract_param_name(section_params[sn].parent)].append(section_nodes[sn]) else: inputs_node.append(section_nodes[sn]) # if there is an advanced section then append it at the end of the inputs inputs_node.append(advanced_node) # Add select for optional outputs out, optout = all_outputs(model, parameter_hardcoder) attrib = OrderedDict([("name", "OPTIONAL_OUTPUTS"), ("type", "select"), ("optional", "true"), ("multiple", "true"), ("label", 
"Optional outputs")]) # if len(out) == 0 and len(out) + len(optout) > 0: # attrib["optional"] = "false" # else: # attrib["optional"] = "true" param_node = add_child_node(inputs_node, "param", attrib) for o in optout: title, help_text = generate_label_and_help(o.description) option_node = add_child_node(param_node, "option", OrderedDict([("value", o.name + "_FLAG")]), text=f"{o.name} ({title})") option_node = add_child_node(param_node, "option", OrderedDict([("value", "ctd_out_FLAG")]), text="Output used ctd (ini) configuration file") return inputs_node def is_default(value, param): """ check if the value is the default of the param or if the value is in the defaults of param """ return param.default == value or (type(param.default) is list and value in param.default) def get_formats(param, model, o2g): """ determine format attribute from the CTD restictions (i.e. the OpenMS extensions) - also check if all listed possible formats are supported in Galaxy and warn if necessary """ if param.restrictions is None: return [] elif type(param.restrictions) is _FileFormat: choices = param.restrictions.formats elif is_out_type_param(param, model): choices = param.restrictions.choices else: raise InvalidModelException("Unrecognized restriction type [%(type)s] " "for [%(name)s]" % {"type": type(param.restrictions), "name": param.name}) # check if there are formats that have not been registered yet... formats = set() for format_name in choices: if format_name not in o2g: logger.warning(f"Ignoring unknown format {format_name} for parameter {param.name}", 1) else: formats.add(format_name) return sorted(formats) def get_galaxy_formats(param, model, o2g, default=None): """ determine galaxy formats for a parm (i.e. list of allowed Galaxy extensions) from the CTD restictions (i.e. 
the OpenMS extensions) - if there is a single one, then take this - if there is none than use given default """ formats = get_formats(param, model, o2g) gxy_formats = {o2g[_] for _ in formats if _ in o2g} if len(gxy_formats) == 0: if default is not None: gxy_formats.add(default) else: raise InvalidModelException("No supported formats [%(type)s] " "for [%(name)s]" % {"type": type(param.restrictions), "name": param.name}) return sorted(gxy_formats) def create_param_attribute_list(param_node, param, model, supported_file_formats): """ get the attributes of input parameters @param param_node the galaxy tool param node @param param the ctd parameter @param supported_file_formats """ g2o, o2g = get_fileformat_maps(supported_file_formats) # set the name, argument and a first guess for the type (which will be over written # in some cases .. see below) # even if the conversion relies on the fact that the param names are identical # to the ctd ITEM names we replace dashes by underscores because input and output # parameters need to be treated in cheetah. variable names are currently fixed back # to dashes in fill_ctd.py. 
currently there seems to be only a single tool # requiring this https://github.com/OpenMS/OpenMS/pull/4529 param_node.attrib["name"] = get_galaxy_parameter_name(param) param_node.attrib["argument"] = "-%s" % utils.extract_param_name(param) param_type = TYPE_TO_GALAXY_TYPE[param.type] if param_type is None: raise ModelError("Unrecognized parameter type %(type)s for parameter %(name)s" % {"type": param.type, "name": param.name}) # ITEMLIST is rendered as text field (even if its integers or floats), an # exception is files which are treated a bit below if param.is_list: param_type = "text" if is_selection_parameter(param): param_type = "select" if len(param.restrictions.choices) < 5: param_node.attrib["display"] = "checkboxes" if param.is_list: param_node.attrib["multiple"] = "true" if is_boolean_parameter(param): param_type = "boolean" if param.type is _InFile: # assume it's just text unless restrictions are provided param_node.attrib["type"] = "data" param_node.attrib["format"] = ",".join(get_galaxy_formats(param, model, o2g, TYPE_TO_GALAXY_TYPE[_InFile])) # in the case of multiple input set multiple flag if param.is_list: param_node.attrib["multiple"] = "true" else: param_node.attrib["type"] = param_type # set the optional attribute of parameters # # OpenMS uses sets text, int, select, bool parameters that have a default # as optional (required=False), the default value is set implicitly if no # value is given. # This is reasonable for the CLI because one certainly does not want the # user to specify the default manually for all parameters. # For Galaxy tools setting these parameters as required leads to the # equivalent behavior. Assuming required is better because it makes # the implicit setting of parameters more transparent to the user # (in Galaxy the default would be prefilled in the form and at least # one option needs to be selected). 
if not (param.default is None or type(param.default) is _Null) and param_node.attrib["type"] in ["integer", "float", "text", "boolean", "select"]: logger.error("%s %s %s %s %s" % (param.name, param.default is None, type(param.default) is _Null, param_type, param.type)) param_node.attrib["optional"] = "false" else: param_node.attrib["optional"] = str(not param.required).lower() # check for parameters with restricted values (which will correspond to a "select" in galaxy) if param.restrictions is not None or param_type == "boolean": # it could be either _Choices or _NumericRange, with special case for boolean types if param_type == "boolean": create_boolean_parameter(param_node, param) elif type(param.restrictions) is _Choices: # TODO if the parameter is used to select the output file type the # options need to be replaced with the Galaxy data types # if is_out_type_param(param, model): # param.restrictions.choices = get_supported_file_types(param.restrictions.choices, supported_file_formats) # create as many