diff --git a/modules/ensembl_modules/download/dsetsmeta4genome/environment.yml b/modules/ensembl_modules/download/dsetsmeta4genome/environment.yml
new file mode 100644
index 0000000..4b3c9d3
--- /dev/null
+++ b/modules/ensembl_modules/download/dsetsmeta4genome/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+# Tools invoked by main.nf (`datasets` and `jq`).
+# TODO: pin both to the versions shipped in the module's container image.
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "conda-forge::jq"
+  - "conda-forge::ncbi-datasets-cli"
diff --git a/modules/ensembl_modules/download/dsetsmeta4genome/main.nf b/modules/ensembl_modules/download/dsetsmeta4genome/main.nf
new file mode 100644
index 0000000..efacc41
--- /dev/null
+++ b/modules/ensembl_modules/download/dsetsmeta4genome/main.nf
@@ -0,0 +1,86 @@
+// See the NOTICE file distributed with this work for additional information
+// regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Fetch the NCBI Datasets genome summary for `meta.accession` and store it as
+// `ncbi_stats.json`. If a GCA_ accession yields no reports, the query is
+// retried with the accession rewritten to GCF_ (RefSeq).
+process DOWNLOAD_DSETSMETA4GENOME {
+    tag "$meta.id"
+    label 'process_low'
+    container 'ensemblorg/datasets-cli:v16.33.0'
+
+    input:
+    val(meta) // with keys [ id, accession ]
+
+    output:
+    tuple val(meta), path("ncbi_stats.json"), emit: json
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    shell:
+    // NOTE(review): left without `def`, as in the original, on the assumption
+    // that `!{output}` is resolved from the task context -- confirm before changing.
+    output = "ncbi_stats.json"
+    '''
+    # Let a failing `datasets` call fail the whole pipeline, not only `jq`.
+    set -o pipefail
+
+    echo "Calling datasets-cli.... datasets 'summary' 'genome' 'accession' [!{meta.accession}]'"
+
+    # Pipe datasets to jq instead of '--as-json-lines' to
+    # obtain a total_count of reports returned.
+    # The pipeline is tested directly by `if` so that the message below is
+    # still printed when the task shell runs with errexit (`set -e`).
+    if ! datasets summary genome accession "!{meta.accession}" | jq '.' > !{output}; then
+        echo "Invalid or unsupported assembly accession: !{meta.accession}"
+        exit 1
+    fi
+
+    # Check if it should maybe be using RefSeq?
+    # `// 0` makes jq print 0 when total_count is absent from the report.
+    if [[ $(jq '.total_count // 0' !{output}) -eq 0 ]] && [[ "!{meta.accession}" =~ "GCA_" ]]; then
+        accession=$(echo "!{meta.accession}" | sed 's/^GCA_/GCF_/')
+        echo "Trying again with RefSeq accession: $accession"
+        if ! datasets summary genome accession "$accession" | jq '.' > !{output}; then
+            echo "Invalid or unsupported assembly accession: $accession"
+            exit 1
+        fi
+    fi
+
+    # Get version from datasets and jq
+    cat <<-END_VERSIONS > versions.yml
+    "!{task.process}":
+        NcbiDatasets: $(datasets --version | cut -f 2 -d :)
+        Jq: $(jq --version)
+    END_VERSIONS
+    '''
+
+    stub:
+    // A double-quoted script is used so Groovy interpolates `${...}`;
+    // shell substitutions are escaped as `\$(...)` to reach bash untouched.
+    def output_file = "ncbi_stats.json"
+    """
+    touch ${output_file}
+
+    # Get version from datasets and jq
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        NcbiDatasets: \$(datasets --version | cut -f 2 -d :)
+        Jq: \$(jq --version)
+    END_VERSIONS
+    """
+}
diff --git a/modules/ensembl_modules/download/dsetsmeta4genome/meta.yml b/modules/ensembl_modules/download/dsetsmeta4genome/meta.yml
new file mode 100644
index 0000000..f33f31d
--- /dev/null
+++ b/modules/ensembl_modules/download/dsetsmeta4genome/meta.yml
@@ -0,0 +1,62 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "download_dsetsmeta4genome"
+description: |
+  Fetch the NCBI Datasets genome summary report for an assembly accession
+  and save it as JSON. If a GenBank (GCA_) accession returns no reports,
+  the query is retried with the accession rewritten to RefSeq (GCF_).
+keywords:
+  - download
+  - genome
+  - assembly
+  - metadata
+  - ncbi
+tools:
+  - "datasets":
+      description: "NCBI Datasets command-line client"
+      homepage: "https://www.ncbi.nlm.nih.gov/datasets/"
+      documentation: "https://www.ncbi.nlm.nih.gov/datasets/docs/v2/"
+      tool_dev_url: "https://github.com/ncbi/datasets"
+      doi: ""
+      # NOTE(review): licence string taken from the upstream notice -- confirm.
+      licence: ["Public Domain"]
+      identifier: ""
+  - "jq":
+      description: "Command-line JSON processor"
+      homepage: "https://jqlang.github.io/jq/"
+      documentation: "https://jqlang.github.io/jq/manual/"
+      tool_dev_url: "https://github.com/jqlang/jq"
+      doi: ""
+      licence: ["MIT"]
+      identifier: ""
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing genome information, with keys `id` and `accession`
+        e.g. `[ id:'ecoli', accession:'GCA_000005845.2' ]`
+
+output:
+  - json:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing genome information, as received on input
+            e.g. `[ id:'ecoli', accession:'GCA_000005845.2' ]`
+      - "ncbi_stats.json":
+          type: file
+          description: NCBI Datasets genome summary report in JSON format
+          pattern: "ncbi_stats.json"
+          ontologies:
+            - edam: "http://edamontology.org/format_3464" # JSON
+  - versions:
+      - "versions.yml":
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+
+authors:
+  - "@ensembl-dev"
+maintainers:
+  - "@ensembl-dev"
diff --git a/modules/ensembl_modules/download/dsetsmeta4genome/tests/main.nf.test b/modules/ensembl_modules/download/dsetsmeta4genome/tests/main.nf.test
new file mode 100644
index 0000000..4da464a
--- /dev/null
+++ b/modules/ensembl_modules/download/dsetsmeta4genome/tests/main.nf.test
@@ -0,0 +1,59 @@
+// Regenerate the snapshot for this file with:
+// nf-core modules test download/dsetsmeta4genome
+nextflow_process {
+
+    name "Test Process DOWNLOAD_DSETSMETA4GENOME"
+    script "../main.nf"
+    process "DOWNLOAD_DSETSMETA4GENOME"
+
+    tag "modules"
+    tag "modules_ensemblmodules"
+    tag "download"
+    tag "download/dsetsmeta4genome"
+
+    // Live query: needs network access and assumes GCA_000005845.2 stays
+    // resolvable at NCBI. The report body is not snapshotted because NCBI may
+    // update it; only the report count and the tool versions are checked.
+    test("ecoli - GCA_000005845.2") {
+
+        when {
+            process {
+                """
+                input[0] = [ id:'test', accession:'GCA_000005845.2' ] // meta map
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.json[0][1]).json.total_count >= 1 },
+                { assert snapshot(process.out.versions).match() }
+            )
+        }
+
+    }
+
+    // Stub run: no query is made, so the whole output can be snapshotted.
+    test("ecoli - GCA_000005845.2 - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [ id:'test', accession:'GCA_000005845.2' ] // meta map
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}