[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
06/06: gnu: wfmash: Enable test suite.
From: guix-commits
Subject: 06/06: gnu: wfmash: Enable test suite.
Date: Thu, 31 Mar 2022 09:31:57 -0400 (EDT)
efraim pushed a commit to branch master
in repository guix.
commit 8f3dc994bbcb3028318e3d597f829a12396110c2
Author: Efraim Flashner <efraim@flashner.co.il>
AuthorDate: Thu Mar 31 16:13:57 2022 +0300
gnu: wfmash: Enable test suite.
* gnu/packages/bioinformatics.scm (wfmash)[arguments]: Don't disable
tests. Add custom 'check phase based on upstream's own CI tests.
[native-inputs]: Add samtools.
---
gnu/packages/bioinformatics.scm | 109 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 108 insertions(+), 1 deletion(-)
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 7a7f2f722e..fd659b59f8 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -16154,13 +16154,120 @@ language.")
(("!__x86_64__") "0"))))))
(build-system cmake-build-system)
(arguments
- (list #:tests? #f)) ; no tests
+ (list
+ #:phases
+ #~(modify-phases %standard-phases
+ (replace 'check
+ ;; Adapted from .github/workflows/test_on_push.yml
+ (lambda* (#:key tests? inputs #:allow-other-keys)
+ (when tests?
+ (let ((samtools (search-input-file inputs "/bin/samtools")))
+ ;; This is the easiest way to access the data
+ ;; needed for the test suite.
+ (symlink (string-append "../wfmash-v" #$version "/data")
+ "data")
+ (and
+ ;; This test takes 60 minutes on riscv64-linux.
+ #$@(if (not (target-riscv64?))
+ #~((begin
+ ;; Test with a subset of the LPA dataset (PAF output)
+ (setenv "ASAN_OPTIONS"
"detect_leaks=1:symbolize=1")
+ (setenv "LSAN_OPTIONS"
"verbosity=0:log_threads=1")
+ (with-output-to-file "LPA.subset.paf"
+ (lambda _
+ (invoke "bin/wfmash"
+ "data/LPA.subset.fa.gz"
+ "data/LPA.subset.fa.gz"
+ "-X" "-n" "10" "-T" "wflign_info."
+ "-u" "./")))
+ (invoke "head" "LPA.subset.paf")))
+ #~())
+ ;; This test takes about 5 hours on riscv64-linux.
+ #$@(if (not (target-riscv64?))
+ #~((begin
+ ;; Test with a subset of the LPA dataset (SAM output)
+ (setenv "ASAN_OPTIONS"
"detect_leaks=1:symbolize=1")
+ (setenv "LSAN_OPTIONS"
"verbosity=0:log_threads=1")
+ (with-output-to-file "LPA.subset.sam"
+ (lambda _
+ (invoke "bin/wfmash"
+ "data/LPA.subset.fa.gz"
+ "data/LPA.subset.fa.gz"
+ "-X" "-N" "-a" "-T"
"wflign_info.")))
+ (with-output-to-file "LPA.subset.sam-view"
+ (lambda _
+ (invoke samtools "view" "LPA.subset.sam"
"-bS")))
+ (with-output-to-file "LPA.subset.bam"
+ (lambda _
+ (invoke samtools "sort"
"LPA.subset.sam-view")))
+ (invoke samtools "index" "LPA.subset.bam")
+ ;; samtools view LPA.subset.bam | head | cut -f 1-9
+ ;(invoke samtools "view" "LPA.subset.bam")
+ ;; There should be an easier way to do this with pipes.
+ (with-output-to-file "LPA.subset.bam-incr1"
+ (lambda _
+ (invoke samtools "view" "LPA.subset.bam")))
+ (with-output-to-file "LPA.subset.bam-incr2"
+ (lambda _
+ (invoke "head" "LPA.subset.bam-incr1")))
+ (invoke "cut" "-f" "1-9"
"LPA.subset.bam-incr2")))
+ #~())
+ ;; This test takes 60 minutes on riscv64-linux.
+ #$@(if (not (target-riscv64?))
+ #~((begin
+ ;; Test with a subset of the LPA dataset,
+ ;; setting a lower identity threshold (PAF output)
+ (setenv "ASAN_OPTIONS"
"detect_leaks=1:symbolize=1")
+ (setenv "LSAN_OPTIONS"
"verbosity=0:log_threads=1")
+ (with-output-to-file "LPA.subset.p90.paf"
+ (lambda _
+ (invoke "bin/wfmash"
+ "data/LPA.subset.fa.gz"
+ "data/LPA.subset.fa.gz"
+ "-X" "-p" "90" "-n" "10"
+ "-T" "wflign_info.")))
+ (invoke "head" "LPA.subset.p90.paf")))
+ #~())
+ (begin
+ ;; Test aligning short reads (500 bps) to a reference (SAM output)
+ (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
+ (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
+ (with-output-to-file "reads.500bps.sam"
+ (lambda _
+ (invoke "bin/wfmash"
+ "data/reference.fa.gz"
+ "data/reads.500bps.fa.gz"
+ "-s" "0.5k" "-N" "-a")))
+ (with-output-to-file "reads.500bps.sam-view"
+ (lambda _
+ (invoke samtools "view" "reads.500bps.sam" "-bS")))
+ (with-output-to-file "reads.500bps.bam"
+ (lambda _
+ (invoke samtools "sort" "reads.500bps.sam-view")))
+ (invoke samtools "index" "reads.500bps.bam")
+ (with-output-to-file "reads.500bps.bam-view"
+ (lambda _
+ (invoke samtools "view" "reads.500bps.bam")))
+ (invoke "head" "reads.500bps.bam-view"))
+ (begin
+ ;; Test with few very short reads (255bps) (PAF output)
+ (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
+ (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
+ (with-output-to-file "reads.255bps.paf"
+ (lambda _
+ (invoke "bin/wfmash"
+ "data/reads.255bps.fa.gz"
+ "data/reads.255bps.fa.gz"
+ "-X" "-w" "16")))
+ (invoke "head" "reads.255bps.paf"))))))))))
(inputs
(list atomic-queue
gsl
htslib
jemalloc
zlib))
+ (native-inputs
+ (list samtools))
(synopsis "Base-accurate DNA sequence aligner")
(description "@code{wfmash} is a DNA sequence read mapper based on mash
distances and the wavefront alignment algorithm. It is a fork of MashMap that
- branch master updated (a69a44bf59 -> 8f3dc994bb), guix-commits, 2022/03/31
- 02/06: gnu: htslib: Add bzip2 and xz to inputs., guix-commits, 2022/03/31
- 01/06: gnu: gsl: Force bootstrap when cross-compiling to riscv64-linux., guix-commits, 2022/03/31
- 04/06: gnu: atomic-queue: Do not look for boost when cross-compiling., guix-commits, 2022/03/31
- 03/06: gnu: atomic-queue: Run tests correctly., guix-commits, 2022/03/31
- 05/06: gnu: Add wfmash., guix-commits, 2022/03/31
- 06/06: gnu: wfmash: Enable test suite.,
guix-commits <=