let mem_align_to_bam
~reference_build
?(configuration = Configuration.Mem.default)
~(result_prefix:string)
~(run_with : Machine.t)
(raw_input : Input_reads.t) =
let open KEDSL in
let reference_fasta =
Machine.get_reference_genome run_with reference_build
|> Reference_genome.fasta in
let in_work_dir =
Program.shf "cd %s" Filename.(quote (dirname result_prefix)) in
let bwa_tool = Machine.get_tool run_with Machine.Tool.Default.bwa in
let samtools = Machine.get_tool run_with Machine.Tool.Default.samtools in
let bwa_index = index ~reference_build ~run_with in
let result = sprintf "%s.bam" result_prefix in
let input = Input_reads.prepare ~run_with raw_input in
let name = sprintf "bwa-mem-%s" (Input_reads.name input) in
let processors = Machine.max_processors run_with in
let bwa_base_command =
String.concat ~sep:" " [
"bwa mem";
(read_group_header_option `Mem
~sample_name:(Input_reads.sample_name input)
~read_group_id:(Input_reads.read_group_id input));
"-t"; Int.to_string processors;
"-O"; Int.to_string configuration.Configuration.Mem.gap_open_penalty;
"-E"; Int.to_string configuration.Configuration.Mem.gap_extension_penalty;
"-B"; Int.to_string configuration.Configuration.Mem.mismatch_penalty;
(Filename.quote reference_fasta#product#path);
] in
let command_to_stdout =
match input with
| `Fastq fastq ->
let r1_path, r2_path_opt = fastq#product#paths in
String.concat ~sep:" " [
bwa_base_command;
Filename.quote r1_path;
Option.value_map ~default:"" r2_path_opt ~f:Filename.quote;
]
| `Bam (b, pairedness) ->
String.concat ~sep:" " [
"samtools bam2fq"; Filename.quote b#product#path;
"|";
bwa_base_command; (match pairedness with `PE -> "-p" | `SE -> "");
"-";
]
in
let command =
String.concat ~sep:" " [
command_to_stdout; "|";
"samtools"; "view"; "-b"; "|";
"samtools"; "sort";
"-@"; Int.to_string processors; "-T"; Filename.chop_extension result ^ "-tmp";
"-o"; Filename.quote result;
] in
let product =
bam_file ~host:Machine.(as_host run_with)
~name:(Input_reads.sample_name input)
~sorting:`Coordinate
~reference_build result in
workflow_node product
~name
~edges:(
depends_on Machine.Tool.(ensure bwa_tool)
:: depends_on Machine.Tool.(ensure samtools)
:: depends_on bwa_index
:: on_failure_activate (Remove.file ~run_with result)
:: Input_reads.as_dependencies input
)
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors ~name
~self_ids:["bwa"; "mem"]
Program.(
Machine.Tool.(init bwa_tool)
&& Machine.Tool.(init samtools)
&& in_work_dir
&& sh "set -o pipefail"
&& sh command))