(** [mem_align_to_sam ~reference_build ?configuration ~fastq ~result_prefix
    ~run_with ()] creates the workflow node that runs [bwa mem] on [fastq]
    and redirects the output to the file [result_prefix ^ ".sam"].

    - [reference_build]: used to look up the reference genome (whose FASTA
      path is passed to [bwa mem]) and to build the [index] dependency.
    - [configuration]: supplies the values given to the [-O], [-E] and [-B]
      options; defaults to [Configuration.Mem.default].
    - [fastq]: node whose product exposes [paths] (a first read path and an
      optional second one) and [escaped_sample_name].
    - [result_prefix]: the output is [result_prefix ^ ".sam"], and the
      command runs after a [cd] into [Filename.dirname result_prefix].
    - [run_with]: the machine providing the tool, the host, the processor
      count (passed as [-t]) and the program runner.

    The node depends on the [bwa] tool being ensured, on the index node and
    on [fastq]; on failure it activates a node removing the output file. *)
let mem_align_to_sam
    ~reference_build
    ?(configuration = Configuration.Mem.default)
    ~fastq
    ~(result_prefix:string)
    ~(run_with : Machine.t)
    () =
  let open KEDSL in
  let reference_fasta =
    Reference_genome.fasta
      (Machine.get_reference_genome run_with reference_build) in
  let output_dir = Filename.dirname result_prefix in
  let enter_output_dir = Program.shf "cd %s" (Filename.quote output_dir) in
  let bwa_tool = Machine.get_tool run_with Machine.Tool.Default.bwa in
  let bwa_index = index ~reference_build ~run_with in
  let sam_path = sprintf "%s.sam" result_prefix in
  let read1, read2_opt = fastq#product#paths in
  let read1_name = Filename.basename read1 in
  let name = sprintf "bwa-mem-%s" read1_name in
  let processors = Machine.max_processors run_with in
  (* The first read's basename doubles as the read-group identifier. *)
  let read_group_option =
    read_group_header_option `Mem
      ~sample_name:fastq#product#escaped_sample_name
      ~read_group_id:read1_name in
  (* A command-line flag followed by its integer value. *)
  let int_option flag value = [flag; Int.to_string value] in
  (* The second read file is only present for paired-end inputs. *)
  let mate_arguments =
    match read2_opt with
    | Some read2 -> [Filename.quote read2]
    | None -> []
  in
  let bwa_command =
    String.concat ~sep:" " (List.concat [
        ["bwa mem"; read_group_option];
        int_option "-t" processors;
        int_option "-O" configuration.Configuration.Mem.gap_open_penalty;
        int_option "-E" configuration.Configuration.Mem.gap_extension_penalty;
        int_option "-B" configuration.Configuration.Mem.mismatch_penalty;
        [Filename.quote reference_fasta#product#path; Filename.quote read1];
        mate_arguments;
        [">"; Filename.quote sam_path];
      ]) in
  let edges = [
    depends_on (Machine.Tool.ensure bwa_tool);
    depends_on bwa_index;
    depends_on fastq;
    on_failure_activate (Remove.file ~run_with sam_path);
  ] in
  let make =
    Machine.run_big_program run_with ~processors ~name
      ~self_ids:["bwa"; "mem"]
      Program.(
        Machine.Tool.init bwa_tool
        && enter_output_dir
        && sh bwa_command) in
  workflow_node
    (single_file sam_path ~host:(Machine.as_host run_with))
    ~name
    ~edges
    ~tags:[Target_tags.aligner]
    ~make