let align_to_sam
~reference_build
?(configuration = Configuration.Aln.default)
~fastq
~(result_prefix:string)
~(run_with : Machine.t)
() =
let open KEDSL in
let reference_fasta =
Machine.get_reference_genome run_with reference_build
|> Reference_genome.fasta in
let in_work_dir =
Program.shf "cd %s" Filename.(quote (dirname result_prefix)) in
let bwa_tool = Machine.get_tool run_with Machine.Tool.Default.bwa in
let bwa_index = index ~reference_build ~run_with in
let processors = Machine.max_processors run_with in
let bwa_aln read_number read =
let name = sprintf "bwa-aln-%s" (Filename.basename read) in
let result = sprintf "%s-R%d.sai" result_prefix read_number in
let bwa_command =
String.concat ~sep:" " [
"bwa aln";
"-t"; Int.to_string processors;
"-O"; Int.to_string configuration.Configuration.Aln.gap_open_penalty;
"-E"; Int.to_string configuration.Configuration.Aln.gap_extension_penalty;
"-M"; Int.to_string configuration.Configuration.Aln.mismatch_penalty;
(Filename.quote reference_fasta#product#path);
(Filename.quote read);
">"; (Filename.quote result);
] in
workflow_node
(single_file result ~host:Machine.(as_host run_with))
~name
~edges:[
depends_on fastq;
depends_on bwa_index;
depends_on Machine.Tool.(ensure bwa_tool);
on_failure_activate (Remove.file ~run_with result);
]
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors ~name
~self_ids:["bwa"; "aln"]
Program.(
Machine.Tool.(init bwa_tool)
&& in_work_dir
&& sh bwa_command
))
in
let r1_path, r2_path_opt = fastq#product#paths in
let r1_sai = bwa_aln 1 r1_path in
let r2_sai_opt = Option.map r2_path_opt ~f:(fun r -> (bwa_aln 2 r, r)) in
let sam =
let name = sprintf "bwa-sam-%s" (Filename.basename result_prefix) in
let result = sprintf "%s.sam" result_prefix in
let program, edges =
let common_edges = [
depends_on r1_sai;
depends_on reference_fasta;
depends_on bwa_index;
depends_on Machine.Tool.(ensure bwa_tool);
on_failure_activate (Remove.file ~run_with result);
] in
match r2_sai_opt with
| Some (r2_sai, r2) ->
Program.(
Machine.Tool.(init bwa_tool)
&& in_work_dir
&& shf "bwa sampe %s %s %s %s %s %s > %s"
(read_group_header_option `Aln
~sample_name:fastq#product#escaped_sample_name
~read_group_id:(Filename.basename r1_path))
(Filename.quote reference_fasta#product#path)
(Filename.quote r1_sai#product#path)
(Filename.quote r2_sai#product#path)
(Filename.quote r1_path)
(Filename.quote r2)
(Filename.quote result)),
(depends_on r2_sai :: common_edges)
| None ->
Program.(
Machine.Tool.(init bwa_tool)
&& in_work_dir
&& shf "bwa samse %s %s %s > %s"
(read_group_header_option `Aln
~sample_name:fastq#product#escaped_sample_name
~read_group_id:(Filename.basename r1_path))
(Filename.quote reference_fasta#product#path)
(Filename.quote r1_sai#product#path)
(Filename.quote result)),
common_edges
in
workflow_node
(single_file result ~host:Machine.(as_host run_with))
~name ~edges
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors:1 ~name program
~self_ids:["bwa"; "sampe"])
in
sam