let mutect2
?(more_edges = [])
~configuration
~run_with
~input_normal_bam ~input_tumor_bam
~result_prefix how =
let open KEDSL in
let reference =
Machine.get_reference_genome run_with
input_normal_bam#product#reference_build in
let run_on_region ~add_edges region =
let result_file suffix =
let region_name = Region.to_filename region in
sprintf "%s-%s%s" result_prefix region_name suffix in
let output_vcf = result_file "-mutect2.vcf" in
let gatk = Machine.get_tool run_with Machine.Tool.Default.gatk in
let run_path = Filename.dirname output_vcf in
let reference_fasta = Reference_genome.fasta reference in
let reference_dot_fai = Samtools.faidx ~run_with reference_fasta in
let sequence_dict = Picard.create_dict ~run_with reference_fasta in
let sorted_normal_bam =
Samtools.sort_bam_if_necessary ~run_with ~by:`Coordinate input_normal_bam
in
let sorted_tumor_bam =
Samtools.sort_bam_if_necessary ~run_with ~by:`Coordinate input_tumor_bam
in
let `Arguments config_arguments, `Edges confg_edges =
Configuration.Mutect2.compile ~reference configuration in
let run_caller =
let name = sprintf "%s" (Filename.basename output_vcf) in
let make =
Machine.run_big_program run_with ~name
~self_ids:["gatk"; "mutect2"]
Program.(
Machine.Tool.(init gatk)
&& shf "mkdir -p %s" run_path
&& shf "cd %s" run_path
&& call_gatk ~region ~analysis:"MuTect2"
([ "-I:normal"; sorted_normal_bam#product#path;
"-I:tumor"; sorted_tumor_bam#product#path;
"-R"; reference_fasta#product#path;
"-o"; output_vcf; ]
@ config_arguments)
)
in
workflow_node ~name ~make
(vcf_file output_vcf
~reference_build:input_normal_bam#product#reference_build
~host:Machine.(as_host run_with))
~tags:[Target_tags.variant_caller]
~edges:(add_edges @ confg_edges @ [
depends_on Machine.Tool.(ensure gatk);
depends_on sorted_normal_bam;
depends_on sorted_tumor_bam;
depends_on reference_fasta;
depends_on reference_dot_fai;
depends_on sequence_dict;
depends_on (Samtools.index_to_bai ~run_with sorted_normal_bam);
depends_on (Samtools.index_to_bai ~run_with sorted_tumor_bam);
on_failure_activate (Remove.file ~run_with output_vcf);
])
in
run_caller
in
match how with
| `Region region -> run_on_region ~add_edges:more_edges region
| `Map_reduce ->
let targets =
List.map (Reference_genome.major_contigs reference)
~f:(run_on_region ~add_edges:[])
in
let final_vcf = result_prefix ^ "-merged.vcf" in
Vcftools.vcf_concat ~run_with targets ~final_vcf ~more_edges