struct
open Biokepi_run_environment
open Common
module Configuration = struct
type t = {
name: string;
parameters: (string * string) list
}
let name t = t.name
let to_json {name; parameters}: Yojson.Basic.json =
`Assoc [
"name", `String name;
"parameters",
`Assoc (List.map parameters ~f:(fun (a, b) -> a, `String b));
]
let generate_config_file ~path config : KEDSL.Program.t =
let open KEDSL in
Program.(
shf "echo '[user]' > %s" path
&& chain
(List.map config.parameters (fun (k, v) ->
shf "echo '%s = %s' >> %s" k v path))
)
let default =
{ name = "default";
parameters = [
"isSkipDepthFilters", "0";
"maxInputDepth", "10000";
"depthFilterMultiple", "3.0";
"snvMaxFilteredBasecallFrac", "0.4";
"snvMaxSpanningDeletionFrac", "0.75";
"indelMaxRefRepeat", "8";
"indelMaxWindowFilteredBasecallFrac", "0.3";
"indelMaxIntHpolLength", "14";
"ssnvPrior", "0.000001";
"sindelPrior", "0.000001";
"ssnvNoise", "0.0000005";
"sindelNoise", "0.0000001";
"ssnvNoiseStrandBiasFrac", "0.5";
"minTier1Mapq", "40";
"minTier2Mapq", "5";
"ssnvQuality_LowerBound", "15";
"sindelQuality_LowerBound", "30";
"isWriteRealignedBam", "0";
"binSize", "25000000";
"extraStrelkaArguments", "--eland-compatibility";
]}
let test1 =
{ name = "test1";
parameters = [
"isSkipDepthFilters", "0";
"maxInputDepth", "10000";
"depthFilterMultiple", "3.0";
"snvMaxFilteredBasecallFrac", "0.4";
"snvMaxSpanningDeletionFrac", "0.75";
"indelMaxRefRepeat", "8";
"indelMaxWindowFilteredBasecallFrac", "0.3";
"indelMaxIntHpolLength", "14";
"ssnvPrior", "0.000001";
"sindelPrior", "0.000001";
"ssnvNoise", "0.0000005";
"sindelNoise", "0.0000001";
"ssnvNoiseStrandBiasFrac", "0.5";
"minTier1Mapq", "40";
"minTier2Mapq", "5";
"ssnvQuality_LowerBound", "15";
"sindelQuality_LowerBound", "30";
"isWriteRealignedBam", "0";
"binSize", "25000000";
"extraStrelkaArguments", "--eland-compatibility";
"priorSomaticSnvRate", "1e-06";
"germlineSnvTheta", "0.001";
]}
let empty_exome =
{ name = "empty-exome";
parameters = [
"isSkipDepthFilters", "1";
]}
let exome_default =
{ name = "exome-default";
parameters = [
"isSkipDepthFilters", "1";
"maxInputDepth", "10000";
"depthFilterMultiple", "3.0";
"snvMaxFilteredBasecallFrac", "0.4";
"snvMaxSpanningDeletionFrac", "0.75";
"indelMaxRefRepeat", "8";
"indelMaxWindowFilteredBasecallFrac", "0.3";
"indelMaxIntHpolLength", "14";
"ssnvPrior", "0.000001";
"sindelPrior", "0.000001";
"ssnvNoise", "0.0000005";
"sindelNoise", "0.0000001";
"ssnvNoiseStrandBiasFrac", "0.5";
"minTier1Mapq", "40";
"minTier2Mapq", "5";
"ssnvQuality_LowerBound", "15";
"sindelQuality_LowerBound", "30";
"isWriteRealignedBam", "0";
"binSize", "25000000";
"extraStrelkaArguments", "--eland-compatibility";
]}
end
let run
~run_with ~normal ~tumor ~result_prefix ?(more_edges = [])
?(configuration = Configuration.default) () =
let open KEDSL in
let open Configuration in
let name = Filename.basename result_prefix in
let result_file suffix = result_prefix ^ suffix in
let output_dir = result_file "strelka_output" in
let config_file_path = result_file "configuration" in
let output_file_path = output_dir // "results/passed_somatic_combined.vcf" in
let reference_fasta =
Machine.get_reference_genome run_with normal#product#reference_build
|> Reference_genome.fasta in
let strelka_tool = Machine.get_tool run_with Machine.Tool.Default.strelka in
let gatk_tool = Machine.get_tool run_with Machine.Tool.Default.gatk in
let sorted_normal =
Samtools.sort_bam_if_necessary
~run_with ~by:`Coordinate normal in
let sorted_tumor =
Samtools.sort_bam_if_necessary
~run_with ~by:`Coordinate tumor in
let working_dir = Filename.(dirname result_prefix) in
let processors = Machine.max_processors run_with in
let make =
Machine.run_big_program run_with ~name ~processors
~self_ids:["strelka"]
Program.(
Machine.Tool.init strelka_tool && Machine.Tool.init gatk_tool
&& shf "mkdir -p %s" working_dir
&& shf "cd %s" working_dir
&& generate_config_file ~path:config_file_path configuration
&& shf "rm -fr %s" output_dir
&& shf "$STRELKA_BIN/configureStrelkaWorkflow.pl --normal=%s --tumor=%s --ref=%s --config=%s --output-dir=%s "
sorted_normal#product#path
sorted_tumor#product#path
reference_fasta#product#path
config_file_path
output_dir
&& shf "cd %s" output_dir
&& shf "make -j%d" processors
&& Gatk.call_gatk ~analysis:"CombineVariants" [
"--variant:snvs"; "results/passed.somatic.snvs.vcf";
"--variant:indels"; "results/passed.somatic.indels.vcf";
"-R"; reference_fasta#product#path;
"-genotypeMergeOptions"; "PRIORITIZE";
"-o"; output_file_path; "-priority"; "snvs,indels"
]
)
in
workflow_node ~name ~make
(vcf_file output_file_path
~reference_build:normal#product#reference_build
~host:Machine.(as_host run_with))
~edges:(more_edges @ [
depends_on normal;
depends_on tumor;
depends_on reference_fasta;
depends_on (Machine.Tool.ensure strelka_tool);
depends_on (Machine.Tool.ensure gatk_tool);
depends_on sorted_normal;
depends_on sorted_tumor;
depends_on (Picard.create_dict ~run_with reference_fasta);
depends_on (Samtools.faidx ~run_with reference_fasta);
depends_on (Samtools.index_to_bai ~run_with sorted_normal);
depends_on (Samtools.index_to_bai ~run_with sorted_tumor);
])
end