(** [run ~run_with ~configuration ~reference_build ~vcfs ~predictor
    ~alleles_file ~output] builds the workflow node that invokes [topiary].

    The product of the node is a single file at the path carried by [output]
    (either [`HTML path] or [`CSV path]) on the host of [run_with]; the node
    is named ["topiary_"] followed by the basename of that path.

    The command line is assembled in this order: one [--vcf] pair per element
    of [vcfs], the predictor name, the alleles file, the output flag, the
    flags derived from the fields of [configuration], and finally whatever
    [Configuration.render configuration] returns.

    The node depends on the [topiary] tool, on
    [Pyensembl.cache_genome ~run_with ~reference_build], on [alleles_file],
    on every element of [vcfs], and on the dependency returned by
    [predictor_to_tool] when it returns one. *)
let run ~(run_with: Machine.t)
    ~configuration
    ~reference_build
    ~vcfs
    ~predictor
    ~alleles_file
    ~output =
  let open KEDSL in
  let topiary =
    Machine.get_tool run_with Machine.Tool.Definition.(create "topiary")
  in
  (* When the predictor comes with a tool, depend on it and run its
     initialisation program; otherwise only echo an informational message. *)
  let (predictor_edges, predictor_init) =
    match predictor_to_tool ~run_with predictor with
    | None ->
      ([], Program.(sh "echo 'No external prediction tool required'"))
    | Some (tool_dependency, tool_init) ->
      ([depends_on tool_dependency], tool_init)
  in
  let var_arg =
    List.concat_map vcfs ~f:(fun vcf -> ["--vcf"; vcf#product#path])
  in
  let predictor_arg = ["--mhc-predictor"; predictor_to_string predictor] in
  let allele_arg = ["--mhc-alleles-file"; alleles_file#product#path] in
  (* The output path is both a command-line argument and the node product. *)
  let (output_arg, output_path) =
    let with_flag flag path = ([flag; path], path) in
    match output with
    | `HTML html_file -> with_flag "--output-html" html_file
    | `CSV csv_file -> with_flag "--output-csv" csv_file
  in
  let open Configuration in
  let {
    rna_gene_fpkm_tracking_file;
    rna_min_gene_expression;
    rna_transcript_fpkm_tracking_file;
    rna_min_transcript_expression;
    rna_transcript_fkpm_gtf_file;
    mhc_epitope_lengths;
    ic50_cutoff;
    percentile_cutoff;
    padding_around_mutation;
    self_filter_directory;
    skip_variant_errors;
    only_novel_epitopes;
    _
  } = configuration in
  (* Small builders for the three shapes of command-line option used below. *)
  let always flag rendered = [flag; rendered] in
  let optional flag ~to_string = function
    | None -> []
    | Some value -> [flag; to_string value]
  in
  let optional_string flag value =
    optional flag ~to_string:(fun text -> text) value
  in
  let switch flag enabled = if enabled then [flag] else [] in
  let rna_arg =
    optional_string "--rna-gene-fpkm-tracking-file"
      rna_gene_fpkm_tracking_file
    @ always "--rna-min-gene-expression"
      (string_of_float rna_min_gene_expression)
    @ optional_string "--rna-transcript-fpkm-tracking-file"
      rna_transcript_fpkm_tracking_file
    @ always "--rna-min-transcript-expression"
      (string_of_float rna_min_transcript_expression)
    @ optional_string "--rna-transcript-fpkm-gtf-file"
      rna_transcript_fkpm_gtf_file
  in
  let epitope_arg =
    (* Epitope lengths are passed as one comma-separated value. *)
    let joined_lengths =
      String.concat ~sep:"," (List.map mhc_epitope_lengths ~f:string_of_int)
    in
    always "--mhc-epitope-lengths" joined_lengths
    @ switch "--only-novel-epitopes" only_novel_epitopes
    @ always "--ic50-cutoff" (string_of_float ic50_cutoff)
    @ always "--percentile-cutoff" (string_of_float percentile_cutoff)
    @ optional "--padding-around-mutation" ~to_string:string_of_int
      padding_around_mutation
    @ switch "--skip-variant-errors" skip_variant_errors
    @ optional_string "--self-filter-directory" self_filter_directory
  in
  let arguments =
    var_arg @ predictor_arg @ allele_arg @ output_arg
    @ rna_arg
    @ epitope_arg
    @ Configuration.render configuration
  in
  let name = sprintf "topiary_%s" (Filename.basename output_path) in
  workflow_node
    (single_file output_path ~host:Machine.(as_host run_with))
    ~name
    ~edges:([
        depends_on Machine.Tool.(ensure topiary);
        depends_on (Pyensembl.cache_genome ~run_with ~reference_build);
        depends_on alleles_file;
      ] @ (List.map ~f:depends_on vcfs)
        @ predictor_edges)
    ~make:(
      Machine.run_program run_with ~name
        Program.(
          Machine.Tool.(init topiary)
          && predictor_init
          && Pyensembl.(set_cache_dir_command ~run_with)
          && exec (["topiary"] @ arguments)
        )
    )