struct
(** Workflow-nodes to run kallisto. *)
open Biokepi_run_environment open Common
(** Create a kallisto-specific index of the transcriptome (cDNA).

    [index ~reference_build ~run_with] returns a workflow node whose product
    is the single file [<dir>.kallisto.idx], where [<dir>] is the directory
    holding the cDNA file of [reference_build] on the host of [run_with].

    The node depends on the cDNA file and on the kallisto tool being
    available; if the node fails, a file-removal node for the index is
    activated.

    The cDNA is obtained through [Reference_genome.cdna_exn]; the [_exn]
    suffix suggests it raises when the build provides no cDNA (to be
    confirmed against that module). *)
let index
    ~reference_build
    ~(run_with : Machine.t) =
  let open KEDSL in
  let reference_transcriptome =
    Machine.get_reference_genome run_with reference_build
    |> Reference_genome.cdna_exn in
  let kallisto_tool = Machine.get_tool run_with Machine.Tool.Default.kallisto in
  let name =
    sprintf "kallisto-index-%s"
      (Filename.basename reference_transcriptome#product#path) in
  let reference_dir =
    Filename.dirname reference_transcriptome#product#path in
  (* NOTE(review): the suffix is appended to the directory path itself, so the
     index ends up as a sibling of the reference directory rather than a file
     inside it. Kept as-is because existing runs may rely on this location;
     confirm it is intended. *)
  let result = sprintf "%s.kallisto.idx" reference_dir in
  workflow_node ~name
    (single_file ~host:(Machine.(as_host run_with)) result)
    ~edges:[
      on_failure_activate (Workflow_utilities.Remove.file ~run_with result);
      depends_on reference_transcriptome;
      depends_on Machine.Tool.(ensure kallisto_tool);
    ]
    ~make:(Machine.run_big_program run_with ~name
             (* Two separate identifiers: the list elements must be separated
                by a semicolon, otherwise the first string is applied to the
                second and the expression does not type-check. *)
             ~self_ids:["kallisto"; "index"]
             Program.(
               Machine.Tool.(init kallisto_tool)
               && shf "kallisto index -i %s %s"
                 result
                 reference_transcriptome#product#path
             ))
(** Quantify transcript abundance from RNA FASTQ files with kallisto.

    [run ~reference_build ?bootstrap_samples ~run_with ~fastq ~result_prefix]
    returns a workflow node whose product is the single file
    [<result_prefix>-kallisto/abundance.tsv].

    - [reference_build]: build whose kallisto index is created (or reused)
      through [index].
    - [bootstrap_samples]: value given to the [-b] option (default [100]).
    - [run_with]: machine providing the host, the kallisto tool and the
      processor count given to [-t].
    - [fastq]: node whose product exposes [paths], a first read file and an
      optional second one.
    - [result_prefix]: path prefix; the output directory is this prefix with
      [-kallisto] appended.

    The node depends on the index, on [fastq] and on the kallisto tool; if
    the node fails, a removal node for the output directory is activated. *)
let run
    ~reference_build
    ?(bootstrap_samples=100)
    ~(run_with:Machine.t)
    ~fastq
    ~result_prefix
  =
  let open KEDSL in
  let processors = Machine.max_processors run_with in
  let name =
    sprintf "kallisto-%s-bootstrap_%d"
      (Filename.basename result_prefix) bootstrap_samples in
  let result_file suffix = result_prefix ^ suffix in
  let output_dir = result_file "-kallisto" in
  let abundance_file = output_dir // "abundance.tsv" in
  let kallisto_index = index ~reference_build ~run_with in
  let kallisto_tool = Machine.get_tool run_with Machine.Tool.Default.kallisto in
  let r1_path, r2_path_opt = fastq#product#paths in
  let kallisto_quant_base_cmd =
    sprintf
      "kallisto quant -i %s -o %s -b %d -t %d %s"
      kallisto_index#product#path
      output_dir
      bootstrap_samples
      processors
      r1_path
  in
  (* The second read file, when present, is appended after the first one.
     NOTE(review): in the single-file case no single-end options (--single,
     -l, -s) are passed; check the kallisto manual to confirm whether the
     command is accepted in that form. *)
  let kallisto_quant =
    match r2_path_opt with
    | Some r2_path -> sprintf "%s %s" kallisto_quant_base_cmd r2_path
    | None -> kallisto_quant_base_cmd
  in
  let make =
    Machine.run_big_program run_with ~name ~processors
      (* Two separate identifiers: the list elements must be separated by a
         semicolon, otherwise the first string is applied to the second and
         the expression does not type-check. *)
      ~self_ids:["kallisto"; "quant"]
      Program.(
        Machine.Tool.init kallisto_tool
        && sh kallisto_quant
      )
  in
  workflow_node ~name ~make
    (single_file abundance_file ~host:(Machine.as_host run_with))
    ~edges:[
      on_failure_activate
        (Workflow_utilities.Remove.directory ~run_with output_dir);
      depends_on kallisto_index;
      depends_on fastq;
      depends_on (Machine.Tool.ensure kallisto_tool);
    ]
end