struct
open Biokepi_run_environment
open Common

module Remove = Workflow_utilities.Remove

module Configuration = struct

  (** Options for [bedtools intersect]. *)
  module Intersect = struct
    type t = {
      params: string list;
      (** Catch-all list of parameters to be concatenated together
          (space-separated) and passed to the command. *)
      with_headers: bool;
      (** The header of A will be prepended to the output ([-header]). *)
      unique_features: bool;
      (** Write the original A entry once if any overlaps are found in B
          ([-u]). *)
    }

    (** Default configuration: no extra parameters, [-header] and [-u] both
        enabled. *)
    let default = { params = []; with_headers = true; unique_features = true; }

    (** Render a configuration as a command-line fragment: the optional
        [-header] and [-u] flags followed by [params] joined with spaces. *)
    let render {params; with_headers; unique_features; } =
      (if with_headers then " -header " else " ")
      ^ (if unique_features then " -u " else " ")
      ^ (String.concat ~sep:" " params)
  end
end

(** Convert a BAM file into FASTQ file(s) with [bedtools bamtofastq].

    The input is first passed through [Samtools.sort_bam_if_necessary]
    with [~by:`Read_name]; the resulting BAM is the one given to bedtools.

    - [run_with]: the machine used to obtain the bedtools tool and to run
      the program.
    - [sample_type]: [`Paired_end] writes [<output_prefix>_R1.fastq] and
      [<output_prefix>_R2.fastq]; [`Single_end] writes
      [<output_prefix>.fastq].
    - [output_prefix]: path prefix of the FASTQ file(s); its parent directory
      is created with [mkdir -p] before bedtools runs.
    - [input_bam]: workflow node of the BAM to convert.

    On failure the FASTQ output(s) are removed; on success the sorted BAM
    is removed.

    Returns a workflow node whose product is built with [fastq_reads] from
    the sample name of [input_bam] and the output path(s). *)
let bamtofastq
    ~(run_with:Machine.t) ~sample_type ~output_prefix input_bam =
  let open KEDSL in
  let sorted_bam =
    Samtools.sort_bam_if_necessary
      ~run_with ~by:`Read_name input_bam in
  let sample_name = input_bam#product#sample_name in
  let fastq_output_options, r1, r2opt =
    match sample_type with
    | `Paired_end ->
      let r1 = sprintf "%s_R1.fastq" output_prefix in
      let r2 = sprintf "%s_R2.fastq" output_prefix in
      (["-fq"; r1; "-fq2"; r2], r1, Some r2)
    | `Single_end ->
      let r1 = sprintf "%s.fastq" output_prefix in
      (["-fq"; r1], r1, None)
  in
  let bedtools = Machine.get_tool run_with Machine.Tool.Default.bedtools in
  let src_bam = sorted_bam#product#path in
  let program =
    Program.(Machine.Tool.(init bedtools)
             (* The list elements must be separated by [;]: without the
                separators the expression is a string applied as a
                function and does not type-check. *)
             && exec ["mkdir"; "-p"; Filename.dirname r1]
             && exec ("bedtools" ::
                      "bamtofastq" ::  "-i" :: src_bam ::
                      fastq_output_options)) in
  let name =
    sprintf "bedtools-bamtofastq-%s"
      Filename.(basename src_bam |> chop_extension) in
  let make = Machine.run_program ~name run_with program in
  let edges = [
    depends_on Machine.Tool.(ensure bedtools);
    depends_on input_bam;
    depends_on sorted_bam;
    on_failure_activate (Remove.file ~run_with r1);
    (* NOTE(review): if [sort_bam_if_necessary] can hand back [input_bam]
       unchanged, this removes the caller's input file -- confirm. *)
    on_success_activate (Remove.file ~run_with sorted_bam#product#path);
  ] |> fun list ->
    begin match r2opt with
    | None -> list
    | Some r2 ->
      on_failure_activate (Remove.file ~run_with r2) :: list
    end
  in
  workflow_node
    (fastq_reads ~name:sample_name ~host:(Machine.as_host run_with) r1 r2opt)
    ~edges ~name ~make
(** Used to determine whether features in multiple sets intersect with one
    another, by running [bedtools intersect].

    Feature sets include BED, VCF, GFF, and BAM files.

    - [run_with]: the machine used to obtain the bedtools tool and to run
      the program.
    - [configuration]: extra flags for the command; defaults to
      [Configuration.Intersect.default].
    - [primary]: the primary set file (workflow_node with [#path]), passed
      as [-a].
    - [intersect_with]: list of set-file workflow_nodes to intersect with;
      their paths are joined with commas and passed as [-b].
    - [output]: path to the resulting set; the standard output of the
      command is redirected to it.

    The returned node is considered done once [output] is bigger than one
    byte, and [output] is removed on failure. *)
let intersect
    ~(run_with:Machine.t)
    ?(configuration=Configuration.Intersect.default)
    ~primary ~intersect_with output
  =
  let open KEDSL in
  let bedtools = Machine.get_tool run_with Machine.Tool.Default.bedtools in
  let arguments =
    (sprintf "-a %s" (Filename.quote primary#product#path)) ^
    (List.map ~f:(fun n -> (Filename.quote n#product#path)) intersect_with
     |> String.concat ~sep:","
     |> sprintf " -b %s ")
    ^ (Configuration.Intersect.render configuration)
  in
  let program =
    Program.(Machine.Tool.(init bedtools)
             (* The redirect target is shell-quoted like the input paths, so
                an output path containing spaces or metacharacters does not
                break the command line. *)
             && sh ("bedtools intersect "
                    ^ arguments ^ " > " ^ Filename.quote output)) in
  let name = sprintf "bedtools-intersect-%s-with-%s"
      (Filename.basename primary#product#path)
      (String.concat ~sep:"__"
         (List.map
            ~f:(fun n -> (Filename.basename n#product#path)) intersect_with)) in
  let make = Machine.run_program run_with ~name program in
  let edges = [
    depends_on primary;
    depends_on Machine.Tool.(ensure bedtools);
    on_failure_activate (Remove.file ~run_with output)
  ] @ (List.map ~f:depends_on intersect_with) in
  let out = transform_vcf primary#product ~path:output in
  workflow_node out ~name ~edges ~make
    ~ensures:(`Is_verified (out#as_single_file#is_bigger_than 1))
end