struct
open Biokepi_run_environment
open Common
module Remove = Workflow_utilities.Remove
(** Command-line configurations for the bedtools sub-commands wrapped in
    this file. *)
module Configuration = struct

  (** Options used when building a [bedtools intersect] command line. *)
  module Intersect = struct

    type t = {
      params: string list;
      (** Extra arguments, appended space-separated after the flags. *)
      with_headers: bool;
      (** When [true], the [-header] flag is emitted. *)
      unique_features: bool;
      (** When [true], the [-u] flag is emitted. *)
    }

    (** No extra parameters; both [-header] and [-u] enabled. *)
    let default =
      { params = []; with_headers = true; unique_features = true; }

    (** [render config] is the command-line fragment for [config]: the
        header flag, then the unique flag, then the extra parameters.
        A disabled flag is rendered as a single space. *)
    let render config =
      let flag_or_blank enabled flag = if enabled then flag else " " in
      let header_flag = flag_or_blank config.with_headers " -header " in
      let unique_flag = flag_or_blank config.unique_features " -u " in
      let extra_params = String.concat ~sep:" " config.params in
      header_flag ^ unique_flag ^ extra_params
  end
end
(** [bamtofastq ~run_with ~sample_type ~output_prefix input_bam] builds a
    workflow node that converts [input_bam] to FASTQ with
    [bedtools bamtofastq].

    The input is first passed through [Samtools.sort_bam_if_necessary]
    with [~by:`Read_name]; the resulting BAM is the one given to bedtools.

    Output files:
    - [`Paired_end]: [output_prefix ^ "_R1.fastq"] and
      [output_prefix ^ "_R2.fastq"];
    - [`Single_end]: [output_prefix ^ ".fastq"].

    The directory of the first FASTQ file is created before the conversion.
    On failure the FASTQ file(s) are scheduled for removal; on success the
    BAM produced by the sorting step is scheduled for removal.
    NOTE(review): if the sorting step can hand back the input file itself,
    the success hook would remove the input; confirm against
    [Samtools.sort_bam_if_necessary]. *)
let bamtofastq
    ~(run_with:Machine.t) ~sample_type ~output_prefix input_bam =
  let open KEDSL in
  let read_name_sorted =
    Samtools.sort_bam_if_necessary ~run_with ~by:`Read_name input_bam in
  let sample_name = input_bam#product#sample_name in
  (* Decide on the output file names first ... *)
  let r1, r2opt =
    match sample_type with
    | `Paired_end ->
      (sprintf "%s_R1.fastq" output_prefix,
       Some (sprintf "%s_R2.fastq" output_prefix))
    | `Single_end ->
      (sprintf "%s.fastq" output_prefix, None)
  in
  (* ... then derive the second-mate options from them. *)
  let mate_options =
    match r2opt with
    | Some r2 -> ["-fq2"; r2]
    | None -> []
  in
  let bedtools = Machine.get_tool run_with Machine.Tool.Default.bedtools in
  let bam_path = read_name_sorted#product#path in
  let mkdir_command = ["mkdir"; "-p"; Filename.dirname r1] in
  let convert_command =
    ["bedtools"; "bamtofastq"; "-i"; bam_path; "-fq"; r1] @ mate_options in
  let program =
    Program.(Machine.Tool.(init bedtools)
             && exec mkdir_command
             && exec convert_command) in
  let name =
    sprintf "bedtools-bamtofastq-%s"
      (Filename.chop_extension (Filename.basename bam_path)) in
  let make = Machine.run_program ~name run_with program in
  let common_edges = [
    depends_on Machine.Tool.(ensure bedtools);
    depends_on input_bam;
    depends_on read_name_sorted;
    on_failure_activate (Remove.file ~run_with r1);
    on_success_activate (Remove.file ~run_with bam_path);
  ] in
  (* The second FASTQ only exists for paired-end samples. *)
  let edges =
    match r2opt with
    | Some r2 ->
      on_failure_activate (Remove.file ~run_with r2) :: common_edges
    | None -> common_edges
  in
  let product =
    fastq_reads ~name:sample_name ~host:(Machine.as_host run_with) r1 r2opt
  in
  workflow_node product ~edges ~name ~make
(** [intersect ~run_with ?configuration ~primary ~intersect_with output]
    builds a workflow node that runs [bedtools intersect] and redirects
    its standard output to the file [output].

    - [primary] is passed as the [-a] file.
    - [intersect_with] is the list of nodes whose paths are passed to [-b].
      NOTE(review): the paths are joined with commas; confirm that the
      targeted bedtools version accepts a comma-separated [-b] list.
    - [configuration] (default {!Configuration.Intersect.default}) is
      rendered with {!Configuration.Intersect.render} and appended.

    Every path spliced into the shell command, including [output], goes
    through [Filename.quote], so the file written by the redirection is
    the same path recorded in the resulting product.

    The product is built with [transform_vcf primary#product ~path:output].
    The node depends on [primary], on every node of [intersect_with] and on
    the bedtools installation; on failure [output] is scheduled for
    removal. The node is considered done when the output file satisfies
    [is_bigger_than 1]. *)
let intersect
    ~(run_with:Machine.t)
    ?(configuration=Configuration.Intersect.default)
    ~primary ~intersect_with output
  =
  let open KEDSL in
  let bedtools = Machine.get_tool run_with Machine.Tool.Default.bedtools in
  let arguments =
    (sprintf "-a %s" (Filename.quote primary#product#path)) ^
    (List.map ~f:(fun n -> (Filename.quote n#product#path)) intersect_with
     |> String.concat ~sep:","
     |> sprintf " -b %s ")
    ^ (Configuration.Intersect.render configuration)
  in
  (* Quote the redirection target like the input paths above. *)
  let quoted_output = Filename.quote output in
  let program =
    Program.(Machine.Tool.(init bedtools)
             && sh ("bedtools intersect "
                    ^ arguments ^ " > " ^ quoted_output)) in
  let name = sprintf "bedtools-intersect-%s-with-%s"
      (Filename.basename primary#product#path)
      (String.concat ~sep:"__"
         (List.map
            ~f:(fun n -> (Filename.basename n#product#path)) intersect_with)) in
  let make = Machine.run_program run_with ~name program in
  let edges = [
    depends_on primary;
    depends_on Machine.Tool.(ensure bedtools);
    (* Labelled like the other [Remove.file] calls in this file. *)
    on_failure_activate (Remove.file ~run_with output)
  ] @ (List.map ~f:depends_on intersect_with) in
  let out = transform_vcf primary#product ~path:output in
  workflow_node out ~name ~edges ~make
    ~ensures:(`Is_verified (out#as_single_file#is_bigger_than 1))
end