struct
open Common
open KEDSL
module Tool = struct
(** Identifies a tool by its name and, optionally, a version string. *)
module Definition = struct
  (** A tool name paired with an optional version. *)
  type t = {name: string; version: string option}

  (** [create ?version name] builds the definition of [name]; the version
      is stored as [None] when [?version] is omitted. *)
  let create ?version name = {name; version}

  (** [to_opam_name d] renders [d] as ["<name>.<version>"]; the literal
      ["NOVERSION"] stands in for a missing version. *)
  let to_opam_name d =
    let shown_version =
      match d.version with
      | Some v -> v
      | None -> "NOVERSION"
    in
    sprintf "%s.%s" d.name shown_version

  (** Same rendering as [to_opam_name]. *)
  let to_string = to_opam_name

  (** Same rendering as [to_opam_name]. *)
  let to_directory_name = to_opam_name

  (** The optional version stored in the definition. *)
  let get_version {version; _} = version

  (** The tool name stored in the definition. *)
  let get_name {name; _} = name
end
(** Ready-made tool definitions, each pinned to the version string given
    here. [mutect] and [gatk] carry no version, so [Definition.to_opam_name]
    renders them with the ["NOVERSION"] placeholder. *)
module Default = struct
  open Definition

  let bwa = create ~version:"0.7.10" "bwa"
  let freebayes = create ~version:"1.1.0" "freebayes"
  let sambamba = create ~version:"0.6.5" "sambamba"
  let samtools = create ~version:"1.4" "samtools"
  let bcftools = create ~version:"1.4" "bcftools"
  let vcftools = create ~version:"0.1.12b" "vcftools"
  let bedtools = create ~version:"2.23.0" "bedtools"
  let somaticsniper = create ~version:"1.0.3" "somaticsniper"
  let varscan = create ~version:"2.3.5" "varscan"

  (* No version recorded for these two. *)
  let mutect = create "mutect"
  let gatk = create "gatk"

  let strelka = create ~version:"1.0.14" "strelka"
  let virmid = create ~version:"1.1.1" "virmid"
  let muse = create ~version:"1.0b" "muse"
  let star = create ~version:"2.4.1d" "star"
  let stringtie = create ~version:"1.2.2" "stringtie"
  let cufflinks = create ~version:"2.2.1" "cufflinks"

  (* [hisat] and [hisat2] share the tool name "hisat" and differ only in
     their version string. *)
  let hisat = create ~version:"0.1.6-beta" "hisat"
  let hisat2 = create ~version:"2.0.2-beta" "hisat"

  let mosaik = create ~version:"2.2.3" "mosaik"
  let kallisto = create ~version:"0.42.3" "kallisto"
  let bowtie = create ~version:"1.1.2" "bowtie"
  let fastqc = create ~version:"0.11.5" "fastqc"
  let igvxml = create ~version:"0.1.0" "igvxml"
  let hlarp = create ~version:"biokepi-branch" "hlarp"
  let samblaster = create ~version:"v.0.1.22" "samblaster"
  let delly2 = create ~version:"0.7.7" "delly2"
  let optitype = create ~version:"1.2.1-0" "optitype"
  let seqtk = create ~version:"1.2" "seqtk"
  let seq2hla = create ~version:"2.2" "seq2hla"
  let picard = create ~version:"2.9.2" "picard"
  let snpeff = create ~version:"4.3.1m-0" "snpeff"
  let pyensembl = create ~version:"1.1.0" "pyensembl"
  let vcfannotatepolyphen = create ~version:"0.1.2" "vcf-annotate-polyphen"
  let topiary = create ~version:"1.2.1" "topiary"
  let vaxrank = create ~version:"0.6.0" "vaxrank"
  let isovar = create ~version:"0.7.0" "isovar"
end
(* A tool: its definition together with the two values exposed by the
   [init] and [ensure] accessors below. *)
type t = {
(* Name/version identity of the tool. *)
definition: Definition.t;
(* Program stored for the tool; [create] defaults it to an [echo] that
   mentions the tool.
   NOTE(review): presumably run to set up the tool's environment before
   it is used — confirm against callers. *)
init: Program.t;
(* Workflow node stored for the tool; [create] defaults it to a node
   named "<tool>-ensured".
   NOTE(review): presumably the node that installs/checks the tool —
   confirm against callers. *)
ensure: phony_workflow;
}
(** [create ?init ?ensure definition] builds a tool for [definition].

    - [?init] defaults to a program echoing
      ["Tool <definition>: default init"].
    - [?ensure], when given, is passed through [KEDSL.forget_product];
      otherwise a [workflow_node nothing] named ["<definition>-ensured"]
      is used. *)
let create ?init ?ensure definition =
  let label = Definition.to_string definition in
  (* Both fallbacks are built unconditionally, exactly as when they were
     passed as [~default] arguments. *)
  let fallback_ensure =
    workflow_node nothing ~name:(sprintf "%s-ensured" label)
  in
  let fallback_init = Program.shf "echo 'Tool %s: default init'" label in
  let init =
    match init with
    | Some program -> program
    | None -> fallback_init
  in
  let ensure =
    match ensure with
    | Some node -> KEDSL.forget_product node
    | None -> fallback_ensure
  in
  {definition; init; ensure}
(** The [init] program stored in the tool. *)
let init {init; _} = init

(** The [ensure] workflow node stored in the tool. *)
let ensure {ensure; _} = ensure
(** A kit is a lookup function from tool definitions to tools. *)
module Kit = struct
  (** Alias for the enclosing tool type, which [t] shadows below. *)
  type tool = t

  (** A kit answers [Some tool] for the definitions it can provide and
      [None] for the others. *)
  type t = Definition.t -> tool option

  (** [concat kits] queries every kit of [kits] through [List.find_map],
      i.e. the answer comes from the first kit that returns [Some _]. *)
  let concat : t list -> t =
    fun kits wanted -> List.find_map kits ~f:(fun lookup -> lookup wanted)

  (** [of_list tools] is the kit that searches [tools] for a tool whose
      definition is equal to the requested one. *)
  let of_list tools : t =
    fun wanted ->
      List.find tools ~f:(fun candidate -> candidate.definition = wanted)

  (** [get_exn kit def] is the tool [kit] provides for [def]; when the kit
      answers [None] it fails through [failwithf] with a message naming
      the tool. *)
  let get_exn kit def =
    match kit def with
    | None ->
      failwithf "Toolkit cannot provide the tool %s"
        (Definition.to_string def)
    | Some found -> found
end
end
module Make_fun = struct
(* Tags that can be attached to a program run; lists of them are what the
   [?requirements] argument of the run-function type declared right after
   this module carries. *)
module Requirement = struct
(* NOTE(review): nothing in this section interprets these tags; the
   per-tag notes below are read off the constructor names and the helper
   functions that build them — confirm against the code consuming them. *)
type t = [
(* Processor count; [stream_processor] uses 1. *)
| `Processors of int
(* Added by [downloading]; presumably means the run needs network access. *)
| `Internet_access
(* Memory class: an explicit figure ([`GB]) or a coarse size. *)
| `Memory of [
| `GB of float
| `Small
| `Big
]
(* Added by [quick]; presumably means the run is expected to be short. *)
| `Quick_run
(* NOTE(review): payload semantics of [`Spark] and [`Custom] are not
   visible in this section — confirm. *)
| `Spark of string list
| `Custom of string
(* Tags supplied through [with_self_ids]. *)
| `Self_identification of string list
] [@@deriving yojson, show]
end
(* Type of a "run this program" function: given an optional name, an
   optional list of requirements and a program, it produces a
   [KEDSL.Build_process.t]. *)
type t =
?name: string ->
?requirements: Requirement.t list ->
Program.t ->
KEDSL.Build_process.t
(** [stream_processor requirements] prepends [`Processors 1] and then
    [`Memory `Small] in front of [requirements]. *)
let stream_processor requirements =
  let with_small_memory = `Memory `Small :: requirements in
  `Processors 1 :: with_small_memory
(** [quick requirements] puts the [`Quick_run] tag in front of
    [requirements]. *)
let quick requirements =
  let tag = `Quick_run in
  tag :: requirements
(** [downloading requirements] is [stream_processor requirements] with
    the [`Internet_access] tag added in front. *)
let downloading requirements =
  let base = stream_processor requirements in
  `Internet_access :: base
(** [with_self_ids ?self_ids l] prepends [`Self_identification tags] to
    [l] when [?self_ids] is given as [tags], and returns [l] unchanged
    otherwise. *)
let with_self_ids ?self_ids l =
  let tagged_with tags = `Self_identification tags :: l in
  match self_ids with
  | None -> l
  | Some tags -> tagged_with tags
(** [with_requirements f l] is a run-function that behaves like [f] but
    always passes [l] followed by the caller's own [?requirements]
    (an empty list when omitted). [?name] is forwarded untouched. *)
let with_requirements : t -> Requirement.t list -> t =
  fun run_with extra ->
    fun ?name ?requirements program ->
      let from_caller =
        match requirements with
        | Some given -> given
        | None -> []
      in
      run_with ?name ~requirements:(extra @ from_caller) program
end
(* Description of a machine: the host it corresponds to, the tools it can
   provide, how programs are run on it, and a few settings. *)
type t = {
(* Machine name; quoted in the error message of [get_tool]. *)
name: string;
(* Host value returned (possibly with a replaced shell) by [as_host]. *)
host: Host.t;
(* Optional cache directory, exposed by [get_pyensembl_cache_dir].
   NOTE(review): presumably the pyensembl data cache location — confirm. *)
pyensembl_cache_dir: string option;
(* Maps a string to a reference genome.
   NOTE(review): presumably the string is the genome's name — confirm. *)
get_reference_genome: string -> Reference_genome.t;
(* Lookup from tool definitions to tools, queried by [get_tool]. *)
toolkit: Tool.Kit.t;
(* Base run-function; the [run_*_program] helpers wrap it with extra
   requirements. *)
run_program: Make_fun.t;
(* Directory exposed by [work_dir].
   NOTE(review): presumably where results are written — confirm. *)
work_dir: string;
(* Value exposed by [max_processors]; not enforced anywhere in this
   section. *)
max_processors: int;
}
(** [create ~host ?pyensembl_cache_dir ~get_reference_genome ~toolkit
    ~run_program ~work_dir ~max_processors name] packs its arguments,
    unchanged, into a machine record. *)
let create
    ~host ?pyensembl_cache_dir ~get_reference_genome ~toolkit
    ~run_program ~work_dir ~max_processors name =
  {
    name;
    host;
    pyensembl_cache_dir;
    get_reference_genome;
    toolkit;
    run_program;
    work_dir;
    max_processors;
  }
(** The name the machine was created with. *)
let name {name; _} = name
(** [as_host ?with_shell t] is the host of [t].

    Without [?with_shell] the stored host is returned as is. With
    [~with_shell:sh], the host is converted to a URI, any existing
    ["shell"] query parameter is removed, ["shell"] is set to ["<sh>,-c"],
    and the resulting URI string is parsed back with [KEDSL.Host.parse]. *)
let as_host ?with_shell t =
  let current = t.host in
  match with_shell with
  | None -> current
  | Some shell ->
    let open Ketrew_pure in
    let key = "shell" in
    let shell_value = sprintf "%s,-c" shell in
    (* Drop the old parameter first so only one "shell" entry remains. *)
    let stripped = Uri.remove_query_param (Host.to_uri current) key in
    let rewritten = Uri.add_query_param stripped (key, [shell_value]) in
    KEDSL.Host.parse (Uri.to_string rewritten)
(** The optional pyensembl cache directory stored in the machine. *)
let get_pyensembl_cache_dir {pyensembl_cache_dir; _} = pyensembl_cache_dir

(** The machine's [string -> Reference_genome.t] lookup function. *)
let get_reference_genome {get_reference_genome; _} = get_reference_genome
(** [get_tool t tool] is the tool the machine's toolkit provides for the
    definition [tool]. When the toolkit answers [None], it fails through
    [failwithf] with a message naming both the machine and the tool. *)
let get_tool t tool =
  match t.toolkit tool with
  | None ->
    failwithf "Machine %S cannot provide the tool %s"
      t.name (Tool.Definition.to_string tool)
  | Some provided -> provided
(** The machine's base run-function, without added requirements. *)
let run_program {run_program; _} = run_program

(** The [max_processors] value the machine was created with. *)
let max_processors {max_processors; _} = max_processors
(** Run-function of [t] that always adds the requirements built by
    [Make_fun.quick []]. *)
let quick_run_program t : Make_fun.t =
  let requirements = Make_fun.quick [] in
  Make_fun.with_requirements t.run_program requirements
(** Run-function of [t] that always adds the requirements built by
    [Make_fun.stream_processor []], preceded by a [`Self_identification]
    tag when [?self_ids] is given. *)
let run_stream_processor ?self_ids t : Make_fun.t =
  let requirements =
    Make_fun.with_self_ids ?self_ids (Make_fun.stream_processor [])
  in
  Make_fun.with_requirements t.run_program requirements
(** Run-function of [t] that always adds the requirements built by
    [Make_fun.downloading []]. *)
let run_download_program t : Make_fun.t =
  let requirements = Make_fun.downloading [] in
  Make_fun.with_requirements t.run_program requirements
(** Run-function of [t] that always adds [`Memory `Big] and
    [`Processors processors] ([processors] defaults to [1]), preceded by
    a [`Self_identification] tag when [?self_ids] is given. *)
let run_big_program t :
    ?processors: int -> ?self_ids: string list -> Make_fun.t =
  fun ?(processors = 1) ?self_ids ->
    let base = [`Memory `Big; `Processors processors] in
    let requirements = Make_fun.with_self_ids ?self_ids base in
    Make_fun.with_requirements t.run_program requirements
(** The [work_dir] string the machine was created with. *)
let work_dir {work_dir; _} = work_dir
end