module Biopam
: sig
open Biokepi_run_environment
open Common
(** URL of the prebuilt opam binary downloaded by the opam installation
step. *)
val default_opam_url : string
(** Git URL used as the opam repository for targets whose [repository] is
[`Biopam]. *)
val default_biopam_url : string
(** Kind of artifact a package provides.
- [`Library v]: the witness is looked up under the package's opam [lib]
variable and its path is exported in the environment variable [v].
- [`Application]: the witness is looked up under the package's opam [bin]
variable. *)
type tool_type = [
| `Library of string
| `Application
]
(** Description of one opam-installable tool. The type is private: build
values with {!install_target}. *)
type install_target = private {
(** Tool definition handed to [Machine.Tool.create]. *)
definition: Machine.Tool.Definition.t;
tool_type : tool_type;
(** Opam package name (also used to name the dedicated opam root). *)
package : string;
(** File name, relative to the opam [bin]/[lib] directory of the package,
whose presence proves the installation. *)
witness : string;
(** Optional command builder run on the witness path; the installation is
considered done when the command returns 0. *)
test :
(host:KEDSL.Host.t -> string -> KEDSL.Command.t) option;
edges : KEDSL.workflow_edge list;
(** Extra program run as part of the tool initialization. *)
init_environment : install_path: string -> KEDSL.Program.t;
(** Whether a conda environment must be configured and activated. *)
requires_conda: bool;
repository: [ `Biopam | `Opam | `Custom of string ];
(** Compiler passed to [opam init --comp]; a repository-dependent default
is used when [None]. *)
compiler: string option;
(** Optional URL given to [opam pin add -n] before installing. *)
pin: string option;
}
(** Smart constructor for {!install_target}. Defaults: [`Application] tool
type, no test, no edges, a no-op [init_environment], no conda, the
[`Biopam] repository, and a package name derived from the definition with
[Machine.Tool.Definition.to_opam_name]. *)
val install_target:
?tool_type:tool_type ->
?test:(host: KEDSL.Host.t -> string -> KEDSL.Command.t) ->
?edges: KEDSL.workflow_edge list ->
?init_environment:(install_path:string -> KEDSL.Program.t) ->
?requires_conda:bool ->
witness:string ->
?package:string ->
?repository:[ `Biopam | `Custom of string | `Opam ] ->
?compiler:string ->
?pin: string ->
Machine.Tool.Definition.t ->
install_target
(** [provide ~run_program ~host ~install_path target] wraps the installation
workflow of [target] into a [Machine.Tool.t] together with the program that
initializes its environment. *)
val provide :
run_program: Machine.Make_fun.t ->
host: Common.KEDSL.Host.t ->
install_path:string -> install_target -> Machine.Tool.t
(** Toolkit made of the install targets predefined in this module. *)
val default :
run_program: Machine.Make_fun.t ->
host: Common.KEDSL.Host.t ->
install_path:string -> unit ->
Machine.Tool.Kit.t
end
= struct
open Biokepi_run_environment
open Common
(** Kind of artifact a package provides: a library (the string is the name of
the environment variable exported with the witness path) or an application. *)
type tool_type = [
| `Library of string
| `Application
]
(** Everything needed to install one tool through opam; see the
[install_target] function below for the defaults of each field. *)
type install_target = {
definition: Machine.Tool.Definition.t;
tool_type : tool_type;
(* Opam package name; also names the dedicated opam root. *)
package : string;
(* File looked up under the package's opam bin/lib directory. *)
witness : string;
(* Optional check run on the witness path; [default_test] when [None]. *)
test : (host:KEDSL.Host.t -> string -> KEDSL.Command.t) option;
edges : KEDSL.workflow_edge list;
init_environment : install_path: string -> KEDSL.Program.t;
requires_conda: bool;
repository: [ `Biopam | `Opam | `Custom of string ];
(* Compiler for [opam init --comp]; repository-dependent default if [None]. *)
compiler: string option;
(* URL for [opam pin add -n], if any. *)
pin: string option;
}
(** Build an [install_target].

    Every optional argument has a default: an [`Application] with no test, no
    extra edges, a placeholder [init_environment], no conda requirement and
    the [`Biopam] repository. When [package] is omitted the opam package name
    is derived from [definition]. *)
let install_target
    ?(tool_type = `Application)
    ?test
    ?(edges = [])
    ?(init_environment =
      fun ~install_path:_ -> KEDSL.Program.sh "echo 'Default Init'")
    ?(requires_conda = false)
    ~witness
    ?package
    ?(repository = `Biopam)
    ?compiler
    ?pin
    definition =
  let opam_package =
    match package with
    | None -> Machine.Tool.Definition.to_opam_name definition
    | Some explicit_name -> explicit_name
  in
  {
    definition;
    tool_type;
    package = opam_package;
    witness;
    test;
    edges;
    init_environment;
    requires_conda;
    repository;
    compiler;
    pin;
  }
(** Default witness check: a shell command on [host] that succeeds when
    [path] exists. *)
let default_test ~host path =
  let existence_check = sprintf "test -e %s" path in
  KEDSL.Command.shell ~host existence_check
(** Download URL of the opam binary fetched by [Opam.installed] (a 1.2.2
x86_64 Linux build, per the URL itself). *)
let default_opam_url =
"https://github.com/ocaml/opam/releases/download/1.2.2/opam-1.2.2-x86_64-Linux"
(** Partial application of [Conda.setup_environment] describing the Python 2
conda environment shared by the opam-installed tools. The two remaining
positional arguments are the install path and the environment name; callers
in this module pass the package name as environment name.

The environment adds the [trung] and [conda-forge] channels, pins the base
packages below to fixed versions, and lists [readline] and [ncurses] as
banned packages. *)
let get_conda_env =
Conda.setup_environment
~custom_channels: [ "trung"; "conda-forge" ]
~base_packages: [
("anaconda-client", `Version "1.2.2");
("bcftools", `Version "1.3");
("biopython", `Version "1.66");
("cairo", `Version "1.12.18");
("clyent", `Version "1.2.0");
("cycler", `Version "0.10.0");
("distribute", `Version "0.6.45");
("fontconfig", `Version "2.11.1");
("freetype", `Version "2.5.5");
("glpk", `Version "4.57");
("hdf5", `Version "1.8.15.1");
("htslib", `Version "1.3");
("libgcc", `Version "4.8.5");
("libpng", `Version "1.6.17");
("libxml2", `Version "2.9.2");
("matplotlib", `Version "1.5.1");
("mkl", `Version "11.3.1");
("numexpr", `Version "2.4.6");
("numpy", `Version "1.10.4");
("openssl", `Version "1.0.2g");
("packaging", `Version "16.7");
("pandas", `Version "0.17.1");
("pixman", `Version "0.32.6");
("pycairo", `Version "1.10.0");
("pyinstaller", `Version "3.1");
("pyomo", `Version "4.3");
("pyparsing", `Version "2.0.3");
("pyqt", `Version "4.11.4");
("pysam", `Version "0.9.0");
("pytables", `Version "3.2.2");
("python-dateutil", `Version "2.4.2");
("pytz", `Version "2015.7");
("pyyaml", `Version "3.11");
("qt", `Version "4.8.7");
("requests", `Version "2.9.1");
("samtools", `Version "1.3");
("setuptools", `Version "20.1.1");
("sip", `Version "4.16.9");
("six", `Version "1.10.0");
("sqlite", `Version "3.9.2");
("tk", `Version "8.5.18");
("wheel", `Version "0.29.0");
("yaml", `Version "0.1.6");
("zlib", `Version "1.2.8");
]
~banned_packages: [ "readline"; "ncurses" ]
~python_version:`Python2
module Opam = struct
(** Directory under [install_path] that holds the opam binary and the opam
roots. *)
let dir ~install_path = install_path // "opam_dir"
(** Path of the opam executable inside [dir]. *)
let bin ~install_path = dir ~install_path // "opam"
(** Path of the opam root called [name]. Note the precedence: [//] binds
tighter than [^], so this is [(dir // "opam-root-") ^ name]. *)
let root ~install_path name = dir ~install_path // "opam-root-" ^ name
(** Single-file product for the opam executable on [host]. *)
let target ~host ~install_path =
KEDSL.single_file ~host (bin ~install_path)
(** Workflow node that installs the opam binary.

    The program creates the opam directory, downloads [default_opam_url] into
    it under the name [opam] and makes it executable. On failure the whole
    opam directory is removed from [host]. *)
let installed ~(run_program : Machine.Make_fun.t) ~host ~install_path =
  let opam_binary = target ~host ~install_path in
  let opam_dir = dir ~install_path in
  (* Opened only after the local helpers above have been resolved, so that
     names exported by KEDSL cannot shadow them. *)
  let open KEDSL in
  let download_program =
    Program.(
      exec ["mkdir"; "-p"; opam_dir]
      && exec ["cd"; opam_dir]
      && Workflow_utilities.Download.wget_program
        ~output_filename:"opam" default_opam_url
      && shf "chmod +x %s" opam_binary#path)
  in
  let make =
    run_program
      ~requirements:[
        `Internet_access;
        `Self_identification ["opam-installation"];
      ]
      download_program
  in
  let cleanup = Workflow_utilities.Remove.path_on_host ~host opam_dir in
  workflow_node opam_binary
    ~name:"Install opam"
    ~make
    ~edges:[on_failure_activate cleanup]
(** [kcom ~root_name ~install_path k fmt ...] formats an opam command line
    and passes the resulting string to the continuation [k].

    The command is prefixed with the environment it needs: the directory of
    the opam binary is put first in [PATH] and [OPAMROOT] points to the root
    called [root_name]. [fmt] provides the opam sub-command and arguments. *)
let kcom ~root_name ~install_path k fmt =
  let opam_binary = bin ~install_path in
  let opam_root = root ~install_path root_name in
  let binary_dir = Filename.dirname opam_binary in
  ksprintf k
    ("PATH=%s:$PATH OCAMLRUNPARAM=b OPAMLOCKRETRIES=20000 OPAMBASEPACKAGES= OPAMYES=true OPAMROOT=%s %s " ^^ fmt)
    binary_dir
    opam_root
    opam_binary
(** [program_sh ?never_fail ~root_name ~install_path fmt ...] builds a
    [KEDSL.Program.t] running the opam command described by [fmt] (see
    [kcom] for the environment prefix).

    With [~never_fail:true] the command is followed by an [||] fallback so
    that the step succeeds even when opam fails. A pipe was used here before,
    which fed opam's output to [echo] (losing it), could end opam early
    through SIGPIPE, and only hid the failure when [pipefail] was off. *)
let program_sh ?(never_fail = false) ~root_name ~install_path fmt =
  kcom ~root_name ~install_path (fun s ->
      KEDSL.Program.sh
        (if never_fail
         then s ^ " || echo 'Never fails'"
         else s))
    fmt
(** Same as [program_sh] but produces a [KEDSL.Command.t] to run on
    [host]. *)
let command_shell ~root_name ~host ~install_path fmt =
  let as_command command_line = KEDSL.Command.shell ~host command_line in
  kcom ~root_name ~install_path as_command fmt
(** Name of the opam package variable under which the witness of a tool of
    the given type is looked up: [lib] for libraries, [bin] for
    applications. *)
let tool_type_to_variable tool_type =
  match tool_type with
  | `Application -> "bin"
  | `Library _ -> "lib"
(** Name of the opam root dedicated to package [package_name]. *)
let root_of_package package_name = sprintf "root-%s" package_name
(** Shell expression locating the witness of an install target.

    The result is not a resolved path: it embeds a [$(...)] command
    substitution that asks opam for the [bin] or [lib] variable of the
    package (version suffix stripped at the first dot), followed by the
    witness name. *)
let which ~install_path (it : install_target) =
  let variable = tool_type_to_variable it.tool_type in
  let unversioned_name =
    String.take_while it.package ~f:(fun c -> c <> '.') in
  let opam_query =
    kcom ~root_name:(root_of_package it.package) ~install_path
      (fun command_line -> command_line)
      "config var %s:%s" unversioned_name variable
  in
  sprintf "$(%s)" opam_query // it.witness
end
(** Git URL of the opam repository used when a target's [repository] is
[`Biopam]. *)
let default_biopam_url = "https://github.com/solvuu/biopam.git"
(** [install_tool ~run_program ~host ~install_path target] is the workflow
    node that installs [target.package] with opam in a root dedicated to
    that package.

    The node depends on the opam binary being installed, on the conda
    environment when [target.requires_conda] is set, and on every edge
    supplied in [target.edges]. Its condition is the target's [test] (or
    [default_test]) returning 0 on the witness location; the product also
    exposes that location as [shell_which]. *)
let install_tool ~(run_program : Machine.Make_fun.t) ~host ~install_path
    ({package; test; edges; repository; _ } as it) =
  let open KEDSL in
  let conda_env = get_conda_env install_path it.package in
  let run_prog name =
    run_program
      ~requirements:[
        `Internet_access;
        `Self_identification ["opam"; name; package];
      ]
  in
  let root_name = Opam.root_of_package package in
  (* The compiler default depends on the repository: Biopam ships its own
     placeholder compiler version. *)
  let default_compiler, repo_url =
    match repository with
    | `Biopam -> "0.0.0", default_biopam_url
    | `Opam -> "4.02.3", "https://opam.ocaml.org"
    | `Custom c -> "4.02.3", c
  in
  let compiler = Option.value it.compiler ~default:default_compiler in
  let pin_command =
    match it.pin with
    | None -> Program.sh "echo 'Package Not Pinned'"
    | Some url ->
      Opam.program_sh ~root_name ~install_path "pin add -n %s %s" package url
  in
  let edges =
    (* Extend the edges given with the install target instead of replacing
       them: they used to be shadowed by a fresh list and silently lost. *)
    let edges =
      depends_on (Opam.installed ~run_program ~host ~install_path) :: edges in
    if it.requires_conda
    then
      depends_on (Conda.configured ~run_program ~host ~conda_env) :: edges
    else edges in
  let name = "Installing " ^ package in
  let make =
    run_prog "install"
      Program.(
        (if it.requires_conda
         then Conda.init_env ~conda_env ()
         else sh "echo 'Does not need Conda'")
        (* NOTE(review): this removes the relative path [root_name], not the
           directory returned by [Opam.root ~install_path root_name] that the
           following commands use as OPAMROOT -- confirm which was meant. *)
        && shf "rm -fr %s" (Filename.quote root_name)
        && Opam.program_sh
          ~install_path ~root_name "init --comp=%s %s"
          compiler (Filename.quote repo_url)
        && pin_command
        && Opam.program_sh ~root_name ~install_path "install %s" package
      )
  in
  let shell_which = Opam.which ~install_path it in
  let test = (Option.value test ~default:default_test) ~host shell_which in
  let cond =
    object
      method is_done = Some (`Command_returns (test, 0))
      method shell_which = shell_which
    end
  in
  workflow_node cond ~name ~make ~edges
(** Turn an install target into a [Machine.Tool.t].

    The tool is ensured by the [install_tool] workflow. Its initialization
    program chains, in order: conda activation (when required), the target's
    own [init_environment], [eval] of the opam environment of the package
    root and, for libraries, the export of the witness location in the
    library's environment variable. *)
let provide ~run_program ~host ~install_path (it : install_target) =
  let conda_env = get_conda_env install_path it.package in
  let install_workflow =
    install_tool ~run_program ~host ~install_path it in
  let conda_step =
    if it.requires_conda
    then Conda.init_env ~conda_env ()
    else KEDSL.Program.sh "echo 'Does not need Conda'"
  in
  let opam_env_step =
    Opam.kcom ~root_name:(Opam.root_of_package it.package) ~install_path
      (KEDSL.Program.shf "eval $(%s)") "config env"
  in
  let export_step =
    match it.tool_type with
    | `Application -> KEDSL.Program.sh "echo 'No export var'"
    | `Library variable ->
      let location = install_workflow#product#shell_which in
      KEDSL.Program.shf "export %s=\"%s${%s:+:}${%s}\""
        variable location variable variable
  in
  let init =
    KEDSL.Program.(
      conda_step
      && it.init_environment ~install_path
      && opam_env_step
      && export_step)
  in
  Machine.Tool.create it.definition ~ensure:install_workflow ~init
(** Witness check that runs the tool with [--version] on [host]. *)
let test_version ~host path =
  let version_call = sprintf "%s --version" path in
  KEDSL.Command.shell ~host version_call
(** Picard 1.128, installed as a library: the location of [picard.jar] is
exported in [PICARD_JAR]. *)
let picard =
install_target
~tool_type:(`Library "PICARD_JAR")
~witness:"picard.jar"
(Machine.Tool.Definition.create "picard" ~version:"1.128")
(** Bowtie application, checked by running it with [--version]. *)
let bowtie =
install_target
~witness:"bowtie" ~test:test_version
Machine.Tool.Default.bowtie
(** seq2HLA, installed from the explicitly named package [seq2HLA.2.2]
inside a conda environment. *)
let seq2hla =
install_target
~witness:"seq2HLA" ~requires_conda:true
~package:"seq2HLA.2.2"
Machine.Tool.Default.seq2hla
(** OptiType, installed inside a conda environment.

    Its extra initialization exports [OPAMROOT] (the root path built from the
    tool name, suffixed with the tool version or [NOVERSION]) and
    [OPTITYPE_DATA], which points to the [optitype] directory under opam's
    [lib] variable. *)
let optitype =
  let definition = Machine.Tool.Default.optitype in
  let init_environment ~install_path =
    let tool_name = definition.Machine.Tool.Definition.name in
    let version_suffix =
      match definition.Machine.Tool.Definition.version with
      | Some v -> v
      | None -> "NOVERSION"
    in
    let opam_root =
      Opam.root ~install_path (Opam.root_of_package tool_name) in
    KEDSL.Program.(
      shf "export OPAMROOT=%s.%s" opam_root version_suffix
      && shf "export OPTITYPE_DATA=$(%s config var lib)/optitype"
        (Opam.bin ~install_path))
  in
  install_target
    ~witness:"OptiTypePipeline"
    ~requires_conda:true
    ~init_environment
    definition
(** igvxml, built with compiler 4.03.0 from the [`Opam] repository after
pinning the package to the hammerlab Git repository at [0.1.0]. *)
let igvxml =
install_target
~witness:"igvxml" ~test:test_version
~repository:`Opam
~compiler:"4.03.0"
~pin:"https://github.com/hammerlab/igvxml.git#0.1.0"
Machine.Tool.Default.igvxml
(** hlarp, built with compiler 4.03.0 from the [`Opam] repository after
pinning the package to the [biokepi-tracker] reference of the hammerlab Git
repository. *)
let hlarp =
install_target
~tool_type:`Application
~witness:"hlarp" ~test:test_version
~repository:`Opam
~compiler:"4.03.0"
~pin:"https://github.com/hammerlab/hlarp.git#biokepi-tracker"
Machine.Tool.Default.hlarp
(** Toolkit providing every install target predefined in this module. *)
let default :
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path: string ->
  unit ->
  _ = fun ~run_program ~host ~install_path () ->
  let provide_target = provide ~run_program ~host ~install_path in
  let targets = [picard; bowtie; seq2hla; optitype; igvxml; hlarp] in
  Machine.Tool.Kit.of_list (List.map targets ~f:provide_target)
end
module Build_machine
: sig
open Biokepi_run_environment
open Common
(** [create uri] builds a [Machine.t] named after [uri].

The host is parsed from [uri] extended with [ketrew_playground]; the path
component of [uri] is the root under which tools are installed, reference
genomes are downloaded and the work directory lives.

- [max_processors] defaults to 1.
- [run_program] defaults to daemonizing programs on the host.
- [toolkit] defaults to [Tool_providers.default_toolkit], which receives the
optional GATK, MuTect and NetMHC locations.
- [b37], when given, is returned for requests matching its name instead of
downloading that genome. *)
val create :
?max_processors : int ->
?gatk_jar_location:(unit -> Workflow_utilities.Download.tool_file_location) ->
?mutect_jar_location:(unit -> Workflow_utilities.Download.tool_file_location) ->
?netmhc_tool_locations:(unit -> Netmhc.netmhc_file_locations) ->
?pyensembl_cache_dir:string ->
?run_program:Machine.Make_fun.t ->
?toolkit:Machine.Tool.Kit.t ->
?b37:Reference_genome.t ->
string ->
Machine.t
end
= struct
open Biokepi_run_environment
open Common
(** Default way of running programs: daemonize them on [host] with the
    Python daemon. The [name] and [requirements] arguments are accepted for
    compatibility with [Machine.Make_fun.t] but are not used. *)
let default_run_program : host:KEDSL.Host.t -> Machine.Make_fun.t =
  fun ~host ?name:(_ = "biokepi-ssh-box") ?requirements:(_ = []) program ->
    let open KEDSL in
    daemonize ~using:`Python_daemon ~host program
(** Build a machine from an URI.

    The host is parsed from [uri // "ketrew_playground"] and the path of
    [uri] is the root of the [install-tools], [reference-genome] and [work]
    sub-directories. The default toolkit is computed even when [toolkit] is
    provided. Reference genomes are downloaded on demand, except for the
    optional [b37], which is returned as is when its name is requested. *)
let create
    ?(max_processors = 1)
    ?gatk_jar_location
    ?mutect_jar_location
    ?netmhc_tool_locations
    ?pyensembl_cache_dir
    ?run_program ?toolkit ?b37 uri =
  let open KEDSL in
  let host = Host.parse (uri // "ketrew_playground") in
  let meta_playground = Uri.path (Uri.of_string uri) in
  let in_playground sub_directory = meta_playground // sub_directory in
  let run_program =
    match run_program with
    | Some custom -> custom
    | None -> default_run_program ~host
  in
  let default_kit =
    Tool_providers.default_toolkit ()
      ~run_program
      ~host ~install_tools_path:(in_playground "install-tools")
      ?gatk_jar_location ?mutect_jar_location
      ?netmhc_tool_locations
  in
  let toolkit = Option.value toolkit ~default:default_kit in
  let get_reference_genome genome_name =
    match b37 with
    | Some custom_b37 when genome_name = Reference_genome.name custom_b37 ->
      custom_b37
    | Some _ | None ->
      Download_reference_genomes.get_reference_genome genome_name
        ~toolkit ~host ~run_program
        ~destination_path:(in_playground "reference-genome")
  in
  Machine.create (sprintf "ssh-box-%s" uri)
    ~max_processors
    ?pyensembl_cache_dir
    ~get_reference_genome
    ~host
    ~toolkit
    ~run_program
    ~work_dir:(in_playground "work")
end
module Conda
: sig
open Biokepi_run_environment
(** Version constraint of a conda package: the latest available one or an
exact version. *)
type conda_version_type = [
| `Latest
| `Version of string
]
(** Description of a conda environment. The type is private: build values
with {!setup_environment}. *)
type conda_environment_type = private {
(** Name of the environment (directory name under the environments
directory). *)
name: string;
python_version: [ `Python2 | `Python3 ];
(** Channels added to the conda configuration. *)
channels: string list;
(** Packages installed when the environment is configured. *)
base_packages: (string * conda_version_type) list;
(** Packages force-removed after the base packages are installed. *)
banned_packages: string list;
install_path: string;
(** Sub-directory of [install_path] holding the conda installation. *)
main_subdir: string;
(** Sub-directory of [install_path] holding the environments. *)
envs_subdir: string;
}
(** [setup_environment install_path name] describes an environment. The
[bioconda] and [r] channels always come before [custom_channels]. Defaults:
no extra channels or packages, [conda_dir] and [envs] sub-directories,
Python 2. *)
val setup_environment :
?custom_channels: string list ->
?base_packages: (string * conda_version_type) list ->
?banned_packages: string list ->
?main_subdir: string ->
?envs_subdir: string ->
?python_version: [ `Python2 | `Python3 ] ->
string ->
string ->
conda_environment_type
(** Workflow node that creates and configures the environment (depends on
conda itself being installed). *)
val configured :
conda_env: conda_environment_type ->
run_program: Machine.Make_fun.t ->
host: Common.KEDSL.Host.t ->
< is_done : Common.KEDSL.Condition.t option > Common.KEDSL.workflow_node
(** Program activating the environment unless [CONDA_PREFIX] already points
to it. *)
val init_env :
conda_env: conda_environment_type ->
unit ->
Common.KEDSL.Program.t
(** Program deactivating the environment when [CONDA_PREFIX] points to it. *)
val deactivate_env :
conda_env: conda_environment_type ->
unit ->
Common.KEDSL.Program.t
(** Directory of the environment: environments directory followed by the
environment name. *)
val environment_path :
conda_env: conda_environment_type ->
string
end
= struct
open Biokepi_run_environment
open Common
(* Alias for the path-removal helper (not referenced in the visible part of
this module). *)
let rm_path = Workflow_utilities.Remove.path_on_host
(** Version constraint of a conda package. *)
type conda_version_type = [
| `Latest
| `Version of string
]
(** Description of a conda environment; see [setup_environment] for the
defaults. *)
type conda_environment_type = {
name: string;
python_version: [ `Python2 | `Python3 ];
channels: string list;
(* Installed when the environment is configured. *)
base_packages: (string * conda_version_type) list;
(* Force-removed after the base packages are installed. *)
banned_packages: string list;
install_path: string;
(* Sub-directory of [install_path] holding the conda installation. *)
main_subdir: string;
(* Sub-directory of [install_path] holding the environments. *)
envs_subdir: string;
}
(** Describe a conda environment called [name] under [install_path].

    The [bioconda] and [r] channels are always present and come before
    [custom_channels]. *)
let setup_environment
    ?(custom_channels = [])
    ?(base_packages = [])
    ?(banned_packages = [])
    ?(main_subdir = "conda_dir")
    ?(envs_subdir = "envs")
    ?(python_version = `Python2)
    install_path
    name =
  {
    name;
    python_version;
    channels = "bioconda" :: "r" :: custom_channels;
    base_packages;
    banned_packages;
    install_path;
    main_subdir;
    envs_subdir;
  }
(** Directory of the conda installation itself. *)
let main_dir ~conda_env = conda_env.install_path // conda_env.main_subdir
(** Directory holding the environments. *)
let envs_dir ~conda_env = conda_env.install_path // conda_env.envs_subdir
(** Path of the executable [com] in the [bin] directory of the conda
installation. *)
let commands ~conda_env com = main_dir ~conda_env // "bin" // com
let bin ~conda_env = commands ~conda_env "conda"
let activate ~conda_env = commands ~conda_env "activate"
let deactivate ~conda_env = commands ~conda_env "deactivate"
(** Directory of this particular environment. *)
let environment_path ~conda_env = envs_dir ~conda_env // conda_env.name
(** [com ~conda_env fmt ...] formats a command line that starts with the
path of the conda executable. *)
let com ~conda_env fmt =
Printf.sprintf ("%s " ^^ fmt) (bin ~conda_env)
(** Workflow node that installs Miniconda.

    The program creates the install path, removes any previous conda
    directory, downloads the Miniconda installer into the install path and
    runs it in batch mode with the conda directory as prefix. The product is
    the [conda] executable. *)
let installed ~(run_program : Machine.Make_fun.t) ~host
    ~(conda_env : conda_environment_type) =
  let open KEDSL in
  let installer_url =
    "https://repo.continuum.io/miniconda/Miniconda3-4.1.11-Linux-x86_64.sh" in
  let conda_exec = single_file ~host (bin ~conda_env) in
  let conda_root = main_dir ~conda_env in
  let parent_dir = conda_env.install_path in
  let install_program =
    Program.(
      exec ["mkdir"; "-p"; parent_dir]
      && exec ["rm";"-fr"; conda_root]
      && exec ["cd"; parent_dir]
      && Workflow_utilities.Download.wget_program installer_url
      && shf "bash Miniconda3-4.1.11-Linux-x86_64.sh -b -p %s" conda_root)
  in
  let make =
    run_program
      ~requirements:[
        `Internet_access; `Self_identification ["conda"; "installation"]
      ]
      install_program
  in
  let name = sprintf "Install conda: %s" conda_env.name in
  workflow_node conda_exec ~name ~make
(** Workflow node that creates and configures a conda environment.

    Steps, in order: create the environment with the requested Python major
    version, activate it, add every channel, install every base package
    (pinned when a version is given) and force-remove every banned package.
    The node depends on [installed]; its product is the [bin/conda] file of
    the environment. *)
let configured ~(conda_env : conda_environment_type)
    ~(run_program : Machine.Make_fun.t) ~host =
  let open KEDSL in
  let env_prefix = envs_dir ~conda_env // conda_env.name in
  let python_major =
    match conda_env.python_version with
    | `Python2 -> 2
    | `Python3 -> 3
  in
  let create_env =
    com ~conda_env "create -y -q --prefix %s python=%d"
      env_prefix python_major
  in
  let add_channel channel =
    Program.shf "conda config --add channels %s" channel in
  let install_package (package, version) =
    let version_constraint =
      match version with
      | `Latest -> ""
      | `Version v -> "=" ^ v
    in
    Program.shf "conda install -y %s%s" package version_constraint
  in
  let force_rm_package package =
    Program.shf "conda remove -y --force %s" package in
  let configure_program =
    Program.(
      sh create_env
      && shf "source %s %s" (activate ~conda_env) env_prefix
      && chain (List.map conda_env.channels ~f:add_channel)
      && chain (List.map conda_env.base_packages ~f:install_package)
      && chain (List.map conda_env.banned_packages ~f:force_rm_package))
  in
  let make =
    run_program
      ~requirements:[
        `Internet_access;
        `Self_identification ["conda"; "configuration"];
      ]
      configure_program
  in
  let edges = [ depends_on (installed ~run_program ~host ~conda_env) ] in
  let product =
    (single_file ~host (env_prefix // "bin/conda")
     :> < is_done : Common.KEDSL.Condition.t option >) in
  let name = sprintf "Configure conda: %s" conda_env.name in
  workflow_node product ~make ~name ~edges
(** Program that activates the environment, unless [CONDA_PREFIX] already
    designates it, in which case it only prints a message. *)
let init_env ~conda_env () =
  let env_prefix = environment_path ~conda_env in
  let activate_script = activate ~conda_env in
  KEDSL.Program.shf
    "[ ${CONDA_PREFIX-none} != \"%s\" ] && source %s %s || echo 'Already in conda env: %s'"
    env_prefix activate_script env_prefix env_prefix
(** Program that deactivates the environment when [CONDA_PREFIX] designates
    it, and only prints a message otherwise. *)
let deactivate_env ~conda_env () =
  let env_prefix = environment_path ~conda_env in
  let deactivate_script = deactivate ~conda_env in
  KEDSL.Program.shf
    "[ ${CONDA_PREFIX-none} == \"%s\" ] && source %s || echo 'Doing nothing. The conda env is not active: %s'"
    env_prefix deactivate_script env_prefix
end
module Download_reference_genomes
: sig
open Biokepi_run_environment
(** Type of the functions that build a reference genome whose files are
fetched on [host] below [destination_path]. *)
type pull_function =
toolkit:Machine.Tool.Kit.t ->
host:Common.KEDSL.Host.t ->
run_program:Machine.Make_fun.t ->
destination_path:string -> Reference_genome.t
(** One pull function per default genome specification. *)
val pull_b37 : pull_function
val pull_b37decoy : pull_function
val pull_b38 : pull_function
val pull_hg18 : pull_function
val pull_hg19 : pull_function
val pull_mm10 : pull_function
(** Association list from genome names to their pull function. *)
val default_genome_providers : (string * pull_function) list
(** Pull function registered under the given name in
{!default_genome_providers}; fails when the name is unknown. *)
val get_reference_genome : string -> pull_function
end
= struct
open Biokepi_run_environment
open Common
open Workflow_utilities.Download
module Vcftools = Workflow_utilities.Vcftools
(** [of_specification ~toolkit ~host ~run_program ~destination_path spec]
    builds the [Reference_genome.t] described by [spec]. Every file is
    stored under [destination_path // spec.name].

    Locations are compiled to workflow nodes as follows:
    - plain and gunzip-wrapped URLs go through [wget_gunzip];
    - bunzip2-wrapped URLs go through [wget_bunzip2];
    - [`Vcf_concat] compiles each member, concatenates them with vcftools
      (taken from [toolkit]) into a temporary [-cat.vcf] file and sorts the
      result into the final file;
    - any other kind of location raises [Failure]. *)
let of_specification
    ~toolkit ~host ~run_program ~destination_path specification =
  let open Reference_genome in
  let {
    Specification.
    name;
    ensembl;
    species;
    metadata;
    fasta;
    dbsnp;
    known_indels;
    cosmic;
    exome_gtf;
    cdna;
    whess;
    major_contigs;
  } = specification in
  let dest_file f = destination_path // name // f in
  let rec compile_location filename =
    function
    | `Url url
    | `Gunzip `Url url ->
      Workflow_utilities.Download.wget_gunzip
        ~host ~run_program ~destination:(dest_file filename) url
    | `Bunzip2 `Url url ->
      Workflow_utilities.Download.wget_bunzip2
        ~host ~run_program ~destination:(dest_file filename) url
    | `Vcf_concat l ->
      let vcfs =
        List.map ~f:(fun (n, loc) -> compile_location n loc) l
      in
      let vcftools =
        Machine.Tool.Kit.get_exn toolkit Machine.Tool.Default.vcftools in
      let concated =
        let tmp_vcf =
          dest_file (Filename.chop_extension filename ^ "-cat.vcf") in
        Vcftools.vcf_concat_no_machine
          ~make_product:(fun p -> KEDSL.single_file p ~host)
          ~host ~vcftools ~run_program ~final_vcf:tmp_vcf vcfs in
      let sorted =
        let final_vcf_path = dest_file filename in
        Vcftools.vcf_sort_no_machine
          ~make_product:(fun p -> KEDSL.single_file p ~host)
          ~host ~vcftools ~run_program
          ~src:concated ~dest:final_vcf_path () in
      sorted
    | _ ->
      failwithf "Reference_genome.compile_location this kind of location is not yet implemented"
  in
  let compile_location_opt filename =
    Option.map ~f:(compile_location filename) in
  create specification
    (compile_location (name ^ ".fasta") fasta)
    ?cosmic:(compile_location_opt "cosmic.vcf" cosmic)
    ?dbsnp:(compile_location_opt "dbsnp.vcf" dbsnp)
    (* The known-indels file must come from the [known_indels] location;
       the dbSNP location was passed here before, so the dbSNP download
       ended up being used as the known-indels VCF. *)
    ?known_indels:(compile_location_opt "known_indels.vcf" known_indels)
    ?gtf:(compile_location_opt "transcripts.gtf" exome_gtf)
    ?cdna:(compile_location_opt "cdns-all.fa" cdna)
    ?whess:(compile_location_opt "whess.sqlite" whess)
(** Type shared by all the [pull_*] functions: given a toolkit, a host, a way
to run programs and a destination directory, produce the reference genome. *)
type pull_function =
toolkit:Machine.Tool.Kit.t ->
host:Common.KEDSL.Host.t ->
run_program:Machine.Make_fun.t ->
destination_path:string -> Reference_genome.t
(* Each function below applies [of_specification] to one of the default
specifications of [Reference_genome.Specification.Default]. *)
let pull_b37 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.b37
let pull_b37decoy ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.b37decoy
let pull_b38 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.b38
(* Not listed in the module signature; reachable through
[default_genome_providers] only. *)
let pull_hg38 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.hg38
let pull_hg19 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.hg19
let pull_hg18 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.hg18
let pull_mm10 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.mm10
(** Association list from the default genome names to the function that
    pulls the corresponding genome. *)
let default_genome_providers =
  let module Name = Reference_genome.Specification.Default.Name in
  [
    Name.b37, pull_b37;
    Name.b37decoy, pull_b37decoy;
    Name.b38, pull_b38;
    Name.hg38, pull_hg38;
    Name.hg18, pull_hg18;
    Name.hg19, pull_hg19;
    Name.mm10, pull_mm10;
  ]
(** [get_reference_genome name] is the pull function registered under [name]
    in [default_genome_providers].

    Fails through [failwithf] when no provider has that name (the message
    used to misspell the word genome). *)
let get_reference_genome name =
  match List.find default_genome_providers ~f:(fun (a, _) -> a = name) with
  | Some (_, pull) -> pull
  | None -> failwithf "Cannot find the reference genome called %S" name
end
module Netmhc
= struct
open Biokepi_run_environment
open Common
(* Alias for the path-removal helper, used for failure clean-up. *)
let rm_path = Workflow_utilities.Remove.path_on_host
(** Where to get the archive of each of the four NetMHC-family tools. *)
type netmhc_file_locations = {
netmhc: Workflow_utilities.Download.tool_file_location;
netmhcpan: Workflow_utilities.Download.tool_file_location;
pickpocket: Workflow_utilities.Download.tool_file_location;
netmhccons: Workflow_utilities.Download.tool_file_location;
}
(** [escape_char ~needle haystack] is [haystack] with a backslash inserted
    before every occurrence of the character [needle]. *)
let escape_char ~needle haystack =
  (* Folding over the reversed string while prepending rebuilds the
     characters in their original order. *)
  let escaped_characters =
    String.fold (String.rev haystack) ~init:[] ~f:(fun acc c ->
        if c = needle then '\\' :: c :: acc else c :: acc)
  in
  String.concat (List.map escaped_characters ~f:String.of_character)
(** Program replacing every match of [oldvalue] by [newvalue] in [file].

    The file is first moved to a [.bak] copy, [sed] writes the substituted
    contents back to the original name, then the copy is deleted. Slashes in
    both values are escaped because [/] is the [sed] delimiter. *)
let replace_value file oldvalue newvalue =
  let backup = file ^ ".bak" in
  let sed_pattern = escape_char ~needle:'/' oldvalue in
  let sed_replacement = escape_char ~needle:'/' newvalue in
  KEDSL.Program.(
    shf "mv %s %s" file backup
    && shf "sed -e 's/%s/%s/g' %s > %s"
      sed_pattern sed_replacement backup file
    && shf "rm -f %s" backup
  )
(** Program rewriting, in [file], the tab-separated [setenv] line of the
    variable [envname] so that it sets [newvalue]. *)
let replace_env_value file envname newvalue =
  let current_line_pattern = sprintf "setenv\t%s\t.*" envname in
  let replacement_line = sprintf "setenv\t%s\t%s" envname newvalue in
  replace_value file current_line_pattern replacement_line
(** String carried by a tool file location. A [`Fail] location yields a
    placeholder archive name so that the name-guessing helpers still have
    something to parse. *)
let extract_location = function
  | `Scp remote_path -> remote_path
  | `Wget url -> url
  | `Fail _ -> "NoFile-0.0b.Linux.tar.gz"
(** Guess the major version of a NetMHC archive: the character that follows
    the first dash of the archive base name, if any.

    Any exception raised while inspecting the name is turned into a
    [Failure] mentioning the location. *)
let guess_major_version tool_file_loc =
  let loc = extract_location tool_file_loc in
  try
    let archive_name = Filename.basename loc in
    begin match String.find archive_name ~f:(fun c -> c = '-') with
      | None -> None
      | Some dash_position -> String.get archive_name (dash_position + 1)
    end
  with _ ->
    ksprintf
      failwith
      "Error while guessing NetMHC major version from %s"
      loc
(** Guess the name of the folder contained in a NetMHC archive: the archive
    base name stripped of three extensions and of its last character.

    Any exception raised on the way (for instance a missing extension) is
    turned into a [Failure] mentioning the location. *)
let guess_folder_name tool_file_loc =
  let loc = extract_location tool_file_loc in
  let drop_last_char s =
    match String.sub s 0 ((String.length s) - 1) with
    | None -> s
    | Some shortened -> shortened
  in
  try
    let archive_name = Filename.basename loc in
    let without_one = Filename.chop_extension archive_name in
    let without_two = Filename.chop_extension without_one in
    let without_three = Filename.chop_extension without_two in
    drop_last_char without_three
  with _ ->
    ksprintf
      failwith
      "Error while guessing NetMHC folder name from %s"
      loc
(** Temporary directory used by the NetMHC tools. *)
let tmp_dir install_path = install_path // "tmp"
(** Python 2 conda environment, called [netmhc_conda], in which the tools are
run. *)
let netmhc_conda_env install_path =
Conda.(setup_environment
~python_version:`Python2
install_path
"netmhc_conda")
(** Directory holding the generated wrapper scripts. *)
let netmhc_runner_path install_path = install_path // "biokepi_runner"
(** Text of the bash wrapper generated for a NetMHC binary: it saves [PATH],
puts the [bin] directory of the conda environment first in [PATH], runs
[binary_path] with all the arguments it received, then restores [PATH].
[binary_name] is accepted but not used in the script.
NOTE(review): [Ketrew_pure.Internal_pervasives.fmt] is presumably a
sprintf-like formatter -- confirm. *)
let netmhc_runner_script_contents ~binary_name ~binary_path ~conda_env =
Ketrew_pure.Internal_pervasives.fmt {bash|
#!/bin/bash
# Force use the controlled python environment
OLD_PATH=$PATH
export PATH=%s:$PATH
# Run the netMHC* binary
%s "$@"
export PATH=$OLD_PATH
|bash}
Conda.((environment_path ~conda_env) // "bin")
binary_path
(** Program that writes the wrapper script of a NetMHC binary to [dest]
    through a shell here-document. Dollar signs of the script are escaped so
    that the shell writing the file does not expand them. *)
let create_netmhc_runner_cmd
    ~binary_name ~binary_path ~conda_env dest =
  let script =
    netmhc_runner_script_contents ~binary_name ~binary_path ~conda_env in
  let protected_script = escape_char ~needle:'$' script in
  let here_document =
    sprintf "cat << EOF > %s%sEOF" dest protected_script in
  KEDSL.Program.sh here_document
(** Generic installation of one NetMHC-family tool.

Returns a triple: the [Machine.Tool.t], the path of the installed binary and
the installation workflow node (so that other installations can depend on
it).

- [tool_file_loc]: where the tool archive comes from.
- [binary_name]: name of the executable; also used as the tool name.
- [example_data_file]: when given, the data archive is downloaded too and
this file (inside [data_folder_name]) is used as its witness.
- [env_setup]: list of [`ENV (name, value)] and [`GENERIC (old, new)]
rewrites applied to the tool launcher script.
- [depends]: extra workflow nodes the installation depends on.
- [data_folder_name], [data_folder_dest]: name of the data archive and the
sub-folder of the tool folder where it is extracted. *)
let default_netmhc_install
~(run_program : Machine.Make_fun.t) ~host ~install_path
~tool_file_loc ~binary_name ~example_data_file ~env_setup
?(depends=[])
?(data_folder_name="data")
?(data_folder_dest=".")
() =
let open KEDSL in
let tool_name = binary_name in
let downloaded_file =
Workflow_utilities.Download.get_tool_file
~identifier:tool_name
~run_program ~host ~install_path
tool_file_loc
in
let folder_name = guess_folder_name tool_file_loc in
(* The data URL uses the folder name with its first character replaced by a
capital N. *)
let cap_name = String.set folder_name 0 'N' in
let folder_in_url = match cap_name with Some s -> s | None -> folder_name in
let data_url =
sprintf
"http://www.cbs.dtu.dk/services/%s/%s.tar.gz"
folder_in_url
data_folder_name
in
let (one_data_file, with_data) =
match example_data_file with
| Some df -> (data_folder_name // df, true)
| None -> ("", false)
in
let downloaded_data_file =
Workflow_utilities.Download.wget_untar
~run_program ~host
~destination_folder:(install_path // folder_name // data_folder_dest)
~tar_contains:one_data_file data_url
in
let tool_path = install_path // folder_name in
let runner_folder = netmhc_runner_path install_path in
let runner_path = runner_folder // binary_name in
let binary_path = tool_path // binary_name in
(* The rewrites target the file called [binary_name] relative to the current
directory; the program below changes to [tool_path] before applying them. *)
let fix_script replacement =
match replacement with
| `ENV (e, v) -> replace_env_value binary_name e v
| `GENERIC (o, n) -> replace_value binary_name o n
in
let conda_env = netmhc_conda_env install_path in
let ensure =
workflow_node (single_file ~host binary_path)
~name:("Install NetMHC tool: " ^ tool_name)
~edges:(
[ depends_on downloaded_file;
depends_on Conda.(configured ~run_program ~host ~conda_env);
(* On failure the whole [install_path] is removed, not only this tool. *)
on_failure_activate (rm_path ~host install_path); ]
@ (if with_data then [ depends_on downloaded_data_file; ] else [])
@ (List.map depends ~f:(fun d -> depends_on d))
)
~make:(run_program
~requirements:[
`Self_identification ["netmhc"; tool_name; "installation"];
]
Program.(
shf "cd %s" install_path &&
shf "tar zxf %s" downloaded_file#product#path &&
shf "cd %s" tool_path &&
chain (List.map ~f:fix_script env_setup) &&
shf "chmod +x %s" binary_path &&
shf "mkdir -p %s" (tmp_dir install_path) &&
shf "mkdir -p %s" runner_folder &&
create_netmhc_runner_cmd
~binary_name ~binary_path ~conda_env runner_path &&
shf "chmod +x %s" runner_path
)
)
in
(* Using the tool only requires the wrapper folder in PATH and TMPDIR set. *)
let init =
Program.(
shf "export PATH=%s:$PATH" runner_folder &&
shf "export TMPDIR=%s" (tmp_dir install_path)
)
in
(Machine.Tool.create
Machine.Tool.Definition.(create binary_name)
~ensure ~init, binary_path, ensure)
(** Default launcher-script rewrites for a NetMHC tool: the home variable
    ([NMHOME] unless [home_env] is given) is set to the guessed tool folder
    and [TMPDIR] to the [tmp_dirname] sub-directory of [install_path]. *)
let guess_env_setup
    ~install_path
    ?(tmp_dirname = "tmp")
    ?(home_env = "NMHOME")
    tool_file_loc =
  let tool_home = install_path // guess_folder_name tool_file_loc in
  let tmp_path = install_path // tmp_dirname in
  [
    `ENV (home_env, tool_home);
    `ENV ("TMPDIR", tmp_path);
  ]
(** Toolkit with netMHC, netMHCpan, PickPocket and netMHCcons installed from
the archives described by [files].

netMHC is installed in one of two ways depending on the major version guessed
from the archive name: versions below 4 (or an unknown version) use the
older layout, the others the newer one. netMHCcons is configured with the
paths of the three other binaries and depends on their installations. *)
let default ~run_program ~host ~install_path ~(files:netmhc_file_locations) () =
let netmhc_mj = guess_major_version files.netmhc in
let is_old_netmhc =
match netmhc_mj with
| Some v -> (int_of_string (Char.escaped v)) < 4
| None -> true
in
let netmhc_env = guess_env_setup ~install_path files.netmhc in
(* [older_netmhc] and [newer_netmhc] are partial applications still waiting
for their final unit argument: only the selected one is applied below. *)
let older_netmhc =
default_netmhc_install ~run_program ~host ~install_path
~tool_file_loc:files.netmhc ~binary_name:"netMHC"
~example_data_file:(Some "SLA-10401/bl50/synlist")
~env_setup:(
[ `GENERIC ("/usr/local/bin/python2.5", "`which python`") ]
@ netmhc_env
)
~data_folder_name:"net"
~data_folder_dest:"etc"
in
let newer_netmhc =
default_netmhc_install ~run_program ~host ~install_path
~tool_file_loc:files.netmhc ~binary_name:"netMHC"
~example_data_file:(Some "version")
~env_setup:netmhc_env
~data_folder_name:"data"
~data_folder_dest:"."
in
let netmhc_install_func =
if is_old_netmhc then older_netmhc else newer_netmhc
in
let (netmhc, netmhc_path, netmhc_install) = netmhc_install_func () in
let (netmhcpan, netmhcpan_path, netmhcpan_install) =
default_netmhc_install ~run_program ~host ~install_path
~tool_file_loc:files.netmhcpan ~binary_name:"netMHCpan"
~example_data_file:(Some "version")
~env_setup:(guess_env_setup ~install_path files.netmhcpan) ()
in
let (pickpocket, pickpocket_path, pickpocket_install) =
default_netmhc_install ~run_program ~host ~install_path
~tool_file_loc:files.pickpocket ~binary_name:"PickPocket"
~example_data_file:None
~env_setup:(guess_env_setup ~install_path files.pickpocket) ()
in
(* netMHCcons needs to know where the three other binaries live. *)
let cons_env =
[`ENV ("NETMHC_env", netmhc_path);
`ENV ("NETMHCpan_env", netmhcpan_path);
`ENV ("PICKPOCKET_env", pickpocket_path);
] @
(guess_env_setup
~home_env:"NCHOME" ~install_path files.netmhccons
)
in
let (netmhccons, _, _) =
default_netmhc_install ~run_program ~host ~install_path
~tool_file_loc:files.netmhccons ~binary_name:"netMHCcons"
~example_data_file:(Some "BLOSUM50")
~env_setup:cons_env
~depends:[netmhc_install; netmhcpan_install; pickpocket_install]
()
in
Machine.Tool.Kit.of_list [netmhc; netmhcpan; pickpocket; netmhccons]end
module Python_package
= struct
open Biokepi_run_environment
open Common
(** Program used to install a Python package. *)
type install_tool_type = Pip | Conda
(** Where a Python package comes from. *)
type install_source_type =
(* Package name on PyPI. *)
| Package_PyPI of string
(* Package name and the source given to [pip install]. *)
| Package_Source of string * string
(* Conda package name. *)
| Package_Conda of string
(** Path of the executable [command] in the [bin] directory of the given
    conda environment. *)
let bin_in_conda_environment ~conda_env command =
  let env_path = Conda.environment_path ~conda_env in
  env_path // "bin" // command
(** Build a tool for a Python package installed in its own conda
    environment.

    The environment is named after the package (suffixed with the version
    when one is given) and lives in a package-specific conda directory. The
    installation is witnessed by the executable [check_bin] (the package name
    by default) in the [bin] directory of the environment. Combinations of
    installer and source other than pip with PyPI or a source, and conda with
    a conda or a PyPI package, raise [Failure]. *)
let create_python_tool ~host ~(run_program : Machine.Make_fun.t) ~install_path
    ?check_bin ?version ?(python_version=`Python3)
    (installation:install_tool_type * install_source_type) =
  let open KEDSL in
  (* Package specification pinned to [version] when there is one. *)
  let with_version ~sep package =
    match version with
    | Some v -> package ^ sep ^ v
    | None -> package
  in
  let install_command, name =
    match installation with
    | (Pip, Package_PyPI pname) ->
      ["pip"; "install"; with_version ~sep:"==" pname], pname
    | (Pip, Package_Source (pname, source)) ->
      ["pip"; "install"; source], pname
    | (Conda, Package_Conda pname) ->
      ["conda"; "install"; "-y"; with_version ~sep:"=" pname], pname
    | (Conda, Package_PyPI pname) ->
      ["conda"; "skeleton"; "pypi"; pname], pname
    | (Pip, Package_Conda _)
    | (Conda, Package_Source _) ->
      failwith "Installation type not supported."
  in
  let environment_name =
    match version with
    | Some v -> sprintf "%s.%s" name v
    | None -> name
  in
  let conda_env =
    Conda.setup_environment ~python_version
      ~main_subdir:(name ^ "_conda_dir") install_path environment_name
  in
  let witness_binary = Option.value check_bin ~default:name in
  let exec_check =
    single_file ~host (bin_in_conda_environment ~conda_env witness_binary) in
  let install_program =
    Program.(
      Conda.init_env ~conda_env ()
      && exec install_command)
  in
  let make =
    run_program
      ~requirements:[
        `Internet_access; `Self_identification ["python"; "installation"]
      ]
      install_program
  in
  let ensure =
    workflow_node exec_check
      ~name:("Installing Python tool: " ^ name)
      ~edges:[ depends_on Conda.(configured ~run_program ~host ~conda_env) ]
      ~make
  in
  let init = Conda.init_env ~conda_env () in
  Machine.Tool.create Machine.Tool.Definition.(create name) ~ensure ~init
(** Toolkit of the Python tools installed with pip from PyPI, each pinned to
    a fixed version. *)
let default ~host ~run_program ~install_path () =
  let from_pypi ?check_bin ~version package =
    create_python_tool ~host ~run_program ~install_path
      ?check_bin ~version (Pip, Package_PyPI package)
  in
  Machine.Tool.Kit.of_list [
    from_pypi ~version:"1.1.0" "pyensembl";
    from_pypi ~version:"0.1.2" "vcf-annotate-polyphen";
    (* isovar is witnessed by one of its scripts rather than by a binary
       named after the package. *)
    from_pypi ~version:"0.1.3" ~check_bin:"isovar-protein-sequences.py"
      "isovar";
    from_pypi ~version:"0.1.2" "topiary";
    from_pypi ~version:"0.4.2" "vaxrank";
  ]
end
module Tool_providers
= struct
open Biokepi_run_environment
open Common
(* Alias for the path-removal helper, used for failure clean-up. *)
let rm_path = Workflow_utilities.Remove.path_on_host
(** Workflow node installing a tool from a downloadable file.

    The program creates [install_path], downloads [url] there, unpacks it
    according to its suffix (tar with optional bzip2 or gzip compression,
    zip, or deb; anything else is left as is), optionally enters the
    unpacked directory ([unarchived_directory], or the tool name followed by
    a glob star) and then runs [install_program]. On failure [install_path]
    is removed. [witness] is the product of the node. *)
let generic_installation
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_path
    ~install_program ~witness ~url
    ?unarchived_directory
    ?(archive_is_directory = true)
    tool_name =
  let archive = Filename.basename url in
  let url_ends_with suffix = Filename.check_suffix url suffix in
  let archive_kind =
    if url_ends_with "bz2" then `Tar "j"
    else if url_ends_with "gz" then `Tar "z"
    else if url_ends_with "tar" then `Tar ""
    else if url_ends_with "zip" then `Zip
    else if url_ends_with "deb" then `Deb
    else `None
  in
  let open KEDSL in
  let unarchival =
    let open Program in
    let enter_directory =
      if archive_is_directory then
        [shf "cd %s" (Option.value unarchived_directory
                        ~default:(tool_name ^ "*"))]
      else [] in
    match archive_kind with
    | `Tar tar_option ->
      chain (shf "tar xvf%s %s" tar_option archive
             :: shf "rm -f %s" archive
             :: enter_directory)
    | `Zip ->
      chain (shf "unzip %s" archive
             :: shf "rm -f %s" archive
             :: enter_directory)
    | `Deb ->
      (* A deb package is an ar archive wrapping a data tarball. *)
      chain [
        exec ["ar"; "x"; archive];
        exec ["tar"; "xvfz"; "data.tar.gz"];
        exec ["rm"; "-f"; "data.tar.gz"];
      ]
    | `None -> sh "echo Not-an-archive"
  in
  let installation_program =
    Program.(
      shf "mkdir -p %s" install_path
      && shf "cd %s" install_path
      && Workflow_utilities.Download.wget_program url
      && unarchival
      && install_program
      && sh "echo Done"
    )
  in
  let make =
    run_program
      ~requirements:[
        `Internet_access;
        `Self_identification ["generic-instalation"; tool_name];
      ]
      installation_program
  in
  workflow_node
    ~name:(sprintf "Install %s" tool_name)
    witness
    ~edges:[
      on_failure_activate (rm_path ~host install_path);
    ]
    ~make
(** [git_installation ~run_program ~host ~install_path ~install_program
    ~witness ~repository ~recursive tool] builds the workflow node that
    installs [tool] from a Git repository.

    The program handed to [run_program] creates [install_path], moves into
    it, clones [repository] (with [--recursive] when [recursive] is [true])
    into a directory named after the tool, checks out the tool's version,
    and then runs [install_program] from inside the clone.  [witness] is the
    product of the node; if the node fails, the removal of [install_path]
    (see [rm_path]) is activated.

    The [version] field of [tool] is mandatory here: it is the Git revision
    to check out, and [Option.value_exn] fails with an explicit message when
    it is missing. *)
let git_installation
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_path
    ~install_program ~witness
    ~repository ~recursive tool
  =
  let open KEDSL in
  let recursive = if recursive then "--recursive" else "" in
  let version =
    (Option.value_exn
       tool.Machine.Tool.Definition.version
       ~msg:"Git_installable tool must have a version") in
  let name = tool.Machine.Tool.Definition.name in
  workflow_node
    ~name:(sprintf "Install %s %s" name version)
    witness
    ~edges:[
      on_failure_activate (rm_path ~host install_path);
    ]
    ~make:(
      run_program
        ~requirements:[
          `Internet_access;
          `Self_identification ["git-instalation"; name];
        ]
        Program.(
          shf "mkdir -p %s" install_path
          && shf "cd %s" install_path
          (* Clone into an explicit directory: the following [cd] must not
             depend on the repository URL ending with the tool's name. *)
          && shf "git clone %s %s %s" recursive repository name
          && shf "cd %s" name
          && shf "git checkout %s" version
          && install_program
          && sh "echo Done"
        ))
(* Short alias for the tool-definition module; used below for the type
   [Tool_def.t] and for [Tool_def.to_directory_name] / [Tool_def.to_string]. *)
module Tool_def = Machine.Tool.Definition
module Installable_tool = struct
  (* Description of a tool that is installed by downloading a file from a
     URL, and its translation into a [Machine.Tool.t] through
     [generic_installation]. *)

  (** Placeholder program for tools that need no install or init step. *)
  let noop = KEDSL.Program.sh "echo Nothing-done-here"

  type t = {
    tool_definition : Tool_def.t;
    (** The tool being installed (its name is passed to
        [generic_installation]). *)
    url : string;
    (** Where the file to download lives. *)
    install_program : path: string -> KEDSL.Program.t;
    (** Program run after download/unpacking; [path] is the install
        directory of the tool. *)
    init_program : path: string -> KEDSL.Program.t;
    (** Program given as [~init] to [Machine.Tool.create]; [path] is the
        install directory of the tool. *)
    witness: host: KEDSL.Host.t -> path: string -> KEDSL.unknown_product;
    (** Product of the installation node. *)
    unarchived_directory : string option;
    (** Forwarded to [generic_installation] as [?unarchived_directory]. *)
    archive_is_directory : bool;
    (** Forwarded to [generic_installation] as [~archive_is_directory]. *)
  }

  (** Build a description.  [install_program] and [init_program] default to
      [noop]; [archive_is_directory] defaults to [true]. *)
  let make ~url
      ?(install_program = fun ~path:_ -> noop)
      ?(init_program = fun ~path:_ -> noop)
      ~witness ?(archive_is_directory = true)
      ?unarchived_directory
      tool_definition =
    {tool_definition; url; install_program;
     init_program; witness; unarchived_directory; archive_is_directory}

  (** [render ~run_program ~host ~install_tools_path tool] turns the
      description into a [Machine.Tool.t].  The tool gets its own directory
      under [install_tools_path], named with [Tool_def.to_directory_name]. *)
  let render ~run_program ~host ~install_tools_path tool =
    let path =
      install_tools_path // Tool_def.to_directory_name tool.tool_definition in
    let ensure =
      generic_installation
        ?unarchived_directory:tool.unarchived_directory
        ~archive_is_directory:tool.archive_is_directory
        ~run_program ~host
        ~install_path:path
        ~install_program:(tool.install_program ~path)
        ~witness:(tool.witness ~host ~path)
        ~url:tool.url
        (tool.tool_definition.Tool_def.name)
    in
    (* The label is given explicitly, as for [install_program] above. *)
    Machine.Tool.create tool.tool_definition ~ensure
      ~init:(tool.init_program ~path)
end
module Git_installable_tool = struct
  (* Description of a tool that is installed from a Git repository, and its
     translation into a [Machine.Tool.t] through [git_installation]. *)

  (** Placeholder program for tools that need no install or init step. *)
  let noop = KEDSL.Program.sh "echo Nothing-done-here"

  type t = {
    tool_definition : Tool_def.t;
    (** The tool being installed; handed as-is to [git_installation]. *)
    repository : string;
    (** The repository to clone. *)
    recursive : bool;
    (** Forwarded to [git_installation] as [~recursive]. *)
    install_program : path: string -> KEDSL.Program.t;
    (** Program run by [git_installation] after the checkout; [path] is the
        install directory of the tool. *)
    init_program : path: string -> KEDSL.Program.t;
    (** Program given as [~init] to [Machine.Tool.create]; [path] is the
        install directory of the tool. *)
    witness: host: KEDSL.Host.t -> path: string -> KEDSL.unknown_product;
    (** Product of the installation node. *)
  }

  (** Build a description.  [install_program] and [init_program] default to
      [noop]; [recursive] defaults to [false]. *)
  let make ~repository
      ?(install_program = fun ~path:_ -> noop)
      ?(init_program = fun ~path:_ -> noop)
      ?(recursive = false)
      ~witness
      tool_definition =
    {tool_definition; repository; recursive; install_program; init_program; witness;}

  (** [render ~run_program ~host ~install_tools_path tool] turns the
      description into a [Machine.Tool.t].  The tool gets its own directory
      under [install_tools_path], named with [Tool_def.to_directory_name]. *)
  let render ~run_program ~host ~install_tools_path tool =
    let path =
      install_tools_path // Tool_def.to_directory_name tool.tool_definition in
    let ensure =
      git_installation
        ~run_program ~host
        ~install_path:path
        ~install_program:(tool.install_program ~path)
        ~witness:(tool.witness ~host ~path)
        ~repository:tool.repository
        ~recursive:tool.recursive
        tool.tool_definition
    in
    (* The label is given explicitly, as for [install_program] above. *)
    Machine.Tool.create tool.tool_definition ~ensure
      ~init:(tool.init_program ~path)
end
(** [add_to_dollar_path ~path] is the shell program that puts [path] in
    front of [$PATH]. *)
let add_to_dollar_path ~path =
  KEDSL.Program.(shf "export PATH=%s:$PATH" path)
(** [make_and_copy_bin bin ~path] is the program that runs [make] and then
    copies the file [bin] to [path]. *)
let make_and_copy_bin bin ~path =
  let open KEDSL.Program in
  sh "make" && shf "cp %s %s" bin path
(** [witness_file bin ~host ~path] is a product exposing only [is_done],
    delegated to the single file [path // bin] on [host]. *)
let witness_file bin ~host ~path =
  let file = KEDSL.single_file ~host (path // bin) in
  object
    method is_done = file#is_done
  end
(** [witness_list l ~host ~path] is a product exposing only [is_done],
    delegated to the list of files obtained by putting every element of [l]
    under [path] on [host]. *)
let witness_list l ~host ~path =
  let under_path = List.map l ~f:(fun bin -> path // bin) in
  let files = KEDSL.list_of_files ~host under_path in
  object
    method is_done = files#is_done
  end
(** BWA, from the [bwa-0.7.10] tarball: built with [make]; the [bwa] binary
    is copied to the install directory, which is added to [$PATH]. *)
let bwa =
  let url =
    "http://downloads.sourceforge.net/project/bio-bwa/bwa-0.7.10.tar.bz2" in
  let binary = "bwa" in
  Installable_tool.make ~url
    ~witness:(witness_file binary)
    ~install_program:(make_and_copy_bin binary)
    ~init_program:add_to_dollar_path
    Machine.Tool.Default.bwa
(** FreeBayes, cloned recursively from GitHub: built with [make]; the [bin]
    directory is copied to the install directory and added to [$PATH]. *)
let freebayes =
  let install_program ~path =
    KEDSL.Program.(sh "make" && shf "cp -r bin %s" path) in
  let init_program ~path =
    KEDSL.Program.shf "export PATH=%s/bin/:$PATH" path in
  Git_installable_tool.make
    ~repository:"https://github.com/ekg/freebayes.git"
    ~recursive:true
    ~install_program
    ~init_program
    ~witness:(witness_list ["bin/freebayes"; "bin/bamleftalign"])
    Machine.Tool.Default.freebayes
(** Sambamba, from the v0.6.5 Linux release tarball.  The archive is
    declared as not being a directory, so nothing is entered after
    unpacking; the witness is the file [sambamba_v0.6.5]. *)
let sambamba =
  let url =
    "https://github.com/lomereiter/sambamba/releases/download/v0.6.5/sambamba_v0.6.5_linux.tar.bz2" in
  Installable_tool.make ~url
    ~archive_is_directory:false
    ~witness:(witness_file "sambamba_v0.6.5")
    ~init_program:add_to_dollar_path
    Machine.Tool.Default.sambamba
(** StringTie, from the v1.2.2 GitHub source archive: built with [make];
    the [stringtie] binary is copied to the install directory, which is
    added to [$PATH]. *)
let stringtie =
  let url = "https://github.com/gpertea/stringtie/archive/v1.2.2.tar.gz" in
  let binary = "stringtie" in
  Installable_tool.make ~url
    ~witness:(witness_file binary)
    ~install_program:(make_and_copy_bin binary)
    ~init_program:add_to_dollar_path
    Machine.Tool.Default.stringtie
(** VCFtools, from the 0.1.12b tarball: built with [make]; [bin] and the
    Perl modules in [lib/perl5/site_perl] are copied to the install
    directory, and the init program exports [PATH] and [PERL5LIB]
    accordingly. *)
let vcftools =
  let url =
    "http://downloads.sourceforge.net/project/vcftools/vcftools_0.1.12b.tar.gz" in
  let install_program ~path =
    let open KEDSL.Program in
    sh "make"
    && shf "cp -r bin %s" path
    && shf "cp -r lib/perl5/site_perl %s" path
  in
  let init_program ~path =
    let open KEDSL.Program in
    shf "export PATH=%s/bin/:$PATH" path
    && shf "export PERL5LIB=$PERL5LIB:%s/site_perl/" path
  in
  Installable_tool.make ~url
    ~install_program
    ~init_program
    ~witness:(witness_file ("bin" // "vcftools"))
    Machine.Tool.Default.vcftools
(** Bedtools, from the v2.23.0 GitHub source archive: built with [make];
    the [bin] directory is copied to the install directory and added to
    [$PATH]. *)
let bedtools =
  let url = "https://github.com/arq5x/bedtools2/archive/v2.23.0.tar.gz" in
  let install_program ~path =
    KEDSL.Program.(sh "make" && shf "cp -r bin %s" path) in
  let init_program ~path =
    KEDSL.Program.shf "export PATH=%s/bin/:$PATH" path in
  Installable_tool.make ~url
    ~install_program
    ~init_program
    ~witness:(witness_file ("bin" // "bedtools"))
    Machine.Tool.Default.bedtools
(** MOSAIK, from the 2.2.3 source tarball (unpacked directory matched by
    [MOSAIK*]): built with [make]; the [*pe.ann] / [*se.ann] network files
    and the binaries are copied to the install directory.  The init program
    exports [PATH], [MOSAIK_PE_ANN] and [MOSAIK_SE_ANN]. *)
let mosaik =
  let url =
    "https://mosaik-aligner.googlecode.com/files/MOSAIK-2.2.3-source.tar" in
  let install_program ~path =
    let open KEDSL.Program in
    sh "make"
    && shf "cp networkFile/*pe.ann %s/pe.ann" path
    && shf "cp networkFile/*se.ann %s/se.ann" path
    && shf "cp bin/* %s" path
  in
  let init_program ~path =
    let open KEDSL.Program in
    shf "export PATH=%s:$PATH" path
    && shf "export MOSAIK_PE_ANN=%s/pe.ann" path
    && shf "export MOSAIK_SE_ANN=%s/se.ann" path
  in
  Installable_tool.make ~url
    ~unarchived_directory:"MOSAIK*"
    ~install_program
    ~init_program
    ~witness:(witness_file "MosaikAligner")
    Machine.Tool.Default.mosaik
(** STAR, from the [STAR_2.4.1d] GitHub archive (unpacked directory matched
    by [STAR-*]): nothing is compiled, the [bin/Linux_x86_64/STAR] file of
    the archive is copied to the install directory, which is added to
    [$PATH]. *)
let star =
  let url = "https://github.com/alexdobin/STAR/archive/STAR_2.4.1d.tar.gz" in
  let star_binary = "STAR" in
  let install_program ~path =
    KEDSL.Program.shf "cp %s %s" ("bin/Linux_x86_64/" ^ star_binary) path in
  Installable_tool.make ~url
    ~unarchived_directory:"STAR-*"
    ~install_program
    ~init_program:add_to_dollar_path
    ~witness:(witness_file star_binary)
    Machine.Tool.Default.star
(** [hisat tool] describes the installation of HISAT or HISAT2 from their
    pre-built Linux zip archives.  [tool] must be equal to
    [Machine.Tool.Default.hisat] or [Machine.Tool.Default.hisat2]; for any
    other definition the function fails through [failwithf].  The unpacked
    [hisat*] files are moved to the install directory, which is added to
    [$PATH]. *)
let hisat tool =
  let open KEDSL in
  let url, hisat_binary =
    let open Machine.Tool.Default in
    if tool = hisat then
      ("http://ccb.jhu.edu/software/hisat/downloads/hisat-0.1.6-beta-Linux_x86_64.zip",
       "hisat")
    else if tool = hisat2 then
      ("ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.0.2-beta-Linux_x86_64.zip",
       "hisat2")
    else
      failwithf "Can't install Hisat version: %s" (Tool_def.to_string tool)
  in
  let install_program ~path = KEDSL.Program.shf "mv hisat* %s" path in
  Installable_tool.make ~url
    ~install_program
    ~init_program:add_to_dollar_path
    ~witness:(witness_file hisat_binary)
    tool
(** Kallisto, from the v0.42.3 Linux release tarball: the whole content of
    the unpacked directory is copied to the install directory, which is
    added to [$PATH]. *)
let kallisto =
  let url = "https://github.com/pachterlab/kallisto/releases/download/v0.42.3/kallisto_linux-v0.42.3.tar.gz" in
  let install_program ~path = KEDSL.Program.shf "cp -r * %s" path in
  Installable_tool.make ~url
    ~install_program
    ~init_program:add_to_dollar_path
    ~witness:(witness_file "kallisto")
    Machine.Tool.Default.kallisto
(** Samtools, from the 1.3 release tarball.  Two builds are chained:
    [make] at the top level (for [samtools]), then [make] inside the bundled
    [htslib*/] directory (for [bgzip] and [tabix]).  All three binaries are
    copied to the install directory, which is added to [$PATH], and together
    they form the witness. *)
let samtools =
  let url = "https://github.com/samtools/samtools/releases/download/1.3/samtools-1.3.tar.bz2" in
  let toplevel_tools = ["samtools"] in
  let htslib = ["bgzip"; "tabix" ] in
  let install_program ~path =
    (* "cp <binaries separated by spaces> <path>" *)
    let copy binaries =
      KEDSL.Program.shf "cp %s %s" (String.concat binaries ~sep:" ") path in
    let open KEDSL.Program in
    sh "make"
    && copy toplevel_tools
    && sh "cd htslib*/"
    && sh "make"
    && copy htslib
    && sh "echo Done"
  in
  Installable_tool.make ~url
    ~install_program
    ~init_program:add_to_dollar_path
    ~witness:(witness_list (toplevel_tools @ htslib))
    Machine.Tool.Default.samtools
(** Cufflinks, from the 2.2.1 Linux x86_64 tarball: the files of the
    unpacked directory are copied (non-recursively) to the install
    directory, which is added to [$PATH]. *)
let cufflinks =
  let url =
    "http://cole-trapnell-lab.github.io/cufflinks/assets/downloads/cufflinks-2.2.1.Linux_x86_64.tar.gz" in
  Installable_tool.make ~url
    ~install_program:(fun ~path -> KEDSL.Program.shf "cp * %s" path)
    ~init_program:add_to_dollar_path
    ~witness:(witness_file "cufflinks")
    Machine.Tool.Default.cufflinks
(** SomaticSniper, from a Debian package: after the package is unpacked in
    the install directory, the binary [usr/bin/bam-somaticsniper1.0.3] is
    moved to [somaticsniper] at the top of that directory, which is added to
    [$PATH]. *)
let somaticsniper =
  let deb_file = "somatic-sniper1.0.3_1.0.3_amd64.deb" in
  let url =
    "http://apt.genome.wustl.edu/ubuntu/pool/main/s/somatic-sniper1.0.3/"
    ^ deb_file in
  let binary_in_deb = "usr/bin/bam-somaticsniper1.0.3" in
  let binary = "somaticsniper" in
  let install_program ~path =
    KEDSL.Program.shf "mv %s/%s %s/%s" path binary_in_deb path binary in
  Installable_tool.make ~url
    ~install_program
    ~init_program:add_to_dollar_path
    ~witness:(witness_file binary)
    Machine.Tool.Default.somaticsniper
(** VarScan: the jar file is downloaded as is and serves as witness; the
    init program exports its location as [VARSCAN_JAR]. *)
let varscan =
  let jar = "VarScan.v2.3.5.jar" in
  let url = "http://downloads.sourceforge.net/project/varscan/" ^ jar in
  let init_program ~path =
    KEDSL.Program.shf "export VARSCAN_JAR=%s/%s" path jar in
  Installable_tool.make ~url
    ~init_program
    ~witness:(witness_file jar)
    Machine.Tool.Default.varscan
(** Picard, from the 1.127 release zip: no install step; the witness is
    [picard-tools-1.127/picard.jar] and the init program exports its
    location as [PICARD_JAR]. *)
let picard =
  let url =
    "https://github.com/broadinstitute/picard/releases/download/1.127/picard-tools-1.127.zip"
  in
  let jar = "picard-tools-1.127" // "picard.jar" in
  Installable_tool.make ~url
    ~init_program:(fun ~path ->
        KEDSL.Program.shf "export PICARD_JAR=%s/%s" path jar)
    ~witness:(witness_file jar)
    Machine.Tool.Default.picard
(** [Workflow_utilities.Download.get_tool_file] specialised with the
    identifier ["broad-jar"]; used below to obtain the MuTect and GATK
    files. *)
let get_broad_jar =
  Workflow_utilities.Download.(get_tool_file ~identifier:"broad-jar")
(** [mutect_tool ~run_program ~host ~install_tools_path loc] is the MuTect
    tool: its file is obtained from [loc] with [get_broad_jar] into the
    tool's own directory under [install_tools_path], and the init program
    exports that directory as [mutect_HOME]. *)
let mutect_tool
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_tools_path loc =
  let open KEDSL in
  let definition = Machine.Tool.Default.mutect in
  let install_path =
    install_tools_path // Tool_def.to_directory_name definition in
  Machine.Tool.create definition
    ~ensure:(get_broad_jar ~run_program ~host ~install_path loc)
    ~init:(Program.shf "export mutect_HOME=%s" install_path)
(** [gatk_tool ~run_program ~host ~install_tools_path loc] is the GATK
    tool: its file is obtained from [loc] with [get_broad_jar] into the
    tool's own directory under [install_tools_path], and the init program
    exports the path of the obtained product as [GATK_JAR]. *)
let gatk_tool
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_tools_path loc =
  let open KEDSL in
  let definition = Machine.Tool.Default.gatk in
  let install_path =
    install_tools_path // Tool_def.to_directory_name definition in
  let jar_node = get_broad_jar ~run_program ~host ~install_path loc in
  let init = Program.shf "export GATK_JAR=%s" jar_node#product#path in
  Machine.Tool.create definition ~ensure:jar_node ~init
(** Strelka, from the 1.0.14 workflow tarball: configured with the [usr]
    sub-directory of the install directory as prefix, then built and
    installed with [make].  The witness is
    [usr/bin/configureStrelkaWorkflow.pl]; the init program exports the
    [usr/bin] directory as [STRELKA_BIN]. *)
let strelka =
  let url =
    "ftp://strelka:%27%27@ftp.illumina.com/v1-branch/v1.0.14/strelka_workflow-1.0.14.tar.gz" in
  let strelka_bin = "usr" // "bin" in
  let install_program ~path =
    let open KEDSL.Program in
    shf "./configure --prefix=%s" (path // "usr")
    && sh "make && make install"
  in
  let init_program ~path =
    KEDSL.Program.shf "export STRELKA_BIN=%s/%s" path strelka_bin in
  Installable_tool.make ~url
    ~install_program
    ~init_program
    ~witness:(witness_file (strelka_bin // "configureStrelkaWorkflow.pl"))
    Machine.Tool.Default.strelka
(** Virmid, from the 1.1.1 tarball: no install step, and the directory
    entered after unpacking is ["."] (i.e. the install directory itself).
    The witness is [Virmid-1.1.1/Virmid.jar]; the init program exports its
    location as [VIRMID_JAR]. *)
let virmid =
  let url =
    "http://downloads.sourceforge.net/project/virmid/virmid-1.1.1.tar.gz" in
  let jar = "Virmid-1.1.1" // "Virmid.jar" in
  Installable_tool.make ~url
    ~unarchived_directory:"."
    ~init_program:(fun ~path ->
        KEDSL.Program.shf "export VIRMID_JAR=%s/%s" path jar)
    ~witness:(witness_file jar)
    Machine.Tool.Default.virmid
(** MuSE: the downloaded file [MuSEv1.0b] is used directly as the binary;
    the install step only makes it executable, and the init program exports
    its location as [muse_bin]. *)
let muse =
  let binary = "MuSEv1.0b" in
  let url =
    "http://bioinformatics.mdanderson.org/Software/MuSE/MuSEv1.0b" in
  let at path = KEDSL.Program.shf "%s %s/%s" in
  ignore at;
  let install_program ~path =
    KEDSL.Program.shf "chmod +x %s/%s" path binary in
  let init_program ~path =
    KEDSL.Program.shf "export muse_bin=%s/%s" path binary in
  Installable_tool.make ~url
    ~install_program
    ~init_program
    ~witness:(witness_file binary)
    Machine.Tool.Default.muse
(** FastQC, from the v0.11.5 zip (unpacked directory: [FastQC]): the whole
    content is copied to the install directory and the [fastqc] file there
    is made executable.  The init program exports the location of that file
    as [FASTQC_BIN]. *)
let fastqc =
  let url =
    "http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.5.zip"
  in
  let binary = "fastqc" in
  let install_program ~path =
    let open KEDSL.Program in
    shf "cp -r * %s" path
    && shf "chmod +x %s" (path // binary)
  in
  let init_program ~path =
    KEDSL.Program.shf "export FASTQC_BIN=%s" (path // binary) in
  Installable_tool.make ~url
    ~unarchived_directory:"FastQC"
    ~install_program
    ~init_program
    ~witness:(witness_file binary)
    Machine.Tool.Default.fastqc
(** Samblaster, from the v.0.1.22 release tarball: built with [make]; the
    [samblaster] binary is copied to the install directory, which is added
    to [$PATH]. *)
let samblaster =
  let url =
    "https://github.com/GregoryFaust/samblaster/releases/download/v.0.1.22/samblaster-v.0.1.22.tar.gz" in
  let binary = "samblaster" in
  Installable_tool.make ~url
    ~witness:(witness_file binary)
    ~install_program:(make_and_copy_bin binary)
    ~init_program:add_to_dollar_path
    Machine.Tool.Default.samblaster
(** [default_tool_location msg ()] is the tool-file location used when the
    caller provided none: a [`Fail] carrying a message that mentions
    [msg]. *)
let default_tool_location msg () =
  let reason = sprintf "No location provided for %s" msg in
  (`Fail reason : Workflow_utilities.Download.tool_file_location)
(** Default locations for the NetMHC family of tools: every field is the
    [default_tool_location] for the corresponding tool name. *)
let default_netmhc_locations () : Netmhc.netmhc_file_locations =
  let open Netmhc in
  {
    netmhc = default_tool_location "NetMHC" ();
    netmhcpan = default_tool_location "NetMHCpan" ();
    pickpocket = default_tool_location "PickPocket" ();
    netmhccons = default_tool_location "NetMHCcons" ();
  }
(** [default_toolkit ~run_program ~host ~install_tools_path ()] is the
    concatenation of four kits, in this order:
    - the tools described in this module (MuTect and GATK first, then the
      URL-installed tools, then FreeBayes from Git);
    - [Biopam.default], under [install_tools_path // "biopam-kit"];
    - [Python_package.default], under [install_tools_path // "python-tools"];
    - [Netmhc.default], under [install_tools_path // "netmhc-tools"].

    The optional arguments are functions from [unit] to a location (or to
    the record of NetMHC locations); they default to
    [default_tool_location] / [default_netmhc_locations]. *)
let default_toolkit
    ~run_program
    ~host ~install_tools_path
    ?(mutect_jar_location = default_tool_location "Mutect")
    ?(gatk_jar_location = default_tool_location "GATK")
    ?(netmhc_tool_locations = default_netmhc_locations)
    () =
  let install description =
    Installable_tool.render ~run_program ~host ~install_tools_path description
  in
  let install_git description =
    Git_installable_tool.render
      ~run_program ~host ~install_tools_path description
  in
  let standalone_tools =
    Machine.Tool.Kit.of_list [
      mutect_tool ~run_program ~host ~install_tools_path
        (mutect_jar_location ());
      gatk_tool ~run_program ~host ~install_tools_path
        (gatk_jar_location ());
      install bwa;
      install samtools;
      install bedtools;
      install vcftools;
      install strelka;
      install picard;
      install somaticsniper;
      install sambamba;
      install varscan;
      install muse;
      install virmid;
      install star;
      install stringtie;
      install cufflinks;
      install (hisat Machine.Tool.Default.hisat);
      install (hisat Machine.Tool.Default.hisat2);
      install mosaik;
      install kallisto;
      install fastqc;
      install samblaster;
      install_git freebayes;
    ]
  in
  let biopam_kit =
    Biopam.default ~run_program ~host
      ~install_path:(install_tools_path // "biopam-kit") ()
  in
  let python_kit =
    Python_package.default ~run_program ~host
      ~install_path:(install_tools_path // "python-tools") ()
  in
  let netmhc_kit =
    Netmhc.default ~run_program ~host
      ~install_path:(install_tools_path // "netmhc-tools")
      ~files:(netmhc_tool_locations ()) ()
  in
  Machine.Tool.Kit.concat [
    standalone_tools;
    biopam_kit;
    python_kit;
    netmhc_kit;
  ]
end