(* code generated with [/extranfs/build-docs-opam/work/clones/biokepimaster/tools/build-doc.sh ketrew,ppx_deriving.std] *)
module Biopam : sig
open Biokepi_run_environment
open Common

(** The default location from where we download opam. *)
val default_opam_url : string

(** The default location from where we download biopam. *)
val default_biopam_url : string

type tool_type = [
  | `Library of string
  (** The export variable that points to witness. *)
  | `Application
]
(** The type of tool that we are installing via opam.

    This guides installation and determines:
    1. where we look for the witness in opam_install_path/package or
       opam_install_path/bin.
    2. Whether we export $PATH (Application) or $LIBVAR (Library). *)

(** A description of what we'd like Biopam to install. *)
type install_target = private {
  (** The package handle the Biopam package provides. *)
  definition: Machine.Tool.Definition.t;
  (** What are we installing? See tool_type. *)
  tool_type : tool_type;
  (** Name of the package: `opam install package` *)
  package : string;
  (** File that is passed to the test to determine success, and what is
      exported. *)
  witness : string;
  (** Test to determine success of the install. Defaults to `test -e witness`. *)
  test :
    (host:KEDSL.Host.t -> string -> KEDSL.Command.t) option;
  (* Install dependencies. *)
  edges : KEDSL.workflow_edge list;
  (* Transform the install and init programs, e.g. it needs to be run in a
     specific environment. Defaults to a “no-op”. *)
  init_environment : install_path: string -> KEDSL.Program.t;
  (** Whether this package requires Conda packages. *)
  requires_conda: bool;
  (** Which opam-repository the tool should come from. *)
  repository: [ `Biopam | `Opam | `Custom of string ];
  (** Which compiler should be used to create the tool's own installation opam-switch. *)
  compiler: string option;
  (** Use "opam pin". *)
  pin: string option;
}

val install_target:
  ?tool_type:tool_type ->
  ?test:(host: KEDSL.Host.t -> string -> KEDSL.Command.t) ->
  ?edges: KEDSL.workflow_edge list ->
  ?init_environment:(install_path:string -> KEDSL.Program.t) ->
  ?requires_conda:bool ->
  witness:string ->
  ?package:string ->
  ?repository:[ `Biopam | `Custom of string | `Opam ] ->
  ?compiler:string ->
  ?pin: string ->
  Machine.Tool.Definition.t ->
  install_target
(** Create install_target values.

  • tool_type: the kind of tool being installed. See tool_type. Default: `Application.
  • test: test to determine success of the install. Default: "test -e <witness>".
  • edges: dependencies to install first.
  • init_environment: transform the install and init programs, e.g. it needs to be run in a specific environment. Defaults to a “no-op”.
  • witness: name of the file (base-name) that is tested to determine the success of an installation (usually the binary for `Applications, or the JAR for Java libraries, etc.).
  • requires_conda: whether this package requires Python packages installed with Conda.
  • package: the package name in the opam sense i.e. "opam install <package-name>" (the default is to construct the package name from the Machine.Tool.Definition.t).
  • repository: Which opam-repository the tool should come from:
    • `Biopam: the Biopam project's repository (default).
    • `Opam: the default Opam repository.
    • `Custom url: use custom URL.
  • compiler: Which compiler should be used to create the tool's own installation opam-switch (the default is None corresponding to "0.0.0" for `Biopam and "4.02.3" for `Opam or `Custom _)
  • pin: use "opam pin" on the given URL.
  • anonymous argument: the tool that the installation-target provides.
*)

val provide :
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path:string -> install_target -> Machine.Tool.t
(** Provide the specified (via install_target) tool. *)

val default :
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path:string -> unit ->
  Machine.Tool.Kit.t
(** A set of default tools that have been specified in this module. *)

end = struct
(** Provide tools via Biopam: https://github.com/solvuu/biopam *)

open Biokepi_run_environment
open Common

(* What are we installing via opam. This determines where we look for the
   witness; in [opam_install_path]/package or [opam_install_path]/bin. *)
type tool_type = [
  | `Library of string
  | `Application
]

type install_target = {
  definition: Machine.Tool.Definition.t;
  tool_type : tool_type;
  package : string;
  (** What we call [opam install] with. *)
  witness : string;
  (** File that must exist after install, ex:
  • bowtie exec
  • picard.jar
  *)
  test : (host:KEDSL.Host.t -> string -> KEDSL.Command.t) option;
  edges : KEDSL.workflow_edge list;
  init_environment : install_path: string -> KEDSL.Program.t;
  requires_conda: bool;
  repository: [ `Biopam | `Opam | `Custom of string ];
  compiler: string option;
  pin: string option;
}

(* Smart constructor for [install_target]; when [?package] is absent the
   opam package name is derived from the tool definition. *)
let install_target
    ?(tool_type = `Application)
    ?test
    ?(edges = [])
    ?(init_environment =
      fun ~install_path -> KEDSL.Program.(sh "echo 'Default Init'"))
    ?(requires_conda = false)
    ~witness
    ?package
    ?(repository = `Biopam)
    ?compiler
    ?pin
    definition =
  let package =
    match package with
    | Some p -> p
    | None -> Machine.Tool.Definition.to_opam_name definition in
  {definition; tool_type; package; witness; test; edges;
   init_environment; requires_conda; repository; compiler; pin}

(* Default success test: the witness path exists on the host. *)
let default_test ~host path =
  KEDSL.Command.shell ~host (sprintf "test -e %s" path)

let default_opam_url =
  "https://github.com/ocaml/opam/releases/download/1.2.2/opam-1.2.2-x86_64-Linux"

(* Partially applied [Conda.setup_environment]: the remaining anonymous
   arguments are the install path and the environment name. *)
let get_conda_env =
  Conda.setup_environment
    ~custom_channels: [ "trung"; "conda-forge" ]
    ~base_packages: [
      ("anaconda-client", `Version "1.2.2");
      ("bcftools", `Version "1.3");
      ("biopython", `Version "1.66");
      ("cairo", `Version "1.12.18");
      ("clyent", `Version "1.2.0");
      ("cycler", `Version "0.10.0");
      ("distribute", `Version "0.6.45");
      ("fontconfig", `Version "2.11.1");
      ("freetype", `Version "2.5.5");
      ("glpk", `Version "4.57");
      ("hdf5", `Version "1.8.15.1");
      ("htslib", `Version "1.3");
      ("libgcc", `Version "4.8.5");
      ("libpng", `Version "1.6.17");
      ("libxml2", `Version "2.9.2");
      ("matplotlib", `Version "1.5.1");
      ("mkl", `Version "11.3.1");
      ("numexpr", `Version "2.4.6");
      ("numpy", `Version "1.10.4");
      ("openssl", `Version "1.0.2g");
      ("packaging", `Version "16.7");
      ("pandas", `Version "0.17.1");
      ("pixman", `Version "0.32.6");
      ("pycairo", `Version "1.10.0");
      ("pyinstaller", `Version "3.1");
      ("pyomo", `Version "4.3");
      ("pyparsing", `Version "2.0.3");
      ("pyqt", `Version "4.11.4");
      ("pysam", `Version "0.9.0");
      ("pytables", `Version "3.2.2");
      ("python-dateutil", `Version "2.4.2");
      ("pytz", `Version "2015.7");
      ("pyyaml", `Version "3.11");
      ("qt", `Version "4.8.7");
      ("requests", `Version "2.9.1");
      ("samtools", `Version "1.3");
      ("setuptools", `Version "20.1.1");
      ("sip", `Version "4.16.9");
      ("six", `Version "1.10.0");
      ("sqlite", `Version "3.9.2");
      ("tk", `Version "8.5.18");
      ("wheel", `Version "0.29.0");
      ("yaml", `Version "0.1.6");
      ("zlib", `Version "1.2.8");
    ]
    (* see https://github.com/ContinuumIO/anaconda-issues/issues/152#issuecomment-225214743 *)
    ~banned_packages: [ "readline"; "ncurses" ]
    ~python_version:`Python2

(* Hide the messy logic of calling opam in here. This should not be exported;
   use the Biopam functions directly. *)
module Opam = struct

  let dir ~install_path = install_path // "opam_dir"
  let bin ~install_path = dir ~install_path // "opam"
  let root ~install_path name = dir ~install_path // "opam-root-" ^ name

  (* TODO:
     Instead of just making sure that this file exists? Wouldn't it be better
     to make sure that a command from this program gives the right output?
     ie. $ opam --version = 1.2.2 *)
  let target ~host ~install_path =
    KEDSL.single_file ~host (bin ~install_path)

  (* A workflow to ensure that opam is installed. *)
  let installed ~(run_program : Machine.Make_fun.t) ~host ~install_path =
    let url = default_opam_url in
    let opam_exec   = target ~host ~install_path in
    let install_dir = dir ~install_path in
    let open KEDSL in
    workflow_node opam_exec
      ~name:"Install opam"
      ~make:(
        run_program
          ~requirements:[
            `Internet_access;
            `Self_identification ["opam-installation"];
          ]
          Program.(
            exec ["mkdir"; "-p"; install_dir]
            && exec ["cd"; install_dir]
            && Workflow_utilities.Download.wget_program ~output_filename:"opam" url
            && shf "chmod +x %s" opam_exec#path))
      ~edges:[
        on_failure_activate
          (Workflow_utilities.Remove.path_on_host ~host install_dir);
      ]

  (* Build an opam command line (environment prefix + opam binary + [fmt]
     arguments) and pass the resulting string to the continuation [k]. *)
  let kcom ~root_name ~install_path k fmt =
    let bin = bin ~install_path in
    let root = root ~install_path root_name in
    (*
       - PATH: we add `opam` so that installation scripts can use the tool
       - OCAMLRUNPARAM: we want OCaml backtraces
       - OPAMLOCKRETRIES: installations may run concurrently, so we bump the
         lock retries to wait instead of failing
       - OPAMBASEPACKAGES: we make sure opam does not install any package by
         default
       - OPAMYES: answer `y` to all questions (i.e. batch mode)
       - OPAMROOT: our per-package replacement for `~/.opam/`
    *)
    ksprintf k
      ("PATH=%s:$PATH OCAMLRUNPARAM=b OPAMLOCKRETRIES=20000 OPAMBASEPACKAGES= OPAMYES=true OPAMROOT=%s %s " ^^ fmt)
      (Filename.dirname bin)
      root
      bin

  (* Same as [kcom] but yields a [KEDSL.Program.t]; with [~never_fail:true]
     the command is piped into an [echo]. *)
  let program_sh ?(never_fail = false) ~root_name ~install_path fmt =
    kcom ~root_name ~install_path (fun s ->
        KEDSL.Program.sh
          (if never_fail
           then s ^ " | echo 'Never fails'"
           else s))
      fmt

  (* Same as [kcom] but yields a [KEDSL.Command.t] on [host]. *)
  let command_shell ~root_name ~host ~install_path fmt =
    kcom ~root_name ~install_path (KEDSL.Command.shell ~host) fmt

  let tool_type_to_variable = function
    | `Library _   -> "lib"
    | `Application -> "bin"

  let root_of_package p = "root-" ^ p

  (* Answer Opam 'which' questions: a shell expression for the path of the
     witness, i.e. "$(opam config var <package>:<lib|bin>)/<witness>". *)
  let which ~install_path {package; witness; tool_type; _} =
    let v = tool_type_to_variable tool_type in
    let s =
      (* The opam variable is looked up by package name without version. *)
      let package_name = String.take_while package ~f:((<>) '.') in
      kcom ~root_name:(root_of_package package) ~install_path
        (fun x -> x) "config var %s:%s" package_name v in
    (sprintf "$(%s)" s) // witness

end

let default_biopam_url = "https://github.com/solvuu/biopam.git"

(* Workflow node that installs one [install_target] in its own opam root. The
   node's condition is the target's test command (default: [default_test])
   applied to the witness path; the product also exposes [shell_which]. *)
let install_tool ~(run_program : Machine.Make_fun.t) ~host ~install_path
    ({package; test; edges; repository; _ } as it) =
  let open KEDSL in
  let conda_env = get_conda_env install_path it.package in
  let run_prog name =
    run_program
      ~requirements:[
        `Internet_access;
        `Self_identification ["opam"; name; package];
      ]
  in
  let root_name = Opam.root_of_package package in
  let default_compiler, repo_url =
    match repository with
    | `Biopam -> "0.0.0", default_biopam_url
    | `Opam -> "4.02.3", "https://opam.ocaml.org"
    | `Custom c -> "4.02.3", c
  in
  let compiler = Option.value it.compiler ~default:default_compiler in
  let pin_command =
    match it.pin with
    | None -> Program.sh "echo 'Package Not Pinned'"
    | Some url ->
      Opam.program_sh ~root_name ~install_path "pin add -n %s %s" package url
  in
  let edges =
    (* Keep the dependencies requested in the install_target (they used to be
       shadowed and silently dropped) and add opam itself. *)
    let edges =
      KEDSL.depends_on (Opam.installed ~run_program ~host ~install_path)
      :: edges in
    if it.requires_conda
    then
      depends_on (Conda.configured ~run_program ~host ~conda_env) :: edges
    else edges in
  let name = "Installing " ^ package in
  let make =
    run_prog "install"
      Program.(
        (if it.requires_conda
         then Conda.init_env ~conda_env ()
         else sh "echo 'Does not need Conda'")
        (* Remove the directory that is used as OPAMROOT below, not the bare
           root name (which is only a relative path). *)
        && shf "rm -fr %s"
          (Filename.quote (Opam.root ~install_path root_name))
        && Opam.program_sh
          ~install_path ~root_name "init --comp=%s %s"
          compiler (Filename.quote repo_url)
        && pin_command
        && Opam.program_sh ~root_name ~install_path "install %s" package
      )
  in
  let shell_which = Opam.which ~install_path it in
  let test = (Option.value test ~default:default_test) ~host shell_which in
  let cond =
    object
      method is_done = Some (`Command_returns (test, 0))
      method shell_which = shell_which
    end
  in
  workflow_node cond ~name ~make ~edges

(* Turn an [install_target] into a [Machine.Tool.t]: [ensure] installs it and
   [init] sets up Conda (if required), the target's own environment and the
   opam environment, plus the library export variable when applicable. *)
let provide ~run_program ~host ~install_path it =
  let conda_env = get_conda_env install_path it.package in
  let install_workflow =
    install_tool ~run_program ~host ~install_path it in
  let export_var =
    match it.tool_type with
    | `Application -> None
    | `Library v   ->
      let path = install_workflow#product#shell_which in
      Some KEDSL.Program.(shf "export %s=\"%s${%s:+:}${%s}\"" v path v v)
  in
  Machine.Tool.create it.definition
    ~ensure:install_workflow
    ~init:KEDSL.Program.(
        (if it.requires_conda
         then Conda.init_env ~conda_env ()
         else sh "echo 'Does not need Conda'")
        && it.init_environment ~install_path
        && Opam.kcom ~root_name:(Opam.root_of_package it.package) ~install_path
          (shf "eval $(%s)") "config env"
        && Option.value export_var ~default:(sh "echo 'No export var'")
      )

(* Alternative success test: run the witness with [--version]. *)
let test_version ~host path =
  KEDSL.Command.shell ~host (sprintf "%s --version" path)

let picard =
  install_target
    ~tool_type:(`Library "PICARD_JAR")
    ~witness:"picard.jar"
    (Machine.Tool.Definition.create "picard" ~version:"1.128")

let bowtie =
  install_target
    ~witness:"bowtie" ~test:test_version
    Machine.Tool.Default.bowtie

let seq2hla =
  install_target
    ~witness:"seq2HLA" ~requires_conda:true
    ~package:"seq2HLA.2.2" (* we need to uppercase HLA for opam *)
    Machine.Tool.Default.seq2hla

let optitype =
  install_target ~witness:"OptiTypePipeline" Machine.Tool.Default.optitype
    ~requires_conda:true
    ~init_environment:KEDSL.Program.(
        fun ~install_path ->
          let name = Machine.Tool.(Default.optitype.Definition.name) in
          let version = Machine.Tool.(Default.optitype.Definition.version) in
          shf "export OPAMROOT=%s.%s"
            (Opam.root_of_package name |> Opam.root ~install_path)
            (match version with None -> "NOVERSION" | Some v -> v)
          && shf "export OPTITYPE_DATA=$(%s config var lib)/optitype"
            (Opam.bin ~install_path)
      )

let igvxml =
  install_target
    ~witness:"igvxml" ~test:test_version
    ~repository:`Opam
    ~compiler:"4.03.0"
    ~pin:"https://github.com/hammerlab/igvxml.git#0.1.0"
    Machine.Tool.Default.igvxml

let hlarp =
  install_target
    ~tool_type:`Application
    ~witness:"hlarp" ~test:test_version
    ~repository:`Opam
    ~compiler:"4.03.0"
    ~pin:"https://github.com/hammerlab/hlarp.git#biokepi-tracker"
    Machine.Tool.Default.hlarp

(* The toolkit made of every install target defined above. *)
let default :
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path: string ->
  unit ->
  _ = fun ~run_program ~host ~install_path () ->
  Machine.Tool.Kit.of_list
    (List.map ~f:(provide ~run_program ~host ~install_path) [
        picard;
        bowtie;
        seq2hla;
        optitype;
        igvxml;
        hlarp;
      ])

end

module Build_machine : sig
(** Simplified creation of Run_environment.Machine.t values *)

open Biokepi_run_environment
open Common

(** Build a Run_environment.Machine.t with convenient default values.

    The string argument is a URI like the one expected by
    Ketrew.EDSL.Host.parse except that the “path” is the meta-playground for
    Biokepi (the ketrew playground will be
    (meta_playground // "ketrew_playground")).

    The default run_program is daemonizing with `Python_daemon.

    The default toolkit is default_toolkit from Tool_providers. This machine
    will get tools installations and data-fetching from Biokepi's defaults.
    The ?b37 argument allows to override the locations of the “B37” genome; to
    override other default please use Run_environment.Machine.create
    directly. *)
val create :
  ?max_processors : int ->
  ?gatk_jar_location:(unit -> Workflow_utilities.Download.tool_file_location) ->
  ?mutect_jar_location:(unit -> Workflow_utilities.Download.tool_file_location) ->
  ?netmhc_tool_locations:(unit -> Netmhc.netmhc_file_locations) ->
  ?pyensembl_cache_dir:string ->
  ?run_program:Machine.Make_fun.t ->
  ?toolkit:Machine.Tool.Kit.t ->
  ?b37:Reference_genome.t ->
  string ->
  Machine.t

end = struct

open Biokepi_run_environment
open Common

(* Default way of running programs: daemonize them on [host] with
   `Python_daemon. The [?name] and [?requirements] arguments are accepted to
   match [Machine.Make_fun.t] but are not used here. *)
let default_run_program : host:KEDSL.Host.t -> Machine.Make_fun.t =
  fun ~host ?(name="biokepi-ssh-box") ?(requirements = []) program ->
    let open KEDSL in
    daemonize ~using:`Python_daemon ~host program

(* See the signature for the meaning of the arguments. Tools are installed
   under <meta_playground>/install-tools, reference genomes are fetched into
   <meta_playground>/reference-genome and the work directory is
   <meta_playground>/work. *)
let create
    ?(max_processors = 1)
    ?gatk_jar_location
    ?mutect_jar_location
    ?netmhc_tool_locations
    ?pyensembl_cache_dir
    ?run_program ?toolkit ?b37 uri =
  let open KEDSL in
  let host = Host.parse (uri // "ketrew_playground") in
  let meta_playground = Uri.of_string uri |> Uri.path in
  let run_program =
    match run_program with
    | None -> default_run_program ~host
    | Some r -> r
  in
  let toolkit =
    Option.value toolkit
      ~default:(Tool_providers.default_toolkit ()
                  ~run_program
                  ~host ~install_tools_path:(meta_playground // "install-tools")
                  ?gatk_jar_location ?mutect_jar_location
                  ?netmhc_tool_locations)
  in
  Machine.create (sprintf "ssh-box-%s" uri)
    ~max_processors
    ?pyensembl_cache_dir
    ~get_reference_genome:(fun name ->
        match name, b37 with
        (* A user-provided B37 takes precedence when the names match. *)
        | name, Some some37 when name = Reference_genome.name some37 -> some37
        | name, _ ->
          Download_reference_genomes.get_reference_genome name
            ~toolkit ~host ~run_program
            ~destination_path:(meta_playground // "reference-genome"))
    ~host
    ~toolkit
    ~run_program
    ~work_dir:(meta_playground // "work")

end

module Conda : sig
open Biokepi_run_environment
type
conda_version_type = [
  | `Latest
  | `Version of string
]

type conda_environment_type = private {
  name: string; (* name of the environment *)
  python_version: [ `Python2 | `Python3 ];
  channels: string list; (* supported installation channels *)
  base_packages: (string * conda_version_type) list; (* default installations *)
  banned_packages: string list; (* packages to be removed after initial setup *)
  install_path: string; (* where to install the conda and environments *)
  main_subdir: string; (* subdir that will contain conda utilities *)
  envs_subdir: string; (* subdir that will contain the environment files *)
}

(** Helper method to configure conda environments for tools. The two anonymous
    arguments are, in order, the install path and the environment name. *)
val setup_environment :
  ?custom_channels: string list ->
  ?base_packages: (string * conda_version_type) list ->
  ?banned_packages: string list ->
  ?main_subdir: string ->
  ?envs_subdir: string ->
  ?python_version: [ `Python2 | `Python3 ] ->
  string ->
  string ->
  conda_environment_type

(** A workflow node to make sure that Conda is configured. *)
val configured :
  conda_env: conda_environment_type ->
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  < is_done : Common.KEDSL.Condition.t option > Common.KEDSL.workflow_node

(** A transform to run Programs with the Conda environment activated. *)
val init_env :
  conda_env: conda_environment_type ->
  unit ->
  Common.KEDSL.Program.t

(** A transform to deactivate the conda environment if it is already active. *)
val deactivate_env :
  conda_env: conda_environment_type ->
  unit ->
  Common.KEDSL.Program.t

(** This is the absolute path to the environment folder. *)
val environment_path :
  conda_env: conda_environment_type ->
  string

end = struct
(*
  Conda is a Python environment and package manager:
  http://conda.pydata.org/docs/
  We use it to ensure a consistent Python environment for tools that depend
  on Python.
*)

open Biokepi_run_environment
open Common

let rm_path = Workflow_utilities.Remove.path_on_host

type conda_version_type = [
  | `Latest
  | `Version of string
]

type conda_environment_type = {
  name: string;
  python_version: [ `Python2 | `Python3 ];
  channels: string list;
  base_packages: (string * conda_version_type) list;
  banned_packages: string list;
  install_path: string;
  main_subdir: string;
  envs_subdir: string;
}

(* Build an environment description; the "bioconda" and "r" channels always
   come first, followed by [custom_channels]. *)
let setup_environment
    ?(custom_channels = [])
    ?(base_packages = [])
    ?(banned_packages = [])
    ?(main_subdir = "conda_dir")
    ?(envs_subdir = "envs")
    ?(python_version = `Python2)
    install_path
    name =
  let channels = [ "bioconda"; "r" ] @ custom_channels in
  {name; python_version; channels; base_packages; banned_packages;
   install_path; main_subdir; envs_subdir}

let main_dir ~conda_env = conda_env.install_path // conda_env.main_subdir
let envs_dir ~conda_env = conda_env.install_path // conda_env.envs_subdir
let commands ~conda_env com = main_dir ~conda_env // "bin" // com
let bin ~conda_env = commands ~conda_env "conda"
let activate ~conda_env = commands ~conda_env "activate"
let deactivate ~conda_env = commands ~conda_env "deactivate"
let environment_path ~conda_env = envs_dir ~conda_env // conda_env.name

(* give a conda command. *)
let com ~conda_env fmt =
  Printf.sprintf ("%s " ^^ fmt) (bin ~conda_env)

(* A workflow to ensure that conda is installed. *)
let installed ~(run_program : Machine.Make_fun.t) ~host ~conda_env =
  let open KEDSL in
  let url =
    "https://repo.continuum.io/miniconda/Miniconda3-4.1.11-Linux-x86_64.sh" in
  let conda_exec  = single_file ~host (bin ~conda_env) in
  let install_dir = main_dir ~conda_env in
  workflow_node conda_exec
    ~name:(sprintf "Install conda: %s" conda_env.name)
    ~make:(
      run_program
        ~requirements:[
          `Internet_access;
          `Self_identification ["conda"; "installation"]
        ]
        Program.(
          exec ["mkdir"; "-p"; conda_env.install_path]
          && exec ["rm";"-fr"; install_dir]
          && exec ["cd"; conda_env.install_path]
          && Workflow_utilities.Download.wget_program url
          && shf "bash Miniconda3-4.1.11-Linux-x86_64.sh -b -p %s" install_dir
        )
    )

(* A workflow node that creates the environment, activates it, registers the
   channels, installs the base packages and force-removes the banned ones. The
   node's product is the "bin/conda" file inside the environment. *)
let configured ~conda_env ~(run_program : Machine.Make_fun.t) ~host =
  let open KEDSL in
  let env_path = environment_path ~conda_env in
  let create_env =
    com ~conda_env "create -y -q --prefix %s python=%d"
      env_path
      (match conda_env.python_version with `Python2 -> 2 | `Python3 -> 3)
  in
  let install_package (package, version) =
    Program.(
      shf "conda install -y %s%s"
        package
        (match version with `Latest -> "" | `Version v -> "=" ^ v)
    )
  in
  let force_rm_package package =
    Program.(shf "conda remove -y --force %s" package)
  in
  let make =
    run_program
      ~requirements:[
        `Internet_access;
        `Self_identification ["conda"; "configuration"];
      ]
      Program.(
        sh create_env
        && shf "source %s %s" (activate ~conda_env) env_path
        && chain (List.map ~f:(shf "conda config --add channels %s") conda_env.channels)
        && chain (List.map ~f:install_package conda_env.base_packages)
        && chain (List.map ~f:force_rm_package conda_env.banned_packages)
      )
  in
  let edges = [ depends_on (installed ~run_program ~host ~conda_env) ] in
  let product =
    (single_file ~host (env_path // "bin/conda")
     :> < is_done : Common.KEDSL.Condition.t option >)  in
  let name =
    sprintf "Configure conda: %s" conda_env.name in
  workflow_node product ~make ~name ~edges

let init_env ~conda_env () =
  let prefix = environment_path ~conda_env in
  (* if we are already within the conda environment we want, do nothing;
     otherwise, activate the new one *)
  KEDSL.Program.(
    shf "[ ${CONDA_PREFIX-none} != \"%s\" ] && source %s %s || echo 'Already in conda env: %s'"
      prefix (activate ~conda_env) prefix prefix
  )

let deactivate_env ~conda_env () =
  let prefix = environment_path ~conda_env in
  (* only deactivate when the active environment is the one we manage *)
  KEDSL.Program.(
    shf "[ ${CONDA_PREFIX-none} == \"%s\" ] && source %s || echo 'Doing nothing. The conda env is not active: %s'"
      prefix (deactivate ~conda_env) prefix
  )

end

module Download_reference_genomes : sig
(** Download reference-genomes (& associated data) with Ketrew *)

open Biokepi_run_environment

type pull_function =
  toolkit:Machine.Tool.Kit.t ->
  host:Common.KEDSL.Host.t ->
  run_program:Machine.Make_fun.t ->
  destination_path:string -> Reference_genome.t

val pull_b37 : pull_function
val pull_b37decoy : pull_function
val pull_b38 : pull_function
val pull_hg38 : pull_function
val pull_hg18 : pull_function
val pull_hg19 : pull_function
val pull_mm10 : pull_function

(** Association list from genome name to the function that pulls it. *)
val default_genome_providers : (string * pull_function) list

(** Look a genome up by name in [default_genome_providers]; fails (through
    [failwithf]) when the name is unknown. *)
val get_reference_genome : string -> pull_function

end = struct

open Biokepi_run_environment
open Common
open Workflow_utilities.Download (* All the wget* functions *)

module Vcftools = Workflow_utilities.Vcftools

(* Build a [Reference_genome.t] from a specification: every location of the
   specification is compiled into a download (or download + concat + sort)
   workflow writing under <destination_path>/<name>/. *)
let of_specification
    ~toolkit ~host ~run_program ~destination_path specification =
  let open Reference_genome in
  let {
    Specification.
    name;
    ensembl;
    species;
    metadata;
    fasta;
    dbsnp;
    known_indels;
    cosmic;
    exome_gtf; (* maybe deserves a better name? *)
    cdna;
    whess;
    major_contigs;
  } = specification in
  let dest_file f = destination_path // name // f in
  let rec compile_location filename =
    function
    | `Url url (* Right now, `wget_gunzip` is clever enough to not gunzip *)
    | `Gunzip `Url url ->
      Workflow_utilities.Download.wget_gunzip
        ~host ~run_program ~destination:(dest_file filename) url
    | `Bunzip2 `Url url ->
      Workflow_utilities.Download.wget_bunzip2
        ~host ~run_program ~destination:(dest_file filename) url
    | `Vcf_concat l ->
      let vcfs =
        List.map ~f:(fun (n, loc) -> compile_location n loc) l
      in
      let vcftools =
        Machine.Tool.Kit.get_exn toolkit Machine.Tool.Default.vcftools in
      let concated =
        let tmp_vcf =
          dest_file (Filename.chop_extension filename ^ "-cat.vcf") in
        Vcftools.vcf_concat_no_machine
          ~make_product:(fun p -> KEDSL.single_file p ~host)
          ~host ~vcftools ~run_program ~final_vcf:tmp_vcf vcfs in
      let sorted =
        let final_vcf_path = dest_file filename in
        Vcftools.vcf_sort_no_machine
          ~make_product:(fun p -> KEDSL.single_file p ~host)
          ~host ~vcftools ~run_program
          ~src:concated ~dest:final_vcf_path () in
      sorted
    | _other ->
      failwithf "Reference_genome.compile_location this kind of location is not yet implemented"
  in
  let compile_location_opt filename =
    Option.map ~f:(compile_location filename) in
  create specification
    (compile_location (name ^ ".fasta") fasta)
    ?cosmic:(compile_location_opt "cosmic.vcf" cosmic)
    ?dbsnp:(compile_location_opt "dbsnp.vcf" dbsnp)
    (* The known-indels file must come from [known_indels], not [dbsnp]. *)
    ?known_indels:(compile_location_opt "known_indels.vcf" known_indels)
    ?gtf:(compile_location_opt "transcripts.gtf" exome_gtf)
    ?cdna:(compile_location_opt "cdns-all.fa" cdna)
    ?whess:(compile_location_opt "whess.sqlite" whess)

type pull_function =
  toolkit:Machine.Tool.Kit.t ->
  host:Common.KEDSL.Host.t ->
  run_program:Machine.Make_fun.t ->
  destination_path:string -> Reference_genome.t

let pull_b37 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.b37

let pull_b37decoy ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.b37decoy

let pull_b38 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.b38

let pull_hg38 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.hg38

let pull_hg19 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.hg19

let pull_hg18 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.hg18

let pull_mm10 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.mm10

let default_genome_providers = [
  Reference_genome.Specification.Default.Name.b37, pull_b37;
  Reference_genome.Specification.Default.Name.b37decoy, pull_b37decoy;
  Reference_genome.Specification.Default.Name.b38, pull_b38;
  Reference_genome.Specification.Default.Name.hg38, pull_hg38;
  Reference_genome.Specification.Default.Name.hg18, pull_hg18;
  Reference_genome.Specification.Default.Name.hg19, pull_hg19;
  Reference_genome.Specification.Default.Name.mm10, pull_mm10;
]

let get_reference_genome name =
  match List.find default_genome_providers ~f:(fun (a, _) -> a = name) with
  | Some (_, pull) -> pull
  | None -> failwithf "Cannot find the reference genorme called %S" name

end

module Netmhc = struct

open Biokepi_run_environment
open Common

let rm_path = Workflow_utilities.Remove.path_on_host

(*
  Tested against:
    netMHC-4.0a.Linux.tar.gz // netMHC-3.4a.Linux.tar.gz
    pickpocket-1.1a.Linux.tar.gz
    netMHCpan-3.0a.Linux.tar.gz // netMHCpan-2.8a.Linux.tar.gz
    netMHCcons-1.1a.Linux.tar.gz

  Do not use custom named archives
  and keep them as they are after you
  download (i.e.
no need to insert the   data folder in them or customize the   binaries) *) type netmhc_file_locations = {   netmhc: Workflow_utilities.Download.tool_file_location;   netmhcpan: Workflow_utilities.Download.tool_file_location;   pickpocket: Workflow_utilities.Download.tool_file_location;   netmhccons: Workflow_utilities.Download.tool_file_location; } (*   The standard netMHC installation requires   customizing some of the environment variables   defined in their main binary files. The following   functions handle these replacements. *) let escape_char ~needle haystack =   let escfun c = if c = needle then ['\\'; c] else [c] in   String.rev haystack   |> String.fold ~init:[] ~f:(fun x c -> (escfun c) @ x)   |> List.map ~f:String.of_character   |> String.concat let replace_value file oldvalue newvalue =   let escape_slash = escape_char ~needle:'/' in   let file_org = file in   let file_bak = file_org ^ ".bak" in   KEDSL.Program.(     shf "mv %s %s" file_org file_bak &&     shf "sed -e 's/%s/%s/g' %s > %s"       (escape_slash oldvalue) (escape_slash newvalue) file_bak file_org &&     shf "rm -f %s" file_bak   ) let replace_env_value file envname newvalue =   let oldvalue = sprintf "setenv\t%s\t.*" envname in   let newvalue = sprintf "setenv\t%s\t%s" envname newvalue in   replace_value file oldvalue newvalue let extract_location location =   match location with   | `Scp l -> l   | `Wget l -> l   | `Fail _ -> "NoFile-0.0b.Linux.tar.gz" (*    e.g. input: /path/to/netMHC-3.4a.Linux.tar.gz   e.g. output: 3 *) let guess_major_version tool_file_loc =   let loc = extract_location tool_file_loc in   try     let basename = Filename.basename loc in     let dash_idx = String.find basename ~f:(fun c -> c ='-'in     match dash_idx with     | Some i -> String.get basename (i + 1)     | None -> None   with _ ->     ksprintf      failwith     "Error while guessing NetMHC major version from %s"     loc (*    e.g. input: /path/to/netMHC-3.4a.Linux.tar.gz   e.g. 
output: netMHC-3.4

  This guessing game is necessary to build the URL
  for the data files required for a complete installation
  and also to know what folder gets extracted from the
  archives.
*)
(** [guess_folder_name tool_file_loc] takes the basename of the archive
    location, chops three extensions off it and then drops the last
    remaining character. Any exception raised on the way is turned into a
    [Failure] naming the location. *)
let guess_folder_name tool_file_loc =
  let loc = extract_location tool_file_loc in
  (* Drop the last character; fall back to the input when [String.sub]
     yields nothing. *)
  let drop_last_char str =
    Option.value (String.sub str 0 ((String.length str) - 1)) ~default:str
  in
  try
    (* /path/to/netMHC-3.4a.Linux.tar.gz -> netMHC-3.4a.Linux.tar.gz *)
    let archive_name = Filename.basename loc in
    (* -> netMHC-3.4a.Linux.tar *)
    let without_gz = Filename.chop_extension archive_name in
    (* -> netMHC-3.4a.Linux *)
    let without_tar = Filename.chop_extension without_gz in
    (* -> netMHC-3.4a *)
    let without_platform = Filename.chop_extension without_tar in
    (* -> netMHC-3.4 *)
    drop_last_char without_platform
  with _ ->
    ksprintf
      failwith
      "Error while guessing NetMHC folder name from %s"
      loc

(*
  netMHC tools will be populating this folder,
  so this is an important "tmp" folder!
*)
(** The ["tmp"] sub-directory of [install_path]. *)
let tmp_dir install_path = install_path // "tmp"

(*
  NetMHC tools play nicely against Python 2.x
  and are known to be problematic against Python 3.x.
  For better control, we are using a Conda environment
  where we can ask for specific versions of the python
  to be used.

  In the (far) future, NetMHC tools might start asking
  for Python 3 and we will make the switch from this
  configuration.
*)
(** Python 2 Conda environment named ["netmhc_conda"], set up under
    [install_path]. *)
let netmhc_conda_env install_path =
  Conda.(
    setup_environment ~python_version:`Python2 install_path "netmhc_conda"
  )

(*
  The issue with the netMHC tools is that they specifically
  depend on a particular Python version to be able to run.
  And another wrapper script (e.g. vaxrank) might require another
  version of Python and once the wrapper calls one of these binaries
  the Python environment gets mixed up, leading to incompatibility
  issues.
We would like to create a wrapper script to their own
  wrapper script so that we can ensure the tool gets run within
  the environment we want and not the one that the original wrapper
  provides.

  The following runner/pseudo_binary logic handles this
*)
(** Directory (under [install_path]) that holds the generated runner
    scripts. *)
let netmhc_runner_path install_path = install_path // "biokepi_runner"

(** Text of the bash runner script: it puts the Conda environment's [bin]
    directory first in [PATH], runs [binary_path] with the caller's
    arguments, then restores [PATH]. The text must start and end with a
    newline because it is spliced into a here-document by
    {!create_netmhc_runner_cmd}. [binary_name] is accepted but not used. *)
let netmhc_runner_script_contents ~binary_name ~binary_path ~conda_env =
  Ketrew_pure.Internal_pervasives.fmt {bash|
#!/bin/bash
# Force use the controlled python environment
OLD_PATH=$PATH
export PATH=%s:$PATH
# Run the netMHC* binary
%s "$@"
export PATH=$OLD_PATH
|bash}
    Conda.((environment_path ~conda_env) // "bin")
    binary_path

(** Program that writes the runner script to [dest] through a
    here-document. Every ['$'] of the script is backslash-escaped so that
    the shell writing the file does not expand it. *)
let create_netmhc_runner_cmd
    ~binary_name ~binary_path ~conda_env dest =
  let script_contents =
    netmhc_runner_script_contents ~binary_name ~binary_path ~conda_env
  in
  let cmd =
    sprintf
      "cat << EOF > %s%sEOF"
      dest
      (escape_char ~needle:'$' script_contents)
  in
  KEDSL.Program.(sh cmd)
(* end of runner_script logic *)

(** Installs one NetMHC-family tool.

    The tool archive is obtained from [tool_file_loc] and untarred in
    [install_path]; when [example_data_file] is given, the data archive is
    also downloaded and checked for that file. The [env_setup] replacements
    are applied to the tool's main script and a runner script is generated.

    Returns [(tool, binary_path, install_node)]. *)
let default_netmhc_install
    ~(run_program : Machine.Make_fun.t) ~host ~install_path
    ~tool_file_loc ~binary_name ~example_data_file ~env_setup
    ?(depends=[])
    ?(data_folder_name="data")
    ?(data_folder_dest=".") (* relative to the netMHC folder *)
    () =
  let open KEDSL in
  let tool_name = binary_name in
  let downloaded_file =
    Workflow_utilities.Download.get_tool_file
      ~identifier:tool_name
      ~run_program ~host ~install_path
      tool_file_loc
  in
  let folder_name = guess_folder_name tool_file_loc in
  (* The data URL uses the folder name with its first character set to 'N'. *)
  let cap_name = String.set folder_name 0 'N' in
  let folder_in_url = match cap_name with Some s -> s | None -> folder_name in
  let data_url =
    sprintf
      "http://www.cbs.dtu.dk/services/%s/%s.tar.gz"
      folder_in_url
      data_folder_name
  in
  let (one_data_file, with_data) =
    match example_data_file with
    | Some df -> (data_folder_name // df, true)
    | None -> ("", false)
  in
  let downloaded_data_file =
    Workflow_utilities.Download.wget_untar
      ~run_program ~host
      ~destination_folder:(install_path // folder_name // data_folder_dest)
      ~tar_contains:one_data_file data_url
  in
  let tool_path = install_path // folder_name in
  let runner_folder = netmhc_runner_path install_path in
  let runner_path = runner_folder // binary_name in
  let binary_path = tool_path // binary_name in
  (* Replacements are applied to [binary_name] relative to the current
     directory; the install program below first does [cd tool_path]. *)
  let fix_script replacement =
    match replacement with
    | `ENV (e, v) -> replace_env_value binary_name e v
    | `GENERIC (o, n) -> replace_value binary_name o n
  in
  let conda_env = netmhc_conda_env install_path in
  let ensure =
    workflow_node (single_file ~host binary_path)
      ~name:("Install NetMHC tool: " ^ tool_name)
      ~edges:(
        [ depends_on downloaded_file;
          depends_on Conda.(configured ~run_program ~host ~conda_env);
          on_failure_activate (rm_path ~host install_path); ]
        @ (if with_data then [ depends_on downloaded_data_file; ] else [])
        @ (List.map depends ~f:(fun d -> depends_on d))
      )
      ~make:(run_program
        ~requirements:[
          `Self_identification ["netmhc"; tool_name; "installation"];
        ]
        Program.(
          shf "cd %s" install_path &&
          shf "tar zxf %s" downloaded_file#product#path &&
          shf "cd %s" tool_path &&
          chain (List.map ~f:fix_script env_setup) &&
          shf "chmod +x %s" binary_path &&
          shf "mkdir -p %s" (tmp_dir install_path) &&
          shf "mkdir -p %s" runner_folder &&
          create_netmhc_runner_cmd
            ~binary_name ~binary_path ~conda_env runner_path &&
          shf "chmod +x %s" runner_path
        )
      )
  in
  let init =
    Program.(
      (* no need to init conda. Runner scripts will do that for us *)
      shf "export PATH=%s:$PATH" runner_folder &&
      shf "export TMPDIR=%s" (tmp_dir install_path)
    )
  in
  (Machine.Tool.create
    Machine.Tool.Definition.(create binary_name)
    ~ensure ~init, binary_path, ensure)

(** Environment replacements for a tool: its home variable ([home_env],
    default ["NMHOME"]) is pointed at the guessed tool folder and [TMPDIR]
    at [install_path // tmp_dirname]. *)
let guess_env_setup
    ~install_path
    ?(tmp_dirname = "tmp")
    ?(home_env = "NMHOME")
    tool_file_loc =
  let folder_name = guess_folder_name tool_file_loc in
  [
    `ENV (home_env, install_path // folder_name);
    `ENV ("TMPDIR", install_path // tmp_dirname);
  ]

(** Builds the tool kit holding netMHC, netMHCpan, PickPocket and
    netMHCcons, installed from the locations given in [files]. netMHCcons
    depends on the installation of the other three. *)
let default ~run_program ~host ~install_path ~(files:netmhc_file_locations) () =
  let netmhc_mj = guess_major_version files.netmhc in
  let is_old_netmhc =
    match netmhc_mj with
    (* 4 and above uses the default name *)
    | Some v -> (int_of_string (Char.escaped v)) < 4
    | None -> true
  in
  let netmhc_env = guess_env_setup ~install_path files.netmhc in
  (* Both variants are left partially applied (no [()] yet) so that only
     the selected one is actually built below. *)
  let older_netmhc =
    default_netmhc_install ~run_program ~host ~install_path
    ~tool_file_loc:files.netmhc ~binary_name:"netMHC"
    ~example_data_file:(Some "SLA-10401/bl50/synlist")
    ~env_setup:(
      [ `GENERIC ("/usr/local/bin/python2.5", "`which python`") ]
      (* ^ -> to force netMHC binary use whatever python we have *)
      @ netmhc_env
    )
    ~data_folder_name:"net"
    ~data_folder_dest:"etc"
  in
  let newer_netmhc =
    default_netmhc_install ~run_program ~host ~install_path
    ~tool_file_loc:files.netmhc ~binary_name:"netMHC"
    ~example_data_file:(Some "version")
    ~env_setup:netmhc_env
    ~data_folder_name:"data"
    ~data_folder_dest:"."
  in
  let netmhc_install_func =
    if is_old_netmhc then older_netmhc else newer_netmhc
  in
  let (netmhc, netmhc_path, netmhc_install) = netmhc_install_func () in
  let (netmhcpan, netmhcpan_path, netmhcpan_install) =
    default_netmhc_install ~run_program ~host ~install_path
      ~tool_file_loc:files.netmhcpan ~binary_name:"netMHCpan"
      ~example_data_file:(Some "version")
      ~env_setup:(guess_env_setup ~install_path files.netmhcpan) ()
  in
  let (pickpocket, pickpocket_path, pickpocket_install) =
    default_netmhc_install ~run_program ~host ~install_path
      ~tool_file_loc:files.pickpocket ~binary_name:"PickPocket"
      ~example_data_file:None
      ~env_setup:(guess_env_setup ~install_path files.pickpocket) ()
  in
  let cons_env =
    [`ENV ("NETMHC_env", netmhc_path);
     `ENV ("NETMHCpan_env", netmhcpan_path);
     `ENV ("PICKPOCKET_env", pickpocket_path);
    ] @
    (guess_env_setup
      ~home_env:"NCHOME" ~install_path files.netmhccons
    )
  in
  let (netmhccons, _, _) =
    default_netmhc_install ~run_program ~host ~install_path
      ~tool_file_loc:files.netmhccons ~binary_name:"netMHCcons"
      ~example_data_file:(Some "BLOSUM50")
      ~env_setup:cons_env
      ~depends:[netmhc_install; netmhcpan_install; pickpocket_install]
      ()
  in
  Machine.Tool.Kit.of_list [netmhc; netmhcpan; pickpocket; netmhccons]
end
module Python_package  = struct
open Biokepi_run_environment
open Common

(** Which installer is used. *)
type install_tool_type = Pip | Conda

(** Where the package comes from; [Package_Source] carries the package
    name and the source handed to the installer. *)
type install_source_type =
  | Package_PyPI of string
  | Package_Source of string * string
  | Package_Conda of string

(** Path of [command] inside the [bin] directory of [conda_env]. *)
let bin_in_conda_environment ~conda_env command =
  Conda.(environment_path ~conda_env) // "bin" // command

(** Creates a tool that installs a Python package into its own Conda
    environment. The witness of a successful installation is the
    executable [check_bin] (default: the package name) in that
    environment's [bin] directory.
    Fails with [Failure] for unsupported installer/source pairs. *)
let create_python_tool ~host ~(run_program : Machine.Make_fun.t) ~install_path
    ?check_bin ?version ?(python_version=`Python3)
    (installation:install_tool_type * install_source_type) =
  let open KEDSL in
  let versionize ?version ~sep name = match version with
    | None -> name
    | Some v -> name ^ sep ^ v
  in
  let install_command, name =
    match installation with
    | (Pip, Package_PyPI pname) ->
      ["pip"; "install"; versionize ?version ~sep:"==" pname], pname
    | (Pip, Package_Source (pname, source)) ->
      ["pip"; "install"; source], pname
    | (Conda, Package_Conda pname) ->
      ["conda"; "install"; "-y"; versionize ?version ~sep:"=" pname], pname
    | (Conda, Package_PyPI pname) ->
      ["conda"; "skeleton"; "pypi"; pname], pname
    | _ -> failwith "Installation type not supported."
  in
  let main_subdir = name ^ "_conda_dir" in
  let conda_env =
    Conda.setup_environment ~python_version ~main_subdir install_path
      (name ^ Option.value_map ~default:"" version ~f:(sprintf ".%s"))
  in
  let single_file_check id =
    single_file ~host (bin_in_conda_environment ~conda_env id)
  in
  let exec_check =
    match check_bin with
    | None -> single_file_check name
    | Some s -> single_file_check s
  in
  let ensure =
    workflow_node exec_check
      ~name:("Installing Python tool: " ^ name)
      ~edges:[ depends_on Conda.(configured ~run_program ~host ~conda_env) ]
      ~make:(run_program
        ~requirements:[
          `Internet_access;
          `Self_identification ["python"; "installation"]
        ]
        Program.(
          Conda.init_env ~conda_env ()
          && exec install_command)
        )
  in
  let init = Conda.init_env ~conda_env () in
  Machine.Tool.create Machine.Tool.Definition.(create name) ~ensure ~init

(** Tool kit of the Python tools installed by default, each pinned to a
    version and installed from PyPI with pip. *)
let default ~host ~run_program ~install_path () =
  Machine.Tool.Kit.of_list [
    create_python_tool ~host ~run_program ~install_path
      ~version:"1.1.0" (Pip, Package_PyPI "pyensembl");
    create_python_tool ~host ~run_program ~install_path
      ~version:"0.1.2" (Pip, Package_PyPI "vcf-annotate-polyphen");
    create_python_tool ~host ~run_program ~install_path
      ~version:"0.1.3" ~check_bin:"isovar-protein-sequences.py"
      (Pip, Package_PyPI "isovar");
    create_python_tool ~host ~run_program ~install_path
      ~version:"0.1.2" (Pip, Package_PyPI "topiary");
    create_python_tool ~host ~run_program ~install_path
      ~version:"0.4.2" (Pip, Package_PyPI "vaxrank");
  ]
end
module Tool_providers  = struct
open Biokepi_run_environment
open Common

(** Shorthand for [Workflow_utilities.Remove.path_on_host]. *)
let rm_path = Workflow_utilities.Remove.path_on_host

(** Workflow node that installs [tool_name] from [url].

    In [install_path] it downloads [url], unpacks it according to the
    URL's suffix (tar variants, zip, deb; anything else is left as is),
    optionally changes into the unpacked directory
    ([unarchived_directory], default [tool_name ^ "*"]) and runs
    [install_program]. [witness] is the node's product. On failure,
    [install_path] is removed. *)
let generic_installation
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_path
    ~install_program ~witness ~url
    ?unarchived_directory
    ?(archive_is_directory = true)
    tool_name =
  let archive = Filename.basename url in
  let archive_kind =
    if Filename.check_suffix url "bz2" then `Tar "j"
    else if Filename.check_suffix url "gz"  then `Tar "z"
    else if Filename.check_suffix url "tar" then `Tar ""
    else if Filename.check_suffix url "zip" then `Zip
    else if Filename.check_suffix url "deb" then `Deb
    else `None
  in
  let open KEDSL in
  let unarchival =
    let open Program in
    let and_cd =
      if archive_is_directory then
        [shf "cd %s" (Option.value unarchived_directory
                        ~default:(tool_name ^ "*"))]
      else [] in
    match archive_kind with
    | `Tar tar_option ->
      chain ([shf "tar xvf%s %s" tar_option archive;
              shf "rm -f %s" archive; ] @ and_cd)
    | `Zip ->
      chain ([shf "unzip %s" archive; shf "rm -f %s" archive;] @ and_cd)
    | `Deb ->
      chain [
        exec ["ar"; "x"; archive];
        exec ["tar"; "xvfz"; "data.tar.gz"];
        exec ["rm"; "-f"; "data.tar.gz"];
      ]
    | `None -> sh "echo Not-an-archive"
  in
  workflow_node
    ~name:(sprintf "Install %s" tool_name)
    witness
    (* (single_file ~host *)
    (*    (Option.value witness ~default:(install_path // tool_name))) *)
    ~edges:[
      on_failure_activate (rm_path ~host install_path);
    ]
    ~make:(
      run_program
        ~requirements:[
          `Internet_access;
          `Self_identification ["generic-instalation"; tool_name];
        ]
        Program.(
          shf "mkdir -p %s" install_path
          && shf "cd %s" install_path
          && Workflow_utilities.Download.wget_program url
          && unarchival
          && install_program
          && sh "echo Done"
        ))

(** Workflow node that installs [tool] from a git [repository]: clone
    (with [--recursive] when [recursive] is true) inside [install_path],
    check out the tool's version, then run [install_program]. The tool
    definition must carry a version, otherwise [Option.value_exn] raises.
    On failure, [install_path] is removed. *)
let git_installation
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_path
    ~install_program ~witness
    ~repository ~recursive tool
  =
  let open KEDSL in
  let recursive = if recursive then "--recursive" else "" in
  let version =
    (Option.value_exn
       tool.Machine.Tool.Definition.version
       ~msg:"Git_installable tool must have a version") in
  let name = tool.Machine.Tool.Definition.name in
  workflow_node
    ~name:(sprintf "Install %s %s" name version)
    witness
    ~edges:[
      on_failure_activate (rm_path ~host install_path);
    ]
    ~make:(
      run_program
        ~requirements:[
          `Internet_access;
          `Self_identification ["git-instalation"; name];
        ]
        Program.(
          shf "mkdir -p %s" install_path
          && shf "cd %s" install_path
          && shf "git clone %s %s" recursive repository
          && shf "cd %s" name
          && shf "git checkout %s" version
          && install_program
          && sh "echo Done"
        ))

module Tool_def = Machine.Tool.Definition

(** Description of a tool installed from a downloadable URL. *)
module Installable_tool = struct

  (** Program that only echoes a message. *)
  let noop = KEDSL.Program.sh "echo Nothing-done-here"

  type t = {
    tool_definition : Tool_def.t;
    url : string;
    install_program : path: string -> KEDSL.Program.t;
    init_program : path: string -> KEDSL.Program.t;
    witness: host: KEDSL.Host.t -> path: string -> KEDSL.unknown_product;
    unarchived_directory : string option;
    archive_is_directory : bool;
  }

  (** Record constructor; [install_program] and [init_program] default to
      {!noop}, [archive_is_directory] to [true]. *)
  let make ~url
    ?(install_program = fun ~path -> noop)
    ?(init_program = fun ~path -> noop)
    ~witness ?(archive_is_directory = true)
    ?unarchived_directory
    tool_definition =
  {tool_definition; url; install_program;
   init_program; witness; unarchived_directory; archive_is_directory}

  (** Turns the description into a [Machine.Tool] installed, through
      {!generic_installation}, in a directory derived from the tool
      definition under [install_tools_path]. *)
  let render ~run_program ~host ~install_tools_path tool =
    let path =
      install_tools_path // Tool_def.to_directory_name tool.tool_definition in
    let ensure =
      generic_installation
        ?unarchived_directory:tool.unarchived_directory
        ~archive_is_directory:tool.archive_is_directory
        ~run_program ~host
        ~install_path:path
        ~install_program:(tool.install_program ~path)
        ~witness:(tool.witness ~host ~path)
        ~url:tool.url
        (tool.tool_definition.Tool_def.name)
    in
    Machine.Tool.create tool.tool_definition ~ensure
      ~init:(tool.init_program path)
end

(** Description of a tool installed from a git repository. *)
module Git_installable_tool = struct

  (** Program that only echoes a message. *)
  let noop = KEDSL.Program.sh "echo Nothing-done-here"

  type t = {
    tool_definition : Tool_def.t;
    repository : string;
    recursive : bool;
    install_program : path: string -> KEDSL.Program.t;
    init_program : path: string -> KEDSL.Program.t;
    witness: host: KEDSL.Host.t -> path: string -> KEDSL.unknown_product;
  }

  (** Record constructor; [install_program] and [init_program] default to
      {!noop}, [recursive] to [false]. *)
  let make ~repository
    ?(install_program = fun ~path -> noop)
    ?(init_program = fun ~path -> noop)
    ?(recursive = false)
    ~witness
    tool_definition =
  {tool_definition; repository; recursive; install_program; init_program; witness;}

  (** Turns the description into a [Machine.Tool] installed, through
      {!git_installation}, in a directory derived from the tool
      definition under [install_tools_path]. *)
  let render ~run_program ~host ~install_tools_path tool =
    let path =
      install_tools_path // Tool_def.to_directory_name tool.tool_definition in
    let ensure =
      git_installation
        ~run_program ~host
        ~install_path:path
        ~install_program:(tool.install_program ~path)
        ~witness:(tool.witness ~host ~path)
        ~repository:tool.repository
        ~recursive:tool.recursive
        tool.tool_definition
    in
    Machine.Tool.create tool.tool_definition ~ensure
      ~init:(tool.init_program path)
end

(** Init program that prepends [path] to [$PATH]. *)
let add_to_dollar_path ~path = KEDSL.Program.shf "export PATH=%s:$PATH" path

(** Install program: run [make], then copy [bin] into [path]. *)
let make_and_copy_bin bin =
  fun ~path -> KEDSL.Program.(
      sh "make" && shf "cp %s %s" bin path)

(** Witness: the single file [path // bin] on [host]; only its [is_done]
    method is exposed. *)
let witness_file bin =
  fun ~host ~path ->
    let p = KEDSL.single_file ~host (path // bin) in
    object method is_done = p#is_done end

(** Witness: every file of [l], each taken relative to [path], on [host];
    only [is_done] is exposed. *)
let witness_list l =
  fun ~host ~path ->
    KEDSL.list_of_files ~host (List.map l ~f:(fun bin -> path // bin))
    |> fun p -> object method is_done = p#is_done end

let bwa =
  Installable_tool.make
    Machine.Tool.Default.bwa
    ~url:"http://downloads.sourceforge.net/project/bio-bwa/bwa-0.7.10.tar.bz2"
    ~install_program:(make_and_copy_bin "bwa")
    ~init_program:add_to_dollar_path
    ~witness:(witness_file "bwa")

let freebayes =
  Git_installable_tool.make
    Machine.Tool.Default.freebayes
    ~repository:"https://github.com/ekg/freebayes.git"
    ~recursive:true
    ~install_program:(fun ~path -> KEDSL.Program.(
        sh "make"
        && shf "cp -r bin %s" path
      ))
    ~init_program:(fun ~path ->
        KEDSL.Program.(shf "export PATH=%s/bin/:$PATH" path))
    ~witness:(witness_list ["bin/freebayes"; "bin/bamleftalign"])

let sambamba =
  Installable_tool.make
    Machine.Tool.Default.sambamba
    ~archive_is_directory:false
    ~url:"https://github.com/lomereiter/sambamba/releases/download/v0.6.5/sambamba_v0.6.5_linux.tar.bz2"
    ~init_program:add_to_dollar_path
    ~witness:(witness_file "sambamba_v0.6.5")

let stringtie =
  Installable_tool.make
    Machine.Tool.Default.stringtie
    ~url:"https://github.com/gpertea/stringtie/archive/v1.2.2.tar.gz"
    ~install_program:(make_and_copy_bin "stringtie")
    ~init_program:add_to_dollar_path
    ~witness:(witness_file "stringtie")

let vcftools =
  Installable_tool.make Machine.Tool.Default.vcftools
    ~url:"http://downloads.sourceforge.net/project/vcftools/vcftools_0.1.12b.tar.gz"
    ~install_program:(fun ~path -> KEDSL.Program.(
        sh "make"
        && shf  "cp -r bin %s" path
        && shf  "cp -r lib/perl5/site_perl %s" path
      ))
    ~witness:(witness_file @@ "bin" // "vcftools")
    ~init_program:(fun ~path ->
        KEDSL.Program.(shf "export PATH=%s/bin/:$PATH" path
                       && shf "export PERL5LIB=$PERL5LIB:%s/site_perl/" path))

let bedtools =
  Installable_tool.make Machine.Tool.Default.bedtools
    ~url:"https://github.com/arq5x/bedtools2/archive/v2.23.0.tar.gz"
    ~install_program:(fun ~path -> KEDSL.Program.(
        sh "make" && shf "cp -r bin %s" path))
    ~init_program:(fun ~path ->
        KEDSL.Program.(shf "export PATH=%s/bin/:$PATH" path))
    ~witness:(witness_file @@ "bin" // "bedtools")

let mosaik =
  let url =
    "https://mosaik-aligner.googlecode.com/files/MOSAIK-2.2.3-source.tar" in
  Installable_tool.make Machine.Tool.Default.mosaik ~url
    ~unarchived_directory:"MOSAIK*"
    ~init_program:(fun ~path ->
        KEDSL.Program.(
          shf "export PATH=%s:$PATH" path
          && shf "export MOSAIK_PE_ANN=%s/pe.ann" path
          && shf "export MOSAIK_SE_ANN=%s/se.ann" path
        ))
    ~witness:(witness_file "MosaikAligner")
    ~install_program:KEDSL.Program.(fun ~path ->
        sh "make"
        && shf "cp networkFile/*pe.ann %s/pe.ann" path
        && shf "cp networkFile/*se.ann %s/se.ann" path
        && shf "cp bin/* %s" path
      )

let star =
  let url = "https://github.com/alexdobin/STAR/archive/STAR_2.4.1d.tar.gz" in
  let star_binary = "STAR" in
  (* TODO: there are other binaries in `bin/` *)
  let star_binary_path = sprintf "bin/Linux_x86_64/%s" star_binary in
  Installable_tool.make ~url Machine.Tool.Default.star
    ~init_program:add_to_dollar_path
    ~unarchived_directory:"STAR-*"
    ~install_program:KEDSL.Program.(fun ~path ->
        shf "cp %s %s" star_binary_path path)
    ~witness:(witness_file star_binary)

(** Installable description for [tool], which must be the default [hisat]
    or [hisat2] definition; any other definition fails (through
    [failwithf]). *)
let hisat tool =
  let open KEDSL in
  let url, hisat_binary =
    let open Machine.Tool.Default in
    match tool with
    | one when one = hisat ->
      "http://ccb.jhu.edu/software/hisat/downloads/hisat-0.1.6-beta-Linux_x86_64.zip",
      "hisat"
    | two when two = hisat2 ->
      "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.0.2-beta-Linux_x86_64.zip",
      "hisat2"
    | other ->
      failwithf "Can't install Hisat version: %s" (Tool_def.to_string other)
  in
  Installable_tool.make tool
    ~url
    ~witness:(witness_file hisat_binary)
    ~install_program:KEDSL.Program.(fun ~path ->
        shf "mv hisat* %s" path
      )
    ~init_program:add_to_dollar_path

let kallisto =
  let url = "https://github.com/pachterlab/kallisto/releases/download/v0.42.3/kallisto_linux-v0.42.3.tar.gz" in
  Installable_tool.make Machine.Tool.Default.kallisto ~url
    ~witness:(witness_file "kallisto")
    ~install_program:KEDSL.Program.(fun ~path ->
        shf "cp -r * %s" path
      )
    ~init_program:add_to_dollar_path

let samtools =
  let url = "https://github.com/samtools/samtools/releases/download/1.3/samtools-1.3.tar.bz2" in
  let toplevel_tools = ["samtools"] in
  let htslib = ["bgzip"; "tabix" ] in
  let tools = toplevel_tools @ htslib in
  let install_program ~path =
    let open KEDSL.Program in
    sh "make"
    && shf "cp %s %s" (String.concat toplevel_tools ~sep:" ")  path
    && sh "cd htslib*/"
    && sh "make"
    && shf "cp %s %s" (String.concat htslib ~sep:" ") path
    && sh "echo Done"
  in
  let witness = witness_list tools in
  Installable_tool.make Machine.Tool.Default.samtools ~url ~install_program
    ~init_program:add_to_dollar_path ~witness

let cufflinks =
  let url =
    "http://cole-trapnell-lab.github.io/cufflinks/assets/downloads/cufflinks-2.2.1.Linux_x86_64.tar.gz" in
  let witness = witness_file "cufflinks" in
  let install_program ~path = KEDSL.Program.(shf "cp * %s" path) in
  Installable_tool.make Machine.Tool.Default.cufflinks ~install_program ~url
    ~init_program:add_to_dollar_path ~witness

let somaticsniper =
  let url =
    let deb_file = "somatic-sniper1.0.3_1.0.3_amd64.deb" in
    sprintf
      "http://apt.genome.wustl.edu/ubuntu/pool/main/s/somatic-sniper1.0.3/%s"
      deb_file
  in
  let binary = "somaticsniper" in
  let binary_in_deb = "usr/bin/bam-somaticsniper1.0.3" in
  let install_program ~path =
    KEDSL.Program.(shf "mv %s/%s %s/%s" path binary_in_deb path binary) in
  Installable_tool.make Machine.Tool.Default.somaticsniper ~install_program ~url
    ~witness:(witness_file binary) ~init_program:add_to_dollar_path

let varscan =
  let url =
    "http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.5.jar" in
  let jar = "VarScan.v2.3.5.jar" in
  let witness = witness_file jar in
  let init_program ~path =
    KEDSL.Program.(shf "export VARSCAN_JAR=%s/%s" path jar) in
  Installable_tool.make Machine.Tool.Default.varscan ~url ~init_program ~witness

let picard =
  let url =
    "https://github.com/broadinstitute/picard/releases/download/1.127/picard-tools-1.127.zip"
  in
  let jar = "picard-tools-1.127" // "picard.jar" in
  let init_program ~path = KEDSL.Program.(shf "export PICARD_JAR=%s/%s" path jar) in
  Installable_tool.make Machine.Tool.Default.picard ~url ~init_program
    ~witness:(witness_file jar)
(** Mutect (and some other tools) sit behind an annoying web login, cf. <http://www.broadinstitute.org/cancer/cga/mutect_download>. The user of the library must therefore provide an SSH or HTTP URL (or reimplement the `Tool.t` in some other way). *)
let get_broad_jar =
  Workflow_utilities.Download.get_tool_file ~identifier:"broad-jar"

(** Mutect tool whose installation step fetches the file at [loc] into the
    tool's directory under [install_tools_path]; initialisation exports
    [mutect_HOME] as that directory. *)
let mutect_tool
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_tools_path loc =
  let definition = Machine.Tool.Default.mutect in
  let open KEDSL in
  let install_path =
    install_tools_path // Tool_def.to_directory_name definition in
  let ensure = get_broad_jar ~run_program ~host ~install_path loc in
  let init = Program.(shf "export mutect_HOME=%s" install_path) in
  Machine.Tool.create definition ~ensure ~init

(** GATK tool whose installation step fetches the file at [loc] into the
    tool's directory under [install_tools_path]; initialisation exports
    [GATK_JAR] as the path of the fetched file. *)
let gatk_tool
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_tools_path loc =
  let definition = Machine.Tool.Default.gatk in
  let open KEDSL in
  let install_path =
    install_tools_path // Tool_def.to_directory_name definition in
  let ensure = get_broad_jar ~run_program ~host ~install_path loc in
  let init = Program.(shf "export GATK_JAR=%s" ensure#product#path) in
  Machine.Tool.create definition ~ensure ~init
(**

Strelka is built from source but does not seem to build on MacOSX.

*)

let strelka =
  let bin_subdir = "usr" // "bin" in
  let install_program ~path =
    (* C.f. ftp://ftp.illumina.com/v1-branch/v1.0.14/README *)
    let open KEDSL.Program in
    shf "./configure --prefix=%s" (path // "usr")
    && sh "make && make install"
  in
  let init_program ~path =
    let open KEDSL.Program in
    shf "export STRELKA_BIN=%s/%s" path bin_subdir
  in
  Installable_tool.make Machine.Tool.Default.strelka
    ~url:"ftp://strelka:%27%27@ftp.illumina.com/v1-branch/v1.0.14/strelka_workflow-1.0.14.tar.gz"
    ~init_program ~install_program
    ~witness:(witness_file (bin_subdir // "configureStrelkaWorkflow.pl"))

(** Virmid: downloaded archive only; initialisation exports [VIRMID_JAR]. *)
let virmid =
  let jar = "Virmid-1.1.1" // "Virmid.jar" in
  let init_program ~path =
    let open KEDSL.Program in
    shf "export VIRMID_JAR=%s/%s" path jar
  in
  Installable_tool.make Machine.Tool.Default.virmid
    ~url:"http://downloads.sourceforge.net/project/virmid/virmid-1.1.1.tar.gz"
    ~init_program
    ~unarchived_directory:"."
    ~witness:(witness_file jar)

(** MuSE: a single downloaded binary made executable; initialisation
    exports [muse_bin]. *)
let muse =
  let binary = "MuSEv1.0b" in
  let install_program ~path =
    let open KEDSL.Program in
    shf "chmod +x %s/%s" path binary
  in
  let init_program ~path =
    let open KEDSL.Program in
    shf "export muse_bin=%s/%s" path binary
  in
  Installable_tool.make Machine.Tool.Default.muse
    ~url:"http://bioinformatics.mdanderson.org/Software/MuSE/MuSEv1.0b"
    ~install_program ~init_program
    ~witness:(witness_file binary)

(** FastQC: the unzipped [FastQC] directory is copied into the install
    path and the [fastqc] script made executable; initialisation exports
    [FASTQC_BIN]. *)
let fastqc =
  let binary = "fastqc" in
  let binary_path path = path // binary in
  let install_program =
    KEDSL.Program.(fun ~path ->
        shf "cp -r * %s" path
        && shf "chmod +x %s" (binary_path path)
      )
  in
  let init_program ~path =
    KEDSL.Program.(shf "export FASTQC_BIN=%s" (binary_path path))
  in
  Installable_tool.make Machine.Tool.Default.fastqc
    ~url:"http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.5.zip"
    ~witness:(witness_file binary)
    ~install_program
    ~init_program
    ~unarchived_directory:"FastQC"

(** samblaster: built with [make], binary copied to the install path. *)
let samblaster =
  let binary = "samblaster" in
  Installable_tool.make
    Machine.Tool.Default.samblaster
    ~url:"https://github.com/GregoryFaust/samblaster/releases/download/v.0.1.22/samblaster-v.0.1.22.tar.gz"
    ~install_program:(make_and_copy_bin binary)
    ~init_program:add_to_dollar_path
    ~witness:(witness_file binary)

(** Placeholder location: a [`Fail] carrying a message that names the tool
    ([msg]) for which no location was provided. *)
let default_tool_location msg (): Workflow_utilities.Download.tool_file_location =
  `Fail (sprintf "No location provided for %s" msg)

(** NetMHC locations where each of the four tools has the placeholder
    [`Fail] location. *)
let default_netmhc_locations (): Netmhc.netmhc_file_locations = Netmhc.({
  netmhc=(default_tool_location "NetMHC" ());
  netmhcpan=(default_tool_location "NetMHCpan" ());
  pickpocket=(default_tool_location "PickPocket" ());
  netmhccons=(default_tool_location "NetMHCcons" ());
})

(** The default tool kit: the URL- and git-installable tools defined in
    this module, plus the Biopam, Python and NetMHC kits, all installed
    under [install_tools_path].

    The three optional arguments are thunks producing the locations of the
    Mutect jar, the GATK jar and the NetMHC archives; each defaults to a
    placeholder [`Fail] location. *)
let default_toolkit
    ~run_program
    ~host ~install_tools_path
    ?(mutect_jar_location = default_tool_location "Mutect")
    ?(gatk_jar_location = default_tool_location "GATK")
    ?(netmhc_tool_locations = default_netmhc_locations)
    () =
  (* Render a URL-installable tool description into a tool. *)
  let install installable =
    Installable_tool.render ~host installable ~install_tools_path ~run_program
  in
  (* Render a git-installable tool description into a tool. *)
  let install_git installable =
    Git_installable_tool.render ~host installable ~install_tools_path ~run_program
  in
  Machine.Tool.Kit.concat [
    Machine.Tool.Kit.of_list [
      mutect_tool ~run_program ~host ~install_tools_path (mutect_jar_location ());
      gatk_tool ~run_program ~host ~install_tools_path (gatk_jar_location ());
      install bwa;
      install samtools;
      install bedtools;
      install vcftools;
      install strelka;
      install picard;
      install somaticsniper;
      install sambamba;
      install varscan;
      install muse;
      install virmid;
      install star;
      install stringtie;
      install cufflinks;
      install (hisat Machine.Tool.Default.hisat);
      install (hisat Machine.Tool.Default.hisat2);
      install mosaik;
      install kallisto;
      install fastqc;
      install samblaster;
      install_git freebayes;
    ];
    Biopam.default ~run_program ~host
      ~install_path:(install_tools_path // "biopam-kit") ();
    Python_package.default ~run_program ~host
      ~install_path: (install_tools_path // "python-tools") ();
    Netmhc.default ~run_program ~host
      ~install_path: (install_tools_path // "netmhc-tools")
      ~files:(netmhc_tool_locations ()) ();
  ]
end