(** [cache_genome ~run_with ~reference_build] builds the workflow node that
    runs [pyensembl install] for the Ensembl release and species recorded
    for [reference_build] in the reference genome obtained from [run_with].

    - Product: a single witness file, [<cache dir>/<reference_build>.cached],
      on the host of [run_with]. Touching it is the last step of the
      program.
    - Edges: depends on the [ensure] node of the ["pyensembl"] tool.
    - Make: submitted through [Machine.run_download_program] with the
      [`Internet_access] requirement. The program initialises the tool,
      runs the cache-directory command from [set_cache_dir_command], then
      runs the install command.

    The node name is ["pyensembl_cache-<release>-<species>"]; the same name
    is passed to the download program. *)
let cache_genome ~(run_with: Machine.t) ~reference_build =
  let open KEDSL in
  let tool =
    Machine.get_tool run_with (Machine.Tool.Definition.create "pyensembl")
  in
  let reference = Machine.get_reference_genome run_with reference_build in
  let release = Reference_genome.ensembl reference in
  let organism = Reference_genome.species reference in
  (* The witness is keyed on the build name, the node name on
     release + species. *)
  let witness =
    sprintf "%s/%s.cached" (get_cache_dir ~run_with) reference_build
  in
  let name = sprintf "pyensembl_cache-%d-%s" release organism in
  let host = Machine.as_host run_with in
  (* Tool set-up, cache-dir command, install, then mark completion. *)
  let install_and_mark =
    Program.(
      Machine.Tool.init tool
      && set_cache_dir_command ~run_with
      && shf "pyensembl install --release %d --species \"%s\"" release organism
      && exec ["touch"; witness]
    )
  in
  workflow_node (single_file witness ~host)
    ~name
    ~edges:[depends_on (Machine.Tool.ensure tool)]
    ~make:(
      Machine.run_download_program run_with
        ~requirements:[`Internet_access]
        ~name
        install_and_mark
    )