(** [index ~reference_build ~run_with] is the workflow node that generates
    the STAR genome index for [reference_build].

    The index is written to a [star-index/] directory next to the reference
    FASTA; the node's product is the single file [SA] inside that directory.

    The node depends on the reference FASTA, on the reference GTF annotations
    (both are read by the [STAR --runMode genomeGenerate] command) and on the
    STAR tool being installed.  If the node fails, removal of the index
    directory is activated.

    The GTF node is obtained with [Reference_genome.gtf_exn]; judging by the
    [_exn] suffix it presumably raises when the reference genome has no
    annotations (NOTE(review): confirm against [Reference_genome]). *)
let index
    ~reference_build
    ~(run_with : Machine.t) =
  let open KEDSL in
  (* Look the reference genome up once and share it between the FASTA and
     the GTF lookups. *)
  let reference_genome =
    Machine.get_reference_genome run_with reference_build in
  let reference_fasta = Reference_genome.fasta reference_genome in
  let star_tool = Machine.get_tool run_with Machine.Tool.Default.star in
  let name =
    sprintf "star-index-%s" (Filename.basename reference_fasta#product#path) in
  let reference_annotations = Reference_genome.gtf_exn reference_genome in
  let reference_dir = Filename.dirname reference_fasta#product#path in
  let result_dir = sprintf "%s/star-index/" reference_dir in
  let suffix_array_result = result_dir // "SA" in
  let processors = Machine.max_processors run_with in
  let make =
    Machine.run_big_program run_with ~processors ~name
      ~self_ids:["star"; "index"]
      Program.(
        Machine.Tool.(init star_tool)
        (* [-p] keeps the step from failing when the directory is already
           there; every path handed to the shell is quoted. *)
        && shf "mkdir -p %s" (Filename.quote result_dir)
        && shf "STAR --runMode genomeGenerate \
                --genomeDir %s \
                --genomeFastaFiles %s \
                --sjdbGTFfile %s \
                --runThreadN %d"
          (Filename.quote result_dir)
          (Filename.quote reference_fasta#product#path)
          (Filename.quote reference_annotations#product#path)
          processors
      )
  in
  workflow_node ~name
    (single_file ~host:(Machine.(as_host run_with)) suffix_array_result)
    ~edges:[
      on_failure_activate (Remove.directory ~run_with result_dir);
      depends_on reference_fasta;
      (* The GTF file is passed to STAR via --sjdbGTFfile, so it has to be
         available before the command runs. *)
      depends_on reference_annotations;
      depends_on Machine.Tool.(ensure star_tool);
    ]
    ~tags:[Target_tags.aligner]
    ~make