let align
~reference_build
~configuration
~fastq
~(result_prefix:string)
~(run_with : Machine.t)
() =
let open KEDSL in
let reference_fasta =
Machine.get_reference_genome run_with reference_build
|> Reference_genome.fasta in
let reference_dir = (Filename.dirname reference_fasta#product#path) in
let version = configuration.Configuration.version in
let hisat_binary =
match version with
| `V_0_1_6_beta -> "hisat"
| `V_2_0_2_beta -> "hisat2"
in
let index_dir = sprintf "%s/%s-index/" reference_dir hisat_binary in
let index_prefix = index_dir // (sprintf "%s-index" hisat_binary) in
let in_work_dir =
Program.shf "cd %s" Filename.(quote (dirname result_prefix)) in
let hisat_tool =
Machine.get_tool run_with (Configuration.get_tool configuration) in
let hisat_index = index ~index_prefix ~reference_build ~run_with ~configuration in
let result = sprintf "%s.sam" result_prefix in
let r1_path, r2_path_opt = fastq#product#paths in
let name = sprintf "%s-rna-align-%s" hisat_binary (Filename.basename r1_path) in
let processors = Machine.max_processors run_with in
let hisat_base_command = sprintf
"%s -p %d -x %s -S %s"
hisat_binary
processors
(Filename.quote index_prefix)
(Filename.quote result)
in
let base_hisat_target ~hisat_command =
workflow_node ~name
(single_file
~host:(Machine.(as_host run_with))
result)
~edges:[
on_failure_activate (Remove.file ~run_with result);
depends_on reference_fasta;
depends_on hisat_index;
depends_on fastq;
depends_on Machine.Tool.(ensure hisat_tool);
]
~tags:[Target_tags.aligner]
~make:(Machine.run_big_program run_with ~processors ~name
~self_ids:["hisat"; "align"]
Program.(
Machine.Tool.(init hisat_tool)
&& in_work_dir
&& sh hisat_command
))
in
match r2_path_opt with
| Some read2 ->
let hisat_command =
String.concat ~sep:" " [
hisat_base_command;
"-1"; (Filename.quote r1_path);
"-2"; (Filename.quote read2);
] in
base_hisat_target ~hisat_command
| None ->
let hisat_command = String.concat ~sep:" " [
hisat_base_command;
"-U"; (Filename.quote r1_path);
] in
base_hisat_target ~hisat_command