struct
(** FASTQ input files for one dataset: either two lists of files
    ([`Paired_end]) or a single list ([`Single_end]).
    NOTE(review): presumably the two [`Paired_end] lists hold the two read
    directions, in that order; confirm against callers. *)
type input_fastq = [
| `Paired_end of File.t list * File.t list
| `Single_end of File.t list
]
(** [input_fastq ~dataset fastqs] turns lists of FASTQ files into a
    [Single_end_sample] or a [Paired_end_sample] node.

    Each list of files is collapsed into a single FASTQ value:
    - [Gunzip_concat] when every path ends in ["fastq.gz"] or ["fq.gz"];
    - [Concat_text] when every path ends in ["fastq"] or ["fq"].

    An empty list, or a list that is not uniform, is rejected through
    [failwithf]. [dataset] is used both as the [sample_name] and as the
    [fragment_id] of the resulting sample. *)
let input_fastq ~dataset (fastqs: input_fastq) =
  let ends_with_gz_suffix path =
    Filename.check_suffix path "fastq.gz"
    || Filename.check_suffix path "fq.gz" in
  let ends_with_plain_suffix path =
    Filename.check_suffix path "fastq"
    || Filename.check_suffix path "fq" in
  (* [every files accepts] holds when the path of each file satisfies
     [accepts]. *)
  let every files accepts =
    List.for_all files ~f:(fun file -> accepts file#product#path) in
  let merge_into_one_fastq files =
    match files with
    | [] -> failwithf "Dataset %S seems empty" dataset
    | _ :: _ ->
      if every files ends_with_gz_suffix then
        Gunzip_concat (List.map files ~f:(fun gz -> Fastq_gz gz))
      else if every files ends_with_plain_suffix then
        Concat_text (List.map files ~f:(fun fq -> Fastq fq))
      else begin
        (* The message lists the base names of all the offending files. *)
        let base_names =
          List.map files ~f:(fun file -> Filename.basename file#product#path)
          |> String.concat ~sep:", " in
        failwithf
          "For now, a sample must be a uniform list of fastq.gz/fq.gz or .fq/.fastq files. Dataset %S does not qualify: [%s]
"
          dataset
          base_names
      end
  in
  let info = {sample_name = dataset; fragment_id = dataset} in
  match fastqs with
  | `Single_end reads ->
    Single_end_sample (info, merge_into_one_fastq reads)
  | `Paired_end (first, second) ->
    Paired_end_sample
      (info, merge_into_one_fastq first, merge_into_one_fastq second)
(** [bam ~dataset bam_file] builds a [Bam_sample] node from [dataset] and
    the given BAM value. *)
let bam ~dataset bam_file =
  Bam_sample (dataset, bam_file)
(** [bam_to_fastq how input_bam] builds a [Bam_to_fastq] node; [how] is
    passed through unchanged as the first component. *)
let bam_to_fastq how input_bam =
  Bam_to_fastq (how, input_bam)
(** [bwa ?configuration fastq_sample] builds a [Bwa] node.
    [configuration] defaults to [Bwa.Configuration.Aln.default]. *)
let bwa
    ?(configuration = Bwa.Configuration.Aln.default)
    fastq_sample =
  Bwa (configuration, fastq_sample)
(** [bwa_aln] is another name for {!bwa}: it forwards both arguments
    unchanged. *)
let bwa_aln ?configuration fastq_sample =
  bwa ?configuration fastq_sample
(** [bwa_mem ?configuration fastq_sample] builds a [Bwa_mem] node.
    [configuration] defaults to [Bwa.Configuration.Mem.default]. *)
let bwa_mem
    ?(configuration = Bwa.Configuration.Mem.default)
    fastq_sample =
  Bwa_mem (configuration, fastq_sample)
(** [mosaik fastq_sample] wraps its argument in a [Mosaik] node. *)
let mosaik fastq_sample =
  Mosaik fastq_sample
(** [star ?configuration fastq_sample] builds a [Star] node.
    [configuration] defaults to [Star.Configuration.Align.default]. *)
let star
    ?(configuration = Star.Configuration.Align.default)
    fastq_sample =
  Star (configuration, fastq_sample)
(** [hisat ?configuration fastq_sample] builds a [Hisat] node.
    [configuration] defaults to [Hisat.Configuration.default_v1]. *)
let hisat
    ?(configuration = Hisat.Configuration.default_v1)
    fastq_sample =
  Hisat (configuration, fastq_sample)
(** [stringtie ?configuration input_bam] builds a [Stringtie] node.
    [configuration] defaults to [Stringtie.Configuration.default]. *)
let stringtie
    ?(configuration = Stringtie.Configuration.default)
    input_bam =
  Stringtie (configuration, input_bam)
(** [gatk_indel_realigner ?configuration input_bam] builds a
    [Gatk_indel_realigner] node. [configuration] defaults to
    [Gatk.Configuration.default_indel_realigner]. *)
let gatk_indel_realigner
    ?(configuration = Gatk.Configuration.default_indel_realigner) input_bam =
  Gatk_indel_realigner (configuration, input_bam)
(** [picard_mark_duplicates ?settings input_bam] builds a
    [Picard_mark_duplicates] node. [settings] defaults to
    [Picard.Mark_duplicates_settings.default]. *)
let picard_mark_duplicates
    ?(settings = Picard.Mark_duplicates_settings.default) input_bam =
  Picard_mark_duplicates (settings, input_bam)
(** [gatk_bqsr ?configuration input_bam] builds a [Gatk_bqsr] node.
    [configuration] defaults to [Gatk.Configuration.default_bqsr]. *)
let gatk_bqsr
    ?(configuration = Gatk.Configuration.default_bqsr)
    input_bam =
  Gatk_bqsr (configuration, input_bam)
(** [pair ~normal ~tumor] groups two values into a [Bam_pair] node, with
    [normal] as the first component and [tumor] as the second. *)
let pair ~normal ~tumor =
  Bam_pair (normal, tumor)
(** [germline_variant_caller caller input_bam] builds a
    [Germline_variant_caller] node from a caller description and its
    input. *)
let germline_variant_caller caller input_bam =
  Germline_variant_caller (caller, input_bam)
(** [gatk_haplotype_caller input_bam] builds a germline variant-calling
    node named ["Gatk-HaplotypeCaller"].

    The configuration is fixed: its name is ["default"] and its JSON form
    only records that name. The target is created by
    [Gatk.haplotype_caller], called with [`Map_reduce]. *)
let gatk_haplotype_caller input_bam =
  let configuration_name = "default" in
  let configuration_json = `Assoc ["Name", `String configuration_name] in
  germline_variant_caller
    { Variant_caller.name = "Gatk-HaplotypeCaller";
      configuration_json;
      configuration_name;
      make_target =
        (fun ~run_with ~input ~result_prefix ?more_edges () ->
           match input with
           | Variant_caller.Germline germline_bam ->
             Gatk.haplotype_caller
               ?more_edges
               ~run_with
               ~input_bam:germline_bam
               ~result_prefix
               `Map_reduce);
    }
    input_bam
(** [somatic_variant_caller caller bam_pair] builds a
    [Somatic_variant_caller] node from a caller description and its
    input pair. *)
let somatic_variant_caller caller bam_pair =
  Somatic_variant_caller (caller, bam_pair)
(** [mutect ?configuration bam_pair] builds a somatic variant-calling node
    named ["Mutect"].

    [configuration] defaults to [Mutect.Configuration.default]; its [name]
    field and its JSON form ([Mutect.Configuration.to_json]) are stored in
    the caller description. The target is created by [Mutect.run], called
    with [`Map_reduce]. *)
let mutect ?(configuration=Mutect.Configuration.default) bam_pair =
  let configuration_name = configuration.Mutect.Configuration.name in
  let configuration_json = Mutect.Configuration.to_json configuration in
  somatic_variant_caller
    { Variant_caller.name = "Mutect";
      configuration_json;
      configuration_name;
      make_target =
        (fun ~run_with ~input ~result_prefix ?more_edges () ->
           match input with
           | Variant_caller.Somatic {normal; tumor} ->
             Mutect.run
               ~configuration ?more_edges ~run_with
               ~normal ~tumor ~result_prefix
               `Map_reduce);
    }
    bam_pair
(** [mutect2 ?configuration bam_pair] builds a somatic variant-calling node
    for GATK's Mutect2.

    [configuration] defaults to [Gatk.Configuration.Mutect2.default]; its
    [name] field and its JSON form are stored in the caller description.
    The target is created by [Gatk.mutect2], called with [`Map_reduce].

    The caller is named ["Mutect2"], not ["Mutect"], so that it can be told
    apart from the caller built by [mutect]. *)
let mutect2 ?(configuration=Gatk.Configuration.Mutect2.default) bam_pair =
  let configuration_name = configuration.Gatk.Configuration.Mutect2.name in
  let configuration_json = Gatk.Configuration.Mutect2.to_json configuration in
  let make_target
      ~run_with ~input ~result_prefix ?more_edges () =
    match input with
    | Variant_caller.Somatic {normal; tumor} ->
      Gatk.mutect2
        ~configuration ?more_edges ~run_with
        ~input_normal_bam:normal ~input_tumor_bam:tumor
        ~result_prefix `Map_reduce in
  somatic_variant_caller
    {Variant_caller.name = "Mutect2";
     configuration_json;
     configuration_name;
     make_target;}
    bam_pair
(** [somaticsniper ?configuration bam_pair] builds a somatic
    variant-calling node named ["Somaticsniper"].

    [configuration] defaults to [Somaticsniper.Configuration.default]; the
    caller description stores [Somaticsniper.Configuration.name] and
    [Somaticsniper.Configuration.to_json] applied to it. The target is
    created by [Somaticsniper.run]. *)
let somaticsniper
    ?(configuration = Somaticsniper.Configuration.default) bam_pair =
  let configuration_name = Somaticsniper.Configuration.name configuration in
  let configuration_json = Somaticsniper.Configuration.to_json configuration in
  somatic_variant_caller
    { Variant_caller.name = "Somaticsniper";
      configuration_json;
      configuration_name;
      make_target =
        (* NOTE(review): [more_edges] is accepted here but is not forwarded
           to [Somaticsniper.run]; confirm whether that is intended. *)
        (fun ~run_with ~input ~result_prefix ?more_edges () ->
           match input with
           | Variant_caller.Somatic {normal; tumor} ->
             Somaticsniper.run
               ~configuration ~run_with ~normal ~tumor ~result_prefix ());
    }
    bam_pair
(** [varscan_somatic ?adjust_mapq bam_pair] builds a somatic
    variant-calling node named ["Varscan-somatic"].

    The configuration name is ["amq-<n>"] when [adjust_mapq] is [Some n]
    and ["amq-NONE"] otherwise. The JSON form stores that name, plus the
    value itself as a string under ["Adjust_mapq"] (["None"] when absent).
    The target is created by [Varscan.somatic_map_reduce]. *)
let varscan_somatic ?adjust_mapq bam_pair =
  (* Renders the optional value, with a caller-chosen text for [None]. *)
  let mapq_text ~when_absent =
    match adjust_mapq with
    | Some quality -> Int.to_string quality
    | None -> when_absent in
  let configuration_name = sprintf "amq-%s" (mapq_text ~when_absent:"NONE") in
  let configuration_json =
    `Assoc [
      "Name", `String configuration_name;
      "Adjust_mapq", `String (mapq_text ~when_absent:"None");
    ] in
  let make_target ~run_with ~input ~result_prefix ?more_edges () =
    match input with
    | Variant_caller.Somatic {normal; tumor} ->
      Varscan.somatic_map_reduce
        ?adjust_mapq ?more_edges
        ~run_with ~normal ~tumor ~result_prefix () in
  somatic_variant_caller
    { Variant_caller.name = "Varscan-somatic";
      configuration_json;
      configuration_name;
      make_target;
    }
    bam_pair
(** [strelka ~configuration bam_pair] builds a somatic variant-calling
    node named ["Strelka"].

    The caller description stores the [name] field of [configuration] and
    its JSON form ([Strelka.Configuration.to_json]). The target is created
    by [Strelka.run]. *)
let strelka ~configuration bam_pair =
  let configuration_name = configuration.Strelka.Configuration.name in
  let configuration_json = Strelka.Configuration.to_json configuration in
  let make_target ~run_with ~input ~result_prefix ?more_edges () =
    match input with
    | Variant_caller.Somatic {normal; tumor} ->
      Strelka.run
        ?more_edges ~configuration ~normal ~tumor
        ~run_with ~result_prefix () in
  somatic_variant_caller
    { Variant_caller.name = "Strelka";
      configuration_json;
      configuration_name;
      make_target;
    }
    bam_pair
(** [virmid ~configuration bam_pair] builds a somatic variant-calling node
    named ["Virmid"].

    The caller description stores the [name] field of [configuration] and
    its JSON form ([Virmid.Configuration.to_json]). The target is created
    by [Virmid.run]. *)
let virmid ~configuration bam_pair =
  let configuration_name = configuration.Virmid.Configuration.name in
  let configuration_json = Virmid.Configuration.to_json configuration in
  let make_target ~run_with ~input ~result_prefix ?more_edges () =
    match input with
    | Variant_caller.Somatic {normal; tumor} ->
      Virmid.run
        ?more_edges ~configuration ~normal ~tumor
        ~run_with ~result_prefix () in
  somatic_variant_caller
    { Variant_caller.name = "Virmid";
      configuration_json;
      configuration_name;
      make_target;
    }
    bam_pair
(** [muse ~configuration bam_pair] builds a somatic variant-calling node
    named ["Muse"].

    The caller description stores the [name] field of [configuration] and
    its JSON form ([Muse.Configuration.to_json]). The target is created by
    [Muse.run], called with [`Map_reduce]. *)
let muse ~configuration bam_pair =
  let configuration_name = configuration.Muse.Configuration.name in
  let configuration_json = Muse.Configuration.to_json configuration in
  somatic_variant_caller
    { Variant_caller.name = "Muse";
      configuration_json;
      configuration_name;
      make_target =
        (fun ~(run_with: Machine.t) ~input ~result_prefix ?more_edges () ->
           match input with
           | Variant_caller.Somatic {normal; tumor} ->
             Muse.run
               ~configuration ?more_edges
               ~run_with ~normal ~tumor ~result_prefix
               `Map_reduce);
    }
    bam_pair
(** [seq2hla sample] wraps its argument in a [Seq2HLA] node. *)
let seq2hla sample =
  Seq2HLA sample
(** [optitype kind sample] builds an [Optitype] node; [kind] is passed
    through unchanged as the first component. *)
let optitype kind sample =
  Optitype (kind, sample)
(** [add_tags ?recursively tags pipeline] wraps [pipeline] in a
    [With_metadata] node.

    The metadata is [`Add_tags_rec tags] when [recursively] is [true] and
    [`Add_tags tags] otherwise. [recursively] defaults to [false]. *)
let add_tags ?(recursively = false) tags pipeline =
  let metadata =
    match recursively with
    | true -> `Add_tags_rec tags
    | false -> `Add_tags tags in
  With_metadata (metadata, pipeline)
end