(** [input_fastq ~dataset fastqs] builds a sample description from the FASTQ
    files of [dataset].

    Each list of files found in [fastqs] is merged into a single FASTQ node:
    - when every path ends in [.fastq.gz] or [.fq.gz], the files are wrapped
      in [Fastq_gz] and combined with [Gunzip_concat];
    - when every path ends in [.fastq] or [.fq], the files are wrapped in
      [Fastq] and combined with [Concat_text].

    [`Paired_end (l1, l2)] yields a [Paired_end_sample] and [`Single_end l]
    yields a [Single_end_sample]; in both cases [dataset] is used as the
    sample name and as the fragment id.

    Fails through [failwithf] when a list is empty, or when it is not a
    uniform list of one of the two supported kinds.
    NOTE(review): [failwithf] is defined outside this block; it presumably
    raises [Failure], confirm against its definition. *)
let input_fastq ~dataset (fastqs: input_fastq) =
  (* The leading dot is part of every suffix so that only genuine extensions
     match; without it a path such as [reads.notfq] would be accepted. *)
  let is_fastq_gz path =
    Filename.check_suffix path ".fastq.gz"
    || Filename.check_suffix path ".fq.gz" in
  let is_fastq path =
    Filename.check_suffix path ".fastq" || Filename.check_suffix path ".fq" in
  (* [theyre_all files predicate] holds when [predicate] accepts the path of
     every file in [files]. *)
  let theyre_all files predicate =
    List.for_all files ~f:(fun file -> predicate file#product#path) in
  let bring_to_single_fastq files =
    match files with
    | [] -> failwithf "Dataset %S seems empty" dataset
    | gzs when theyre_all gzs is_fastq_gz ->
      Gunzip_concat (List.map gzs ~f:(fun f -> Fastq_gz f))
    | fqs when theyre_all fqs is_fastq ->
      Concat_text (List.map fqs ~f:(fun f -> Fastq f))
    | not_supported ->
      (* Mixed or unknown extensions: report the offending base names. The
         message text, including its final newline, is kept as it was. *)
      failwithf
        "For now, a sample must be a uniform list of \
         fastq.gz/fq.gz or .fq/.fastq files. \
         Dataset %S does not qualify: [%s]\n"
        dataset
        (List.map not_supported ~f:(fun f -> Filename.basename f#product#path)
         |> String.concat ~sep:", ")
  in
  let sample_info = {sample_name = dataset; fragment_id = dataset} in
  match fastqs with
  | `Paired_end (l1, l2) ->
    Paired_end_sample
      (sample_info, bring_to_single_fastq l1, bring_to_single_fastq l2)
  | `Single_end l ->
    Single_end_sample (sample_info, bring_to_single_fastq l)