(Bfx : Semantics.Bioinformatics_base) = struct
(** [fastq_of_files ~sample_name ?fragment_id ~r1 ?r2 ()] builds a FASTQ node
    from one read file [r1] and an optional mate file [r2].

    A path counts as compressed when it ends in [.gz] or [.fqz]. Compressed
    inputs are declared with [Bfx.fastq_gz] and then passed through
    [Bfx.gunzip]; uncompressed inputs are declared with [Bfx.fastq]. Both
    paths are registered through [Bfx.input_url] first.

    Fails (through [failwithf]) when [r1] and [r2] do not agree on being
    compressed. *)
let fastq_of_files ~sample_name ?fragment_id ~r1 ?r2 () =
  let looks_compressed path =
    Filename.check_suffix path ".gz" || Filename.check_suffix path ".fqz"
  in
  let mate_compressed = Option.map ~f:looks_compressed r2 in
  match looks_compressed r1, mate_compressed with
  | true, (None | Some true) ->
    let r1 = Bfx.input_url r1 in
    let r2 = Option.map ~f:Bfx.input_url r2 in
    Bfx.gunzip (Bfx.fastq_gz ~sample_name ?fragment_id ~r1 ?r2 ())
  | false, (None | Some false) ->
    let r1 = Bfx.input_url r1 in
    let r2 = Option.map ~f:Bfx.input_url r2 in
    Bfx.fastq ~sample_name ?fragment_id ~r1 ?r2 ()
  | true, Some false
  | false, Some true ->
    (* The two mates disagree on compression: not supported. *)
    failwithf "fastq_of_files: cannot handle mixed gzipped and non-gzipped fastq pairs (for a given same fragment)"
(** [bam_of_input_exn u] turns a [Bam] input description into a BAM node:
    the path is registered with [Bfx.input_url] and handed to [Bfx.bam]
    together with the sample name, optional sorting and reference build.

    @raise Failure when [u] is a [Fastq] input. *)
let bam_of_input_exn u =
  let open Input in
  match u with
  | Bam {bam_sample_name = sample_name; path; sorting; reference_build;
         how = _} ->
    (* [how] is not needed to declare the BAM itself. *)
    Bfx.bam ~sample_name ?sorting ~reference_build (Bfx.input_url path)
  | Fastq _ -> failwith "Can't pass Input.t Fastq to bam_of_input_exn"
(** [fastq_of_input u] converts an input description into a [Bfx.list] of
    FASTQ nodes.

    - A [Bam] input yields a one-element list: the BAM converted with
      [Bfx.bam_to_fastq].
    - A [Fastq] input yields one element per entry of [files]: [PE] and [SE]
      entries go through [fastq_of_files], [Of_bam] entries are declared as
      BAMs and converted with [Bfx.bam_to_fastq]. *)
let fastq_of_input u =
  let open Input in
  (* Register [path] as an input URL and declare it as a BAM node. *)
  let bam_node ~sample_name ?sorting ~reference_build path =
    Bfx.bam ~sample_name ?sorting ~reference_build (Bfx.input_url path)
  in
  let fragments =
    match u with
    | Fastq {fastq_sample_name = sample_name; files} ->
      List.map files ~f:(fun (fragment_id, source) ->
          match source with
          | SE r1 ->
            fastq_of_files ~sample_name ?fragment_id ~r1 ()
          | PE (r1, r2) ->
            fastq_of_files ~sample_name ?fragment_id ~r1 ~r2 ()
          | Of_bam (how, sorting, reference_build, path) ->
            let from_bam =
              bam_node ~sample_name ?sorting ~reference_build path in
            Bfx.bam_to_fastq ?fragment_id how from_bam)
    | Bam {bam_sample_name; path; how; sorting; reference_build} ->
      let whole_bam =
        bam_node ~sample_name:bam_sample_name ?sorting ~reference_build path
      in
      [Bfx.bam_to_fastq how whole_bam]
  in
  Bfx.list fragments
(** [bwa_mem_opt_inputs_exn inp] turns a [Fastq] input description into a
    list of tagged inputs, one per entry of [files]:

    - [`Fastq_gz node] for read files ending in [.gz] or [.fqz]
      (declared with [Bfx.fastq_gz]);
    - [`Fastq node] for uncompressed read files (declared with [Bfx.fastq]);
    - [`Bam (bam, how)] for [Of_bam] entries.

    All paths are registered through [Bfx.input_url].

    @raise Failure when [inp] is a [Bam] input. Paired-end entries whose two
    files disagree on compression are rejected through [failwithf]. *)
let bwa_mem_opt_inputs_exn inp =
  let open Input in
  let is_gz r =
    Filename.check_suffix r ".gz" || Filename.check_suffix r ".fqz" in
  match inp with
  | Bam _ -> failwith "Can't pass Input.t Bam to bwa_mem_opt_inputs"
  | Fastq {fastq_sample_name; files} ->
    let sample_name = fastq_sample_name in
    List.map files ~f:(fun (fragment_id, source) ->
        match source with
        | PE (r1, r2) ->
          begin match is_gz r1, is_gz r2 with
          | true, true ->
            let r1 = Bfx.input_url r1 in
            let r2 = Bfx.input_url r2 in
            `Fastq_gz (Bfx.fastq_gz ~sample_name ?fragment_id ~r1 ~r2 ())
          | false, false ->
            let r1 = Bfx.input_url r1 in
            let r2 = Bfx.input_url r2 in
            `Fastq (Bfx.fastq ~sample_name ?fragment_id ~r1 ~r2 ())
          | true, false
          | false, true ->
            failwithf "Heterogeneous gzipped / non-gzipped input paired-end FASTQs not implemented"
          end
        | SE r when is_gz r ->
          let r1 = Bfx.input_url r in
          `Fastq_gz (Bfx.fastq_gz ~sample_name ?fragment_id ~r1 ())
        | SE r ->
          (* Not gzipped: must be declared as a plain FASTQ, like the
             uncompressed paired-end case, and not as a [`Fastq_gz]. *)
          let r1 = Bfx.input_url r in
          `Fastq (Bfx.fastq ~sample_name ?fragment_id ~r1 ())
        | Of_bam (how, sorting, reference_build, path) ->
          let f = Bfx.input_url path in
          let bam = Bfx.bam ~sample_name ?sorting ~reference_build f in
          `Bam (bam, how)
      )
end