module EDSL:sig
..end
Many functions may raise exceptions when called improperly, but this
should happen while building the workflow, not after it starts running.
module Host:sig
..end
module Program:sig
..end
module Condition:sig
..end
class type user_artifact =object
..end
host
(see also Artifact.t
).
val file : ?host:Host.t -> string -> user_artifact
val unit : user_artifact
class type user_target =object
..end
Target.t
).
val target : ?active:bool ->
?depends_on:user_target list ->
?make:Ketrew_pure.Target.Build_process.t ->
?done_when:Ketrew_pure.Target.Condition.t ->
?metadata:[ `String of string ] ->
?product:user_artifact ->
?equivalence:Ketrew_pure.Target.Equivalence.t ->
?on_failure_activate:user_target list ->
?on_success_activate:user_target list ->
?tags:string list -> string -> user_target
string
) is its name, then all optional arguments mean:
?active
: whether this target should be started by the engine or
wait to be activated by another target (through depends_on
or
on_{success,failure}_activate
) (default:
false
, i.e., inactive). Usual workflows should not set this
value since the function Ketrew.Client.submit
will activate the
toplevel target automatically.?depends_on
: list of the dependencies of the target.?make
: the build-process used to “build” the target; where the
computation happens.?done_when
: the condition that the target ensures (checked
before potentially running and after running).?metadata
: arbitrary metadata to attach to the target.?product
: the Ketrew.EDSL.user_artifact
that the target embeds (returned
by the #product
method of the target).?equivalence
: how to tell if two targets are equivalent (and
then will be merged by the engine). The default is
`Same_active_condition
which means that if two targets have the
same non-None
?done_when
argument they will be considered
equivalent (i.e. they try to “ensure the same condition”).?on_failure_activate
: targets to activate when this target fails.?on_success_activate
: targets to activate when this target succeeds.?tags
: arbitrary tags to add to the target (e.g. for
search/filter in the UI).
val file_target : ?depends_on:user_target list ->
?make:Ketrew_pure.Target.Build_process.t ->
?metadata:[ `String of string ] ->
?name:string ->
?host:Host.t ->
?equivalence:Ketrew_pure.Target.Equivalence.t ->
?on_failure_activate:user_target list ->
?on_success_activate:user_target list ->
?tags:string list -> string -> user_target
Ketrew.EDSL.user_artifact
and the Ketrew.EDSL.user_target
that produces it.
The ?product
of the target will be the file given as argument on
the host given by the ?host
option (default: localhost using "/tmp"
).
The ?done_when
condition will be the existence of that file.
This can be seen as a classical make
-like file-producing target,
but on any arbitrary host.
val daemonize : ?starting_timeout:float ->
?call_script:(string -> string list) ->
?using:[ `Nohup_setsid | `Python_daemon ] ->
?host:Host.t ->
?no_log_is_ok:bool ->
Program.t -> Ketrew_pure.Target.Build_process.t
?host
: the Host.t
on which the program is to be run.?starting_timeout
: how long to wait before considering that a
script failed to start (default: 5.
seconds).?call_script
: function creating a Unix.exec
-style command
given a shell script path
(default: (fun script -> ["bash"; script])
).?using
: which method to use when daemonizing on the host
(see Ketrew_daemonize
for more details).?no_log_is_ok
: consider that if the script run does not
produce a log file, the process still has succeeded (the default
and most common is false
, this can be useful for example when
the Program.t
or call_script
do something special over the
network).
val lsf : ?host:Host.t ->
?queue:string ->
?name:string ->
?wall_limit:string ->
?processors:[ `Min of int | `Min_max of int * int ] ->
?project:string ->
Program.t -> Ketrew_pure.Target.Build_process.t
val pbs : ?host:Host.t ->
?queue:string ->
?name:string ->
?wall_limit:[ `Hours of float ] ->
?processors:int ->
?email_user:[ `Always of string | `Never ] ->
?shell:string ->
Program.t -> [> `Long_running of string * string ]
val yarn_application : ?host:Host.t ->
?daemonize_using:[ `Nohup_setsid | `Python_daemon ] ->
?daemon_start_timeout:float ->
Program.t -> [> `Long_running of string * string ]
?host
: the “login” node of the Yarn cluster (default: localhost).?daemonize_using
: how to daemonize the process that calls and
waits-for the application-manager (default: `Python_daemon
).?daemon_start_timeout
: the timeout for the daemon.
val yarn_distributed_shell : ?host:Host.t ->
?daemonize_using:[ `Nohup_setsid | `Python_daemon ] ->
?daemon_start_timeout:float ->
?hadoop_bin:string ->
?distributed_shell_shell_jar:string ->
container_memory:[ `GB of int | `MB of int | `Raw of string ] ->
timeout:[ `Raw of string | `Seconds of int ] ->
application_name:string ->
Program.t -> [> `Long_running of string * string ]
?host
: the “login” node of the Yarn cluster (default: localhost).?daemonize_using
: how to daemonize the process that calls and
waits-for the application-manager (default: `Python_daemon
).?daemon_start_timeout
: the timeout for the daemon.
hadoop_bin
: the hadoop
executable (default: "hadoop"
).distributed_shell_shell_jar
:
path to the Jar file containing the
org.apache.hadoop.yarn.applications.distributedshell.Client
class
(default: "/opt/cloudera/parcels/CDH/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell.jar"
which seems to be the default installation path when using Cloudera-manager).container_memory
: how much memory to request from Yarn for the container
(`GB 42
for 42 GB; `Raw some_string
to pass directly some_string
to the option "-container_memory"
of distributedshell.Client
).timeout
: the “whole application” timeout
(`Seconds (24 * 60 * 60)
for about a day, `Raw some_string
to
pass directly some_string
to the option "-timeout"
of
distributedshell.Client
).application_name
: name of the application for Yarn (it is not
sanitized by Ketrew and, at least with some configurations, Yarn
can fail if this string contains spaces for example).
val to_display_string : ?ansi_colors:bool -> ?indentation:int -> user_target -> string