module EDSL:sig
..end
Many functions may raise exceptions when called improperly, but this
should happen while building the workflow, not after it starts running.
module Host:sig
..end
module Program:sig
..end
module Condition:sig
..end
module Build_process:sig
..end
val daemonize : ?starting_timeout:float ->
?call_script:(string -> string list) ->
?using:[ `Nohup_setsid | `Python_daemon ] ->
?host:Host.t ->
?no_log_is_ok:bool -> Program.t -> Build_process.t
?host
: the Host.t
on which the program is to be run.?starting_timeout
: how long to wait before considering that a
script failed to start (default: 5.
seconds).?call_script
: function creating a Unix.exec
-style command
given a shell script path
(default: (fun script -> ["bash"; script])
).?using
: which method to use when daemonizing on the host
(see Ketrew_daemonize
for more details).?no_log_is_ok
: consider that if the script run does not
produce a log file, the process still has succeeded (the default
and most common is false
, this can be useful for example when
the Program.t
or call_script
do something special over the
network).val lsf : ?host:Host.t ->
?queue:string ->
?name:string ->
?wall_limit:string ->
?processors:[ `Min of int | `Min_max of int * int ] ->
?project:string ->
?request_memory:[ `GB of int | `MB of int ] ->
?raw_options:string list ->
Program.t -> Build_process.t
val pbs : ?host:Host.t ->
?queue:string ->
?name:string ->
?wall_limit:[ `Hours of float ] ->
?processors:int ->
?email_user:[ `Always of string | `Never ] ->
?shell:string -> Program.t -> Build_process.t
val yarn_application : ?host:Host.t ->
?daemonize_using:[ `Nohup_setsid | `Python_daemon ] ->
?daemon_start_timeout:float ->
Program.t -> Build_process.t
?host
: the “login” node of the Yarn cluster (default: localhost).?daemonize_using
: how to daemonize the process that calls and
waits-for the application-manager (default: `Python_daemon
).?daemon_start_timeout
: the timeout for the daemon.val yarn_distributed_shell : ?host:Host.t ->
?daemonize_using:[ `Nohup_setsid | `Python_daemon ] ->
?daemon_start_timeout:float ->
?hadoop_bin:string ->
?distributed_shell_shell_jar:string ->
?container_vcores:int ->
container_memory:[ `GB of int | `MB of int | `Raw of string ] ->
timeout:[ `Raw of string | `Seconds of int ] ->
application_name:string ->
Program.t -> Build_process.t
?host
: the “login” node of the Yarn cluster (default: localhost).?daemonize_using
: how to daemonize the process that calls and
waits-for the application-manager (default: `Python_daemon
).?daemon_start_timeout
: the timeout for the daemon.hadoop_bin
: the hadoop
executable (default: "hadoop"
).distributed_shell_shell_jar
:
path to the Jar file containing the
org.apache.hadoop.yarn.applications.distributedshell.Client
class
(default: "/opt/cloudera/parcels/CDH/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell.jar"
which seems to be the default installation path when using Cloudera-manager).container_vcores
: how many virtual cores to request (default 1
).container_memory
: how much memory to request from Yarn for the container
(`GB 42
for 42 GB; `Raw some_string
to pass directly some_string
to the option "-container_memory"
of distributedshell.Client
).timeout
: the “whole application” timeout
(`Seconds (24 * 60 * 60)
for about a day, `Raw some_string
to
pass directly some_string
to the option "-timeout"
of
distributedshell.Client
).application_name
: name of the application for Yarn (it is not
sanitized by Ketrew and, at least with some configurations, Yarn
can fail if this string contains spaces for example).module Internal_representation:sig
..end
type< is_done : Condition.t option; .. >
product =< is_done : Condition.t option; .. > as 'a
Ketrew.EDSL.Condition.t
.type < is_done : Condition.t option; .. >
workflow_node = <
|
product : |
|
render : |
#product
method.
The #render
method is used internally by
Ketrew.Client.submit_workflow
.
type
workflow_edge
val depends_on : < is_done : Condition.t option; .. > workflow_node ->
workflow_edge
val on_success_activate : < is_done : Condition.t option; .. > workflow_node ->
workflow_edge
val on_failure_activate : < is_done : Condition.t option; .. > workflow_node ->
workflow_edge
type ensures_option =
[ `Is_verified of Condition.t | `Nothing | `Product_is_done ]
?ensures
argument of Ketrew.EDSL.workflow_node
.val workflow_node : ?name:string ->
?active:bool ->
?make:Build_process.t ->
?ensures:ensures_option ->
?metadata:[ `String of string ] ->
?equivalence:Ketrew_pure.Target.Equivalence.t ->
?tags:string list ->
?edges:workflow_edge list ->
(< is_done : Condition.t option; .. > as 'a) product ->
'a workflow_node
?name
: give a name to the node (visible in the UIs).?active
: whether this node should be started by the engine or
wait to be activated by another node (through an edge)
The default is false
, i.e., inactive, normal workflows should not
set this value since the function Ketrew.Client.submit_workflow
will activate the toplevel node automatically.?make
: the build-process used to “run/build” the node; where the
computation happens.?ensures
: decides which condition the node has to ensure (checked
before potentially running and after running): `Nothing
: no condition, i.e. only dependencies will be
activated and when all succeed the node will run the ?make
and only this
will be checked for success.`Is_verified cond
: use the condition cond
(overrides the one
potentially provided by the 'a product
).`Product_is_done
: use the condition of the 'a product
; this is the
default, if the product has no condition, this is equivalent to
`Nothing
.?metadata
: arbitrary metadata to attach to the node.?equivalence
: how to tell if two nodes are equivalent (and
then will be merged by the engine). The default is
`Same_active_condition
which means that if two nodes have the
same non-None
?done_when
argument they will be considered
equivalent (i.e. they try to “ensure the same condition”).?tags
: arbitrary tags to add to the node (e.g. for
search/filter in the UI).?edges
: links to other nodes from the current node
(list of edges created with the Ketrew.EDSL.depends_on
,
Ketrew.EDSL.on_failure_activate
, and Ketrew.EDSL.on_success_activate
functions).'product_type product
: the main argument of the function is
the artifact produced by the node (returned by the #product
method of the node).type
not_already_done = <
|
is_done : |
val without_product : not_already_done
?is_done
argument) will always run.
This can be understood as a ".PHONY"
target in
make.
type
single_file = <
|
exists : |
|
host : |
|
is_bigger_than : |
|
is_done : |
|
path : |
Host.t
).val single_file : ?host:Host.t ->
string -> single_file product
single_file
product.
The path argument should be absolute since the notion of “current directory” is very ill-defined when it comes to this kind of distributed application.
The condition returned by the #is_done
method (used by default
in any workflow-node that uses the single_file product
) is to
check the existence of the file.
type
list_of_files = <
|
is_done : |
|
paths : |
Host.t
).val list_of_files : ?host:Host.t ->
string list -> list_of_files product
list_of_files
product (#is_done
checks the existence
of all these files).type
unknown_product = <
|
is_done : |
Ketrew.EDSL.forget_product
.val forget_product : < is_done : Condition.t option; .. > workflow_node ->
unknown_product workflow_node
unknown_product workflow_node
type, this is
useful to make some programs that generate workflows type check
(putting nodes into lists, or in different branches of a match
.. with
).val workflow_to_string : ?ansi_colors:bool ->
?indentation:int ->
< is_done : Condition.t option; .. > workflow_node ->
string
?recursive:bool ->
< is_done : Condition.t option; .. > workflow_node ->
string list -> unit
: if recursive
is true
, follow the
edges
recursively to add tags.val node_id : < is_done : Condition.t option; .. > workflow_node ->
string
val node_name : < is_done : Condition.t option; .. > workflow_node ->
string
This is the old and deprecated API to build workflows (deprecated since Ketrew 2.1.0).
Using functions like Ketrew.EDSL.target
is still possible but they will trigger
a compilation warning e.g. "Warning 3: deprecated: Ketrew.EDSL.target"
.
class type user_artifact =object
..end
val file : ?host:Host.t -> string -> user_artifact
val unit : user_artifact
class type user_target =object
..end
Target.t
).
val target : ?active:bool ->
?depends_on:user_target list ->
?make:Build_process.t ->
?done_when:Ketrew_pure.Target.Condition.t ->
?metadata:[ `String of string ] ->
?product:user_artifact ->
?equivalence:Ketrew_pure.Target.Equivalence.t ->
?on_failure_activate:user_target list ->
?on_success_activate:user_target list ->
?tags:string list -> string -> user_target
string
) is its name, then all optional arguments mean:
?active
: whether this target should be started by the engine or
wait to be activated by another target (through depends_on
or
on_{success,failure}_activate
) (default:
false
, i.e., inactive). Usual workflows should not set this
value since the function Ketrew.Client.submit
will activate the
toplevel target automatically.?depends_on
: list of the dependencies of the target.?make
: the build-process used to “build” the target; where the
computation happens.?done_when
: the condition that the target ensures (checked
before potentially running and after running).?metadata
: arbitrary metadata to attach to the target.?product
: the Ketrew.EDSL.user_artifact
that the target embeds (returned
by the #product
method of the target).?equivalence
: how to tell if two targets are equivalent (and
then will be merged by the engine). The default is
`Same_active_condition
which means that if two targets have the
same non-None
?done_when
argument they will be considered
equivalent (i.e. they try to “ensure the same condition”).?on_failure_activate
: targets to activate when this target fails.?on_success_activate
: targets to activate when this target succeeds.?tags
: arbitrary tags to add to the target (e.g. for
search/filter in the UI)val file_target : ?depends_on:user_target list ->
?make:Build_process.t ->
?metadata:[ `String of string ] ->
?name:string ->
?host:Host.t ->
?equivalence:Ketrew_pure.Target.Equivalence.t ->
?on_failure_activate:user_target list ->
?on_success_activate:user_target list ->
?tags:string list -> string -> user_target
Ketrew.EDSL.user_artifact
and the Ketrew.EDSL.user_target
that produces it.
The ?product
of the target will be the file given as argument on
the host given by the ?host
option (default: localhost using "/tmp"
).
The ?done_when
condition will be the existence of that file.
This can be seen as a classical make
-like file-producing target,
but on any arbitrary host.
val to_display_string : ?ansi_colors:bool -> ?indentation:int -> user_target -> string