/*
 * YANG schema for the STAMPEDE event stream.
 *
 * Layout of this module:
 *   1. derived types (typedefs) shared by the events,
 *   2. groupings holding the fields common to families of events,
 *   3. the "events" container: a list whose entries are each exactly one
 *      of the stampede.* event containers (modelled as a choice).
 */
module stampede-schema {
  namespace "http://stampede-project.nsf.gov";
  prefix "stmp"; // used when imported

  import ietf-inet-types {
    prefix "inet";
  }

  organization "STAMPEDE project. NSF grant OCI-0943705";
  contact
    "Dan Gunter Lawrence Berkeley National Laboratory " +
    "Email: dkgunter@lbl.gov " +
    "Karan Vahi USC Information Sciences Institute " +
    "Email: vahi@isi.edu";
  reference "https://pegasus.isi.edu/projects/stampede";

  revision "2016-01-06" {
    description "Updated schema compatible with Pegasus 4.6 release";
  }

  /*
   * Derived types
   * -------------
   */

  // (Grid) DN
  typedef distinguished-name {
    type string;
  }

  // UUID
  // eg: B7B27B5E-0221-4D90-AF5D-A325C2DD951C
  // Lowercase letters (or a mix) also accepted.
  typedef uuid {
    type string {
      length "36";
      pattern "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}";
    }
  }

  // Integer representation of boolean value as '0' or '1'
  typedef intbool {
    type uint8 {
      range "0 .. 1";
    }
  }

  // NetLogger timestamp: either an ISO8601 date-time with a mandatory
  // zone designator (Z or +hh:mm / -hh:mm), or numeric seconds since the
  // epoch with an optional fractional part.
  //
  // YANG patterns are implicitly anchored, so the integer part of the
  // numeric form must allow 10 digits: epoch seconds have been 10 digits
  // long since 2001-09-09. The previous bound of 9 digits rejected every
  // current timestamp.
  typedef nl_ts {
    type string {
      pattern '(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?' +
              '(Z|[\+\-]\d{2}:\d{2}))|(\d{1,10}(\.\d+)?)';
    }
  }

  // Integer Pegasus job types
  typedef peg_inttype {
    type uint8 {
      range "0 .. 11";
    }
  }

  // Valid Pegasus Job Types as String
  typedef peg_strtype {
    type enumeration {
      enum "unknown";
      enum "compute";
      enum "stage-in-tx";
      enum "stage-out-tx";
      enum "registration";
      enum "inter-site-tx";
      enum "create-dir";
      enum "staged-compute"; // since 3.1 no longer generated
      enum "cleanup";
      enum "chmod";
      enum "dax";
      enum "dag";
    }
  }

  // Valid Condor Job States i.e the various ULOG_ events
  typedef condor_jobstates {
    type enumeration {
      enum "PRE_SCRIPT_STARTED";
      enum "PRE_SCRIPT_TERMINATED";
      enum "PRE_SCRIPT_SUCCESS";
      enum "PRE_SCRIPT_FAILED";
      enum "SUBMIT";
      enum "GRID_SUBMIT";
      enum "GLOBUS_SUBMIT";
      enum "SUBMIT_FAILED";
      enum "EXECUTE";
      enum "REMOTE_ERROR";
      enum "IMAGE_SIZE";
      enum "JOB_TERMINATED";
      enum "JOB_SUCCESS";
      enum "JOB_FAILURE";
      enum "JOB_HELD";
      enum "JOB_EVICTED";
      enum "JOB_RELEASED";
      enum "POST_SCRIPT_STARTED";
      enum "POST_SCRIPT_TERMINATED";
      enum "POST_SCRIPT_SUCCESS";
      enum "POST_SCRIPT_FAILED";
    }
  }

  // Valid Condor Workflow States
  typedef condor_wfstates {
    type enumeration {
      enum "WORKFLOW_STARTED";
      enum "WORKFLOW_TERMINATED";
    }
  }

  /*
   * Standard fields
   * ---------------
   */

  // Fields carried by every event: timestamp, optional severity level and
  // optional DAG workflow id.
  grouping base-event {
    description "Common components in all events";

    leaf ts {
      description "Timestamp, ISO8601 or numeric seconds since 1/1/1970";
      mandatory true;
      type nl_ts;
    }

    /*leaf event {
        description "Unique name of event";
        type string;
        mandatory true;
    }*/

    leaf level {
      description
        "Severity level of event. " +
        "Subset of NetLogger BP levels. " +
        "For '*.end' events, if status is non-zero then level should be Error.";
      type enumeration {
        enum Info;
        enum Error;
      }
    }

    leaf xwf.id {
      description "DAG workflow id";
      type uuid;
    }
  }

  // base-event plus the identifiers of a job instance.
  grouping base-job-inst {
    description "Common components for all job instance events";
    uses base-event;

    leaf job_inst.id {
      description
        "Job instance identifier i.e the submit sequence generated by monitord";
      type int32;
      mandatory true;
    }

    leaf js.id {
      description "Jobstate identifier";
      type int32;
    }

    leaf job.id {
      description "Identifier for corresponding job in the DAG";
      type string;
      mandatory true;
    }
  }

  // base-job-inst plus the (mandatory) scheduler-side job identifier.
  grouping sched-job-inst {
    description "Scheduled job instance";
    uses base-job-inst;

    leaf sched.id {
      description "Identifier for job in scheduler";
      type string;
      mandatory true;
    }
  }

  // base-event plus a key/value pair; the value is optional.
  grouping base-metadata {
    description
      "Common components for all metadata events that describe metadata for an entity.";
    uses base-event;

    leaf key {
      description "Key for the metadata tuple";
      type string;
      mandatory true;
    }

    leaf value {
      description "Corresponding value of the key";
      type string;
    }
  }

  /*
   * Event stream containing NetLogger events.
   *
   * Naming conventions:
   *   wf       = Workflow in DAX (abstract)
   *   xwf      = Workflow in DAG
   *   xsubwf   = Sub-workflow in DAG
   *   task     = DAX node
   *   job      = DAG node
   *   job_inst = Runnable (Condor) job
   *   inv      = Executable that creates an invocation record
   */
  container events {
    list event {
      config false;

      // Each list entry holds exactly one of the event containers below.
      choice an_event {

        container stampede.wf.plan {
          uses base-event;

          leaf submit.hostname {
            description "The hostname of the Pegasus submit host";
            type inet:host;
            mandatory true;
          }

          leaf dax.label {
            description "Label for abstract workflow specification";
            type string;
            default "workflow";
          }

          leaf dax.index {
            description "Index for the DAX";
            type string;
            default "workflow";
          }

          leaf dax.version {
            description "Version number for DAX";
            type string;
            mandatory true;
          }

          leaf dax.file {
            description "Filename for the DAX";
            type string;
            mandatory true;
          }

          leaf dag.file.name {
            description "Filename for the DAG";
            type string;
            mandatory true;
          }

          leaf planner.version {
            description "Version string for Pegasus planner, e.g. 3.0.0cvs";
            type string;
            mandatory true;
          }

          leaf grid_dn {
            description "Grid DN of submitter";
            type distinguished-name;
          }

          leaf user {
            description "User name of submitter";
            type string;
          }

          leaf submit.dir {
            description "Directory path from which workflow was submitted";
            type string;
            mandatory true;
          }

          leaf argv {
            description "All arguments given to planner on command-line";
            type string;
          }

          leaf parent.xwf.id {
            description "Parent workflow in DAG, if any";
            type uuid;
          }

          // NOTE(review): described as a UUID but typed as plain string,
          // unlike parent.xwf.id above. Left unchanged because tightening
          // the type would reject data that validates today - confirm.
          leaf root.xwf.id {
            description
              "Root of workflow hierarchy, in DAG. " +
              "Use this workflow's UUID if it is the root";
            type string;
            mandatory true;
          }
        }

        container stampede.static.start {
          uses base-event;
        }

        container stampede.static.end {
          uses base-event;
        }

        container stampede.xwf.start {
          uses base-event;

          leaf restart_count {
            description "Number of times workflow was restarted (due to failures)";
            type uint32;
            mandatory true;
          }
        }

        container stampede.xwf.end {
          uses base-event;

          leaf restart_count {
            description "Number of times workflow was restarted (due to failures)";
            type uint32;
            mandatory true;
          }

          leaf status {
            description "Status of workflow. 0=success, -1=failure";
            type int16;
            mandatory true;
          }
        }

        container stampede.task.info {
          description "Information about task in DAX";
          uses base-event;

          leaf transformation {
            description "Logical name of the underlying executable";
            type string;
            mandatory true;
          }

          leaf argv {
            description "All arguments given to transformation on command-line";
            type string;
          }

          leaf type {
            description "Type of task";
            type peg_inttype;
            mandatory true;
          }

          leaf type_desc {
            description "String description of task type";
            type peg_strtype;
            mandatory true;
          }

          leaf task.id {
            description "Identifier for this task in the DAX";
            type string;
            mandatory true;
          }
        }

        container stampede.task.edge {
          description "Represents child/parent relationship between two tasks in DAX";
          uses base-event;

          leaf parent.task.id {
            description "Parent task";
            type string;
            mandatory true;
          }

          leaf child.task.id {
            description "Child task";
            type string;
            mandatory true;
          }
        }

        container stampede.wf.map.task_job {
          description "Relates a DAX task to a DAG job.";
          uses base-event;

          leaf task.id {
            description "Identifier for the task in the DAX";
            type string;
            mandatory true;
          }

          leaf job.id {
            description "Identifier for corresponding job in the DAG";
            type string;
            mandatory true;
          }
        }

        container stampede.xwf.map.subwf_job {
          description "Relates a sub workflow to the corresponding job instance";
          uses base-event;

          leaf subwf.id {
            description "Sub Workflow Identifier / UUID";
            type string;
            mandatory true;
          }

          leaf job.id {
            description "Identifier for corresponding job in the DAG";
            type string;
            mandatory true;
          }

          leaf job_inst.id {
            description
              "Job instance identifier i.e the submit sequence generated by monitord";
            type int32;
            mandatory true;
          }
        }

        container stampede.job.info {
          description "A description of a job in the DAG";
          uses base-event;

          leaf job.id {
            description "Identifier for job in the DAG";
            type string;
            mandatory true;
          }

          leaf submit_file {
            description "Name of file being submitted to the scheduler";
            type string;
            mandatory true;
          }

          leaf type {
            description "Type of task";
            type peg_inttype;
            mandatory true;
          }

          leaf type_desc {
            description "String description of task type";
            type peg_strtype;
            mandatory true;
          }

          leaf clustered {
            description "Whether job is clustered or not";
            type intbool;
            mandatory true;
          }

          leaf max_retries {
            description "How many retries are allowed for this job before giving up";
            type uint32;
            mandatory true;
          }

          leaf task_count {
            description
              "Number of DAX tasks for this job. " +
              "Auxiliary jobs without a task in the DAX will have the value '0'";
            type uint32;
            mandatory true;
          }

          leaf executable {
            description "Program to execute";
            type string;
            mandatory true;
          }

          leaf argv {
            description "All arguments given to executable (on command-line)";
            type string;
          }
        }

        container stampede.job.edge {
          description "Parent/child relationship between two jobs in the DAG";
          uses base-event;

          leaf parent.job.id {
            description "Parent job";
            type string;
            mandatory true;
          }

          leaf child.job.id {
            description "Child job";
            type string;
            mandatory true;
          }
        }

        container stampede.job_inst.pre.start {
          description "Start of a prescript for a job instance";
          uses base-job-inst;
        }

        container stampede.job_inst.pre.term {
          description "Job prescript is terminated (success or failure not yet known)";
          uses base-job-inst;
        }

        container stampede.job_inst.pre.end {
          description "End of a prescript for a job instance";
          uses base-job-inst;

          leaf status {
            description "Status of prescript. 0 is success, -1 is error";
            type int32;
            mandatory true;
          }

          leaf exitcode {
            description "the exitcode with which the prescript exited";
            type int32;
            mandatory true;
          }
        }

        // NOTE(review): the description says the scheduler job id is not
        // yet known, yet sched-job-inst makes sched.id mandatory. Left
        // unchanged; confirm which of the two is intended.
        container stampede.job_inst.submit.start {
          description
            "When job instance is going to be submitted. " +
            "Scheduler job id is not yet known";
          uses sched-job-inst;
        }

        container stampede.job_inst.submit.end {
          description "When executable job is submitted";
          uses sched-job-inst;

          leaf status {
            description "Status of submission. 0=success, -1=failure";
            type int16;
            mandatory true;
          }
        }

        container stampede.job_inst.held.start {
          description "When Condor holds the jobs";
          uses sched-job-inst;
        }

        container stampede.job_inst.held.end {
          description "When the job is released after being held";
          uses sched-job-inst;

          leaf status {
            description "Status of release. 0=success, -1=failure";
            type int16;
            mandatory true;
          }
        }

        container stampede.job_inst.main.start {
          description "Start of execution of a scheduler job";
          uses sched-job-inst;

          leaf stdin.file {
            description "Path to file containing standard input of job";
            type string;
            //mandatory true;
          }

          leaf stdout.file {
            description "Path to file containing standard output of job";
            type string;
            mandatory true;
          }

          leaf stderr.file {
            description "Path to file containing standard error of job";
            type string;
            mandatory true;
          }
        }

        container stampede.job_inst.main.term {
          description "Job is terminated (success or failure not yet known)";
          uses sched-job-inst;

          leaf status {
            description
              "Execution status. 0=means job terminated, -1=job was evicted, not terminated";
            type int32;
            mandatory true;
          }
        }

        container stampede.job_inst.main.end {
          description "End of main part of scheduler job";
          uses sched-job-inst;

          leaf stdin.file {
            description "Path to file containing standard input of job";
            type string;
          }

          leaf stdout.file {
            description "Path to file containing standard output of job";
            type string;
            mandatory true;
          }

          leaf stdout.text {
            description "Text containing output of job";
            type string;
          }

          leaf stderr.file {
            description "Path to file containing standard error of job";
            type string;
            mandatory true;
          }

          leaf stderr.text {
            description "Text containing standard error of job";
            type string;
          }

          leaf user {
            description "Scheduler's name for user";
            type string;
          }

          leaf site {
            description "DAX name for the site at which the job ran";
            type string;
            mandatory true;
          }

          leaf work_dir {
            description "Path to working directory";
            type string;
          }

          leaf local.dur {
            description "Duration as seen at the local node";
            units "seconds";
            type decimal64 {
              fraction-digits "6";
            }
          }

          leaf status {
            description "Execution status. 0=success, -1=failure";
            type int32;
            mandatory true;
          }

          leaf exitcode {
            description "the exitcode with which the executable exited";
            type int32;
            mandatory true;
          }

          leaf multiplier_factor {
            description "the multiplier factor for use in statistics";
            type int32;
            mandatory true;
          }

          // These values will be set if the job is clustered.
          leaf cluster.start {
            description "When the enclosing cluster started";
            type nl_ts;
          }

          leaf cluster.dur {
            description "Duration of enclosing cluster";
            units "seconds";
            type decimal64 {
              fraction-digits "6";
            }
          }
        }

        container stampede.job_inst.post.start {
          description "Start of a postscript for a job instance";
          uses sched-job-inst;
        }

        container stampede.job_inst.post.term {
          description "Job postscript is terminated (success or failure not yet known)";
          uses sched-job-inst;
        }

        container stampede.job_inst.post.end {
          description "End of a postscript for a job instance";
          uses sched-job-inst;

          leaf status {
            description "Status of postscript. 0 is success, -1=failure";
            type int32;
            mandatory true;
          }

          leaf exitcode {
            description "the exitcode with which the postscript exited";
            type int32;
            mandatory true;
          }
        }

        container stampede.job_inst.host.info {
          description "Host information associated with a job instance";
          uses base-job-inst;

          leaf site {
            description "Site name";
            type string;
            mandatory true;
          }

          leaf hostname {
            description "Host name";
            type inet:host;
            mandatory true;
          }

          leaf ip {
            description "IP address";
            type inet:ip-address;
            mandatory true;
          }

          leaf total_memory {
            description "Total RAM on host";
            type uint64;
            /* mandatory true; */
          }

          leaf uname {
            description "Operating system name";
            type string;
            /* mandatory true; */
          }
        }

        // Uses base-job-inst and declares its own mandatory sched.id rather
        // than reusing sched-job-inst.
        container stampede.job_inst.image.info {
          description "Image size associated with a job instance";
          uses base-job-inst;

          leaf size {
            description "Image size";
            type uint64;
          }

          leaf sched.id {
            description "Identifier for job in scheduler";
            type string;
            mandatory true;
          }
        }

        container stampede.inv.start {
          description "Start of an invocation";
          uses base-event;

          leaf job_inst.id {
            description
              "Job instance identifier i.e the submit sequence generated by monitord";
            type int32;
            mandatory true;
          }

          leaf job.id {
            description "Identifier for corresponding job in the DAG";
            type string;
            mandatory true;
          }

          leaf inv.id {
            description
              "Identifier for invocation. " +
              "Sequence number, with -1=prescript and -2=postscript";
            type int32;
            mandatory true;
          }
        }

        container stampede.inv.end {
          description "End of an invocation";
          uses base-event;

          leaf job_inst.id {
            description
              "Job instance identifier i.e the submit sequence generated by monitord";
            type int32;
            mandatory true;
          }

          leaf inv.id {
            description
              "Identifier for invocation. " +
              "Sequence number, with -1=prescript and -2=postscript";
            type int32;
            mandatory true;
          }

          leaf job.id {
            description "Identifier for corresponding job in the DAG";
            type string;
            mandatory true;
          }

          leaf start_time {
            description "The start time of the event";
            type nl_ts;
          }

          leaf dur {
            description "Duration of invocation";
            units "seconds";
            type decimal64 {
              fraction-digits "6";
            }
          }

          leaf remote_cpu_time {
            description "remote CPU time computed as the stime + utime";
            units "seconds";
            type decimal64 {
              fraction-digits "6";
            }
          }

          leaf exitcode {
            description "the exitcode with which the executable exited";
            type int32;
          }

          leaf transformation {
            description "Transformation associated with this invocation";
            type string;
            mandatory true;
          }

          leaf executable {
            description "Program executed for this invocation";
            type string;
            mandatory true;
          }

          leaf argv {
            description "All arguments given to executable on command-line";
            type string;
          }

          leaf task.id {
            description "Identifier for related task in the DAX";
            type string;
          }
        }

        // metadata related events
        container stampede.static.meta.start {
          uses base-event;
        }

        container stampede.static.meta.end {
          uses base-event;
        }

        container stampede.xwf.meta {
          description "Metadata associated with a workflow";
          uses base-metadata;
        }

        container stampede.task.meta {
          description "Metadata associated with a task";
          uses base-metadata;

          leaf task.id {
            description "Identifier for related task in the DAX";
            type string;
          }
        }

        container stampede.rc.meta {
          description "Metadata associated with a file in the replica catalog";
          uses base-metadata;

          leaf lfn.id {
            description "Logical File Identifier for the file";
            type string;
          }
        }

        container stampede.wf.map.file {
          description
            "Event that captures what task generates or consumes a particular file";
          uses base-event;

          leaf lfn.id {
            description "Logical File Identifier for the file";
            type string;
          }

          leaf task.id {
            description "Identifier for related task in the DAX";
            type string;
          }
        }

      } // choice
    } // event
  } // events
}