Monitoring Tasks

Labels

Sometimes it is very useful to see task-specific information in the GUI. For this, we can define a label. A label is a string that is attached to a task and that can be updated using the child command ecflow_client --label.

[2]:
# Build a suite with a single task whose script refreshes the 'info' label
# on every loop iteration and posts a final message once the loop is over.
with pf.Suite('test', host=pf.LocalHost(), files='/test') as s, pf.Family('label'):
    with pf.Task(
        't1',
        script=[
            'n=1',
            'while [[ $n -le 5 ]]                   # Loop 5 times',
            'do',
            '    msg="The date is now $(date)"',
            '    ecflow_client --label=info "$msg"  # Set the label',
            '    sleep 60                           # Wait a one minute',
            '    (( n = $n + 1 ))',
            'done',
            '',
            'ecflow_client --label=info "I have now finished my work."',
        ],
    ) as t1:
        # Declare the label the script writes to; it starts out empty.
        pf.Label('info', '')

s
[2]:
suite test
  edit ECF_FILES '/test'
  edit ECF_JOB_CMD 'bash -c 'export ECF_PORT=%ECF_PORT%; export ECF_HOST=%ECF_HOST%; export ECF_NAME=%ECF_NAME%; export ECF_PASS=%ECF_PASS%; export ECF_TRYNO=%ECF_TRYNO%; export PATH=/usr/local/apps/ecflow/%ECF_VERSION%/bin:$PATH; ecflow_client --init="$$" && %ECF_JOB% && ecflow_client --complete || ecflow_client --abort ' 1> %ECF_JOBOUT% 2>&1 &'
  edit ECF_KILL_CMD 'pkill -15 -P %ECF_RID%'
  edit ECF_STATUS_CMD 'true'
  edit ECF_OUT '%ECF_HOME%'
  label exec_host "localhost"
  family label
    task t1
      label info ""
  endfamily
endsuite
[3]:
# Deploy the suite with the Notebook target; the generated task script
# (/test/t1.ecf) is displayed in the cell output below.
# NOTE(review): presumably pf.Notebook only renders files inline rather than
# writing them to disk -- confirm against the pyflow documentation.
s.deploy_suite(pf.Notebook)
[3]:

File: /test/t1.ecf


#!/bin/bash

echo "Running on: $(hostname)" || true
set -uex


export ECF_PORT=%ECF_PORT%    # The server port number
export ECF_HOST=%ECF_HOST%    # The host name where the server is running
export ECF_NAME=%ECF_NAME%    # The name of this current task
export ECF_PASS=%ECF_PASS%    # A unique password
export ECF_TRYNO=%ECF_TRYNO%  # Current try number of the task

echo "Current working directory: $(pwd)"

%nopp

n=1
while [[ $n -le 5 ]]                   # Loop 5 times
do
    msg="The date is now $(date)"
    ecflow_client --label=info "$msg"  # Set the label
    sleep 60                           # Wait a one minute
    (( n = $n + 1 ))
done

ecflow_client --label=info "I have now finished my work."

%end

Late

Sometimes tasks don’t run as expected, and we want to get a notification when this is the case. For this, we use the late attribute.

A node can only have one late attribute. The late attribute only takes effect on tasks, but it can also be defined on a suite or family, in which case it is inherited by the tasks below it. Any late defined lower down the hierarchy will override the aspect (submitted, active, complete) defined higher up.

-s submitted

The time a node can stay submitted (format [+]hh:mm). submitted is always relative, so + is simply ignored, if present. If the node stays submitted longer than the time specified, the late flag is set.

-a active

The time of day the node must have become active (format hh:mm). If the node is still queued or submitted, the late flag is set.

-c complete

The time by which the node must become complete (format [+]hh:mm). If relative, the time is measured from the moment the node became active; otherwise the node must be complete by the given time of day.

[4]:
# Attach a late attribute to t1 that sets all three aspects at once:
# submitted (-s), active (-a) and complete (-c).
with pf.Suite('test') as s, pf.Family('f1'), pf.Task('t1'):
    pf.Late('-s +00:15 -a 20:00 -c +02:00')

s
[4]:
suite test
  edit ECF_JOB_CMD 'bash -c 'export ECF_PORT=%ECF_PORT%; export ECF_HOST=%ECF_HOST%; export ECF_NAME=%ECF_NAME%; export ECF_PASS=%ECF_PASS%; export ECF_TRYNO=%ECF_TRYNO%; export PATH=/usr/local/apps/ecflow/%ECF_VERSION%/bin:$PATH; ecflow_client --init="$$" && %ECF_JOB% && ecflow_client --complete || ecflow_client --abort ' 1> %ECF_JOBOUT% 2>&1 &'
  edit ECF_KILL_CMD 'pkill -15 -P %ECF_RID%'
  edit ECF_STATUS_CMD 'true'
  edit ECF_OUT '%ECF_HOME%'
  label exec_host "default"
  family f1
    task t1
      late -s +00:15 -a 20:00 -c +02:00
  endfamily
endsuite

This is interpreted as: the node can stay submitted for a maximum of 15 minutes, and it must become active by 20:00 and the runtime must not exceed 2 hours.

[5]:
# Family f6 carries a SLEEP variable; its task t1 gets a late attribute
# that only constrains the completion time.
with pf.Suite('test') as s, pf.Family('f6'):
    pf.Variable('SLEEP', 120)
    with pf.Task('t1'):
        # Set the late flag if the task takes longer than one minute.
        pf.Late('-c +00:01')

s
[5]:
suite test
  edit ECF_JOB_CMD 'bash -c 'export ECF_PORT=%ECF_PORT%; export ECF_HOST=%ECF_HOST%; export ECF_NAME=%ECF_NAME%; export ECF_PASS=%ECF_PASS%; export ECF_TRYNO=%ECF_TRYNO%; export PATH=/usr/local/apps/ecflow/%ECF_VERSION%/bin:$PATH; ecflow_client --init="$$" && %ECF_JOB% && ecflow_client --complete || ecflow_client --abort ' 1> %ECF_JOBOUT% 2>&1 &'
  edit ECF_KILL_CMD 'pkill -15 -P %ECF_RID%'
  edit ECF_STATUS_CMD 'true'
  edit ECF_OUT '%ECF_HOME%'
  label exec_host "default"
  family f6
    edit SLEEP '120'
    task t1
      late -c +00:01
  endfamily
endsuite
[6]:
# Shell lines for t1: update the 'info' label with the current date five
# times, one minute apart, then write a closing message.
label_script = [
    'n=1',
    'while [[ $n -le 5 ]]                   # Loop 5 times',
    'do',
    '    msg="The date is now $(date)"',
    '    ecflow_client --label=info "$msg"  # Set the label',
    '    sleep 60                           # Wait a one minute',
    '    (( n = $n + 1 ))',
    'done',
    '',
    'ecflow_client --label=info "I have now finished my work."',
]

with pf.Suite('test', host=pf.LocalHost(), files='/test') as s:
    with pf.Family('label'):
        with pf.Task('t1', script=label_script) as t1:
            # The label must be declared on the task before the script can set it.
            pf.Label('info', '')

s
[6]:
suite test
  edit ECF_FILES '/test'
  edit ECF_JOB_CMD 'bash -c 'export ECF_PORT=%ECF_PORT%; export ECF_HOST=%ECF_HOST%; export ECF_NAME=%ECF_NAME%; export ECF_PASS=%ECF_PASS%; export ECF_TRYNO=%ECF_TRYNO%; export PATH=/usr/local/apps/ecflow/%ECF_VERSION%/bin:$PATH; ecflow_client --init="$$" && %ECF_JOB% && ecflow_client --complete || ecflow_client --abort ' 1> %ECF_JOBOUT% 2>&1 &'
  edit ECF_KILL_CMD 'pkill -15 -P %ECF_RID%'
  edit ECF_STATUS_CMD 'true'
  edit ECF_OUT '%ECF_HOME%'
  label exec_host "localhost"
  family label
    task t1
      label info ""
  endfamily
endsuite

Meters

A meter is very similar to an event. Instead of being a boolean value (on/off), it can take a range of integer values. Other tasks are then triggered when the meter reaches a certain value.

Like events, meters have names and a task can have several of them.

[7]:
with pf.Suite('test', host=pf.LocalHost(), files='/test') as s:
    with pf.Family('f1'):
        pf.Variable('SLEEP', 20)

        # t1 declares the 'progress' meter, so t1's own script must be the one
        # that updates it: the --meter child command acts on the task that
        # calls it. t5, t6 and t7 below are triggered from t1.progress.
        with pf.Task('t1', script=[
            'echo "I will now sleep for %SLEEP% seconds"',
            'sleep %SLEEP%',
            'n=1',
            'while [[ $n -le 100 ]]                   # Loop 100 times',
            'do',
            '    sleep 1                              # Wait a short time',
            '    ecflow_client --meter=progress $n    # Notify ecFlow',
            '    (( n = $n + 1 ))',
            'done',
        ]) as t1:
            pf.Meter('progress', 1, 100, 90)

        # t2 only declares its two events here; it does not own a meter.
        with pf.Task('t2') as t2:
            pf.Event('a')
            pf.Event('b')

        t3 = pf.Task('t3')
        t4 = pf.Task('t4')
        t5 = pf.Task('t5')
        t6 = pf.Task('t6')
        t7 = pf.Task('t7')

    # Dependencies between the tasks.
    t2.triggers = t1                 # t2 runs once t1 is complete
    t3.triggers = t2.a               # t3 runs when t2 sets event a
    t4.completes = t2.b              # t4 is set complete when t2 sets event b
    t4.triggers = t2                 # otherwise t4 runs once t2 is complete
    t5.triggers = t1.progress >= 30  # meter thresholds on t1
    t6.triggers = t1.progress >= 60
    t7.triggers = t1.progress >= 90

s
[7]:
suite test
  edit ECF_FILES '/test'
  edit ECF_JOB_CMD 'bash -c 'export ECF_PORT=%ECF_PORT%; export ECF_HOST=%ECF_HOST%; export ECF_NAME=%ECF_NAME%; export ECF_PASS=%ECF_PASS%; export ECF_TRYNO=%ECF_TRYNO%; export PATH=/usr/local/apps/ecflow/%ECF_VERSION%/bin:$PATH; ecflow_client --init="$$" && %ECF_JOB% && ecflow_client --complete || ecflow_client --abort ' 1> %ECF_JOBOUT% 2>&1 &'
  edit ECF_KILL_CMD 'pkill -15 -P %ECF_RID%'
  edit ECF_STATUS_CMD 'true'
  edit ECF_OUT '%ECF_HOME%'
  label exec_host "localhost"
  family f1
    edit SLEEP '20'
    task t1
      meter progress 1 100 90
    task t2
      trigger t1 eq complete
      event a
      event b
    task t3
      trigger t2:a
    task t4
      complete t2:b
      trigger t2 eq complete
    task t5
      trigger t1:progress ge 30
    task t6
      trigger t1:progress ge 60
    task t7
      trigger t1:progress ge 90
  endfamily
endsuite