otsdaq  v2_05_02_indev
RunControlStateMachine.cc
1 #include "otsdaq/FiniteStateMachine/RunControlStateMachine.h"
2 #include "otsdaq/MessageFacility/MessageFacility.h"
3 
4 #include "otsdaq/Macros/CoutMacros.h"
5 #include "otsdaq/Macros/StringMacros.h"
6 
7 #include "otsdaq/SOAPUtilities/SOAPCommand.h"
8 #include "otsdaq/SOAPUtilities/SOAPUtilities.h"
9 
10 #include <toolbox/fsm/FailedEvent.h>
11 #include <xdaq/NamespaceURI.h>
12 #include <xoap/Method.h>
13 
14 #include <iostream>
15 
16 #undef __MF_SUBJECT__
17 #define __MF_SUBJECT__ "FSM"
18 #define mfSubject_ std::string("FSM-") + theStateMachine_.getStateMachineName()
19 
20 using namespace ots;
21 
22 const std::string RunControlStateMachine::FAILED_STATE_NAME = "Failed";
23 const std::string RunControlStateMachine::HALTED_STATE_NAME = "Halted";
24 
25 //==============================================================================
26 RunControlStateMachine::RunControlStateMachine(const std::string& name) : theStateMachine_(name), asyncFailureReceived_(false), asyncSoftFailureReceived_(false)
27 {
28  INIT_MF("." /*directory used is USER_DATA/LOG/.*/);
29 
30  theStateMachine_.addState('I', "Initial", this, &RunControlStateMachine::stateInitial);
31  theStateMachine_.addState('H', RunControlStateMachine::HALTED_STATE_NAME, this, &RunControlStateMachine::stateHalted);
32  theStateMachine_.addState('C', "Configured", this, &RunControlStateMachine::stateConfigured);
33  theStateMachine_.addState('R', "Running", this, &RunControlStateMachine::stateRunning);
34  theStateMachine_.addState('P', "Paused", this, &RunControlStateMachine::statePaused);
35  theStateMachine_.addState('X', "Shutdown", this, &RunControlStateMachine::stateShutdown);
36  // theStateMachine_.addState('v', "Recovering", this,
37  // &RunControlStateMachine::stateRecovering); theStateMachine_.addState('T',
38  // "TTSTestMode", this, &RunControlStateMachine::stateTTSTestMode);
39 
40  // RAR added back in on 11/20/2016.. why was it removed..
41  // exceptions like..
42  // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());)
43  // take state machine to "failed" otherwise
44  theStateMachine_.setStateName('F', RunControlStateMachine::FAILED_STATE_NAME); // x
45  theStateMachine_.setFailedStateTransitionAction(this, &RunControlStateMachine::enteringError);
46  theStateMachine_.setFailedStateTransitionChanged(this, &RunControlStateMachine::inError);
47 
48  // this line was added to get out of Failed state
49  RunControlStateMachine::addStateTransition('F', 'H', "Halt", "Halting", this, &RunControlStateMachine::transitionHalting);
50  RunControlStateMachine::addStateTransition('F', 'X', "Shutdown", "Shutting Down", this, &RunControlStateMachine::transitionShuttingDown);
51 
52  RunControlStateMachine::addStateTransition(
53  'H', 'C', "Configure", "Configuring", "ConfigurationAlias", this, &RunControlStateMachine::transitionConfiguring);
54  RunControlStateMachine::addStateTransition('H', 'X', "Shutdown", "Shutting Down", this, &RunControlStateMachine::transitionShuttingDown);
55  RunControlStateMachine::addStateTransition('X', 'I', "Startup", "Starting Up", this, &RunControlStateMachine::transitionStartingUp);
56 
57  // Every state can transition to halted
58  RunControlStateMachine::addStateTransition('I', 'H', "Initialize", "Initializing", this, &RunControlStateMachine::transitionInitializing);
59  RunControlStateMachine::addStateTransition('H', 'H', "Halt", "Halting", this, &RunControlStateMachine::transitionHalting);
60  RunControlStateMachine::addStateTransition('C', 'H', "Halt", "Halting", this, &RunControlStateMachine::transitionHalting);
61  RunControlStateMachine::addStateTransition('R', 'H', "Abort", "Aborting", this, &RunControlStateMachine::transitionHalting);
62  RunControlStateMachine::addStateTransition('P', 'H', "Abort", "Aborting", this, &RunControlStateMachine::transitionHalting);
63 
64  RunControlStateMachine::addStateTransition('R', 'P', "Pause", "Pausing", this, &RunControlStateMachine::transitionPausing);
65  RunControlStateMachine::addStateTransition('P', 'R', "Resume", "Resuming", this, &RunControlStateMachine::transitionResuming);
66  RunControlStateMachine::addStateTransition('C', 'R', "Start", "Starting", this, &RunControlStateMachine::transitionStarting);
67  RunControlStateMachine::addStateTransition('R', 'C', "Stop", "Stopping", this, &RunControlStateMachine::transitionStopping);
68  RunControlStateMachine::addStateTransition('P', 'C', "Stop", "Stopping", this, &RunControlStateMachine::transitionStopping);
69 
70  // NOTE!! There must be a defined message handler for each transition name created
71  // above
72  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Initialize", XDAQ_NS_URI);
73  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Configure", XDAQ_NS_URI);
74  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Start", XDAQ_NS_URI);
75  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Stop", XDAQ_NS_URI);
76  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Pause", XDAQ_NS_URI);
77  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Resume", XDAQ_NS_URI);
78  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Halt", XDAQ_NS_URI);
79  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Abort", XDAQ_NS_URI);
80  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Shutdown", XDAQ_NS_URI);
81  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Startup", XDAQ_NS_URI);
82  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Fail", XDAQ_NS_URI);
83  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Error", XDAQ_NS_URI);
84 
85  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "AsyncError", XDAQ_NS_URI);
86  xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "AsyncSoftError", XDAQ_NS_URI);
87 
88  reset();
89 }
90 
91 //==============================================================================
92 RunControlStateMachine::~RunControlStateMachine(void) {}
93 
94 //==============================================================================
95 void RunControlStateMachine::reset(void)
96 {
97  __GEN_COUT__ << "Resetting RunControlStateMachine with name '" << theStateMachine_.getStateMachineName() << "'..." << __E__;
98  theStateMachine_.setInitialState('I');
99  theStateMachine_.reset();
100 
101  theStateMachine_.setErrorMessage("", false /*append*/); // clear error message
102 
103  asyncFailureReceived_ = false;
104  asyncSoftFailureReceived_ = false;
105 }
106 
108 //(RunControlStateMachine::stateMachineFunction_t)
109 // RunControlStateMachine::getTransitionName( const toolbox::fsm::State from,
110 // const std::string& transition)
111 //{
112 // auto itFrom = stateTransitionFunctionTable_.find(from);
113 // if(itFrom == stateTransitionFunctionTable_.end())
114 // {
115 // __GEN_SS__ << "Cannot find transition function from '" << from <<
116 // "' with transition '" << transition << "!'" << __E__;
117 // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());
118 // }
119 //
120 // auto itTrans = itFrom->second.find(transition);
121 // if(itTrans == itFrom->second.end())
122 // {
123 // __GEN_SS__ << "Cannot find transition function from '" << from <<
124 // "' with transition '" << transition << "!'" << __E__;
125 // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());
126 // }
127 //
128 // return itTrans->second;
129 //}
130 
131 //==============================================================================
132 // runControlMessageHandler
133 // Handles the command broadcast message from the Gateway Supervisor
134 // and maps the command to a transition function, allowing for multiple iteration
135 // passes through the transition function.
136 xoap::MessageReference RunControlStateMachine::runControlMessageHandler(xoap::MessageReference message)
137 
138 {
139  __GEN_COUT__ << "Received... \t" << SOAPUtilities::translate(message) << std::endl;
140 
141  std::string command = SOAPUtilities::translate(message).getCommand();
142 
143  // get iteration index
144  try
145  {
146  StringMacros::getNumber(SOAPUtilities::translate(message).getParameters().getValue("iterationIndex"), iterationIndex_);
147  }
148  catch(...) // ignore errors and set iteration index to 0
149  {
150  __GEN_COUT__ << "Defaulting iteration index to 0." << __E__;
151  iterationIndex_ = 0;
152  }
153  // get subIteration index
154  try
155  {
156  StringMacros::getNumber(SOAPUtilities::translate(message).getParameters().getValue("subIterationIndex"), subIterationIndex_);
157  }
158  catch(...) // ignore errors and set subIteration index to 0
159  {
160  __GEN_COUT__ << "Defaulting subIterationIndex_ index to 0." << __E__;
161  subIterationIndex_ = 0;
162  }
163 
164  // get retransmission indicator
165  try
166  {
167  if(SOAPUtilities::translate(message).getParameters().getValue("retransmission") == "1")
168  {
169  // handle retransmission
170 
171  // attempt to stop an error if last command was same
172  if(lastIterationCommand_ == command && lastIterationIndex_ == iterationIndex_ && lastSubIterationIndex_ == subIterationIndex_)
173  {
174  __GEN_COUT__ << "Assuming a timeout occurred at Gateway waiting for a response. "
175  << "Attempting to avoid error, by giving last result for command '" << command << "': " << lastIterationResult_ << __E__;
176  return SOAPUtilities::makeSOAPMessageReference(lastIterationResult_);
177  }
178  else
179  __GEN_COUT__ << "Looks like Gateway command '" << command << "' was lost - attempting to handle retransmission." << __E__;
180  }
181  }
182  catch(...) // ignore errors for retransmission indicator (assume it is not a
183  // retransmission)
184  {
185  ;
186  }
187  lastIterationIndex_ = iterationIndex_;
188  lastSubIterationIndex_ = subIterationIndex_;
189 
190  std::string currentState;
191  if(iterationIndex_ == 0 && subIterationIndex_ == 0)
192  {
193  // this is the first iteration attempt for this transition
194  theProgressBar_.reset(command, theStateMachine_.getStateMachineName());
195  currentState = theStateMachine_.getCurrentStateName();
196  __GEN_COUT__ << "Starting state for " << theStateMachine_.getStateMachineName() << " is " << currentState << " and attempting to " << command
197  << std::endl;
198  }
199  else
200  {
201  currentState = theStateMachine_.getStateName(lastIterationState_);
202 
203  __GEN_COUT__ << "Iteration index " << iterationIndex_ << "." << subIterationIndex_ << " for " << theStateMachine_.getStateMachineName() << " from "
204  << currentState << " attempting to " << command << std::endl;
205  }
206 
207  RunControlStateMachine::theProgressBar_.step();
208 
209  std::string result = command + "Done";
210  lastIterationResult_ = result;
211 
212  // if error is received, immediately go to fail state
213  // likely error was sent by central FSM or external xoap
214  if(command == "Error" || command == "Fail")
215  {
216  __GEN_SS__ << command << " was received! Halting immediately." << std::endl;
217  __GEN_COUT_ERR__ << "\n" << ss.str();
218 
219  try
220  {
221  if(currentState == "Configured")
222  theStateMachine_.execTransition("Halt", message);
223  else if(currentState == "Running" || currentState == "Paused")
224  theStateMachine_.execTransition("Abort", message);
225  }
226  catch(...)
227  {
228  __GEN_COUT_ERR__ << "Halting failed in reaction to " << command << "... ignoring." << __E__;
229  }
230  return SOAPUtilities::makeSOAPMessageReference(result);
231  }
232  else if(command == "AsyncError")
233  {
234  std::string errorMessage = SOAPUtilities::translate(message).getParameters().getValue("ErrorMessage");
235 
236  __GEN_SS__ << command << " was received! Error'ing immediately: " << errorMessage << std::endl;
237  __GEN_COUT_ERR__ << "\n" << ss.str();
238  theStateMachine_.setErrorMessage(ss.str());
239 
240  asyncFailureReceived_ = true; // mark flag, to be used to abort next transition
241  // determine any valid transition from where we are
242  theStateMachine_.execTransition("fail");
243  // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());
244 
245  return SOAPUtilities::makeSOAPMessageReference(result);
246  }
247  else if(command == "AsyncSoftError")
248  {
249  std::string errorMessage = SOAPUtilities::translate(message).getParameters().getValue("ErrorMessage");
250 
251  __GEN_SS__ << command << " was received! Pause'ing immediately: " << errorMessage << std::endl;
252  __GEN_COUT_ERR__ << "\n" << ss.str();
253  theStateMachine_.setErrorMessage(ss.str());
254 
255  if(!asyncSoftFailureReceived_) // launch pause only first time
256  {
257  asyncSoftFailureReceived_ = true; // mark flag, to be used to avoid double
258  // pausing and identify pause was due to
259  // soft error
260  theStateMachine_.execTransition("Pause");
261  }
262 
263  return SOAPUtilities::makeSOAPMessageReference(result);
264  }
265 
266  // if already Halted, respond to Initialize with "done"
267  // (this avoids race conditions involved with artdaq mpi reset)
268  if(command == "Initialize" && currentState == RunControlStateMachine::HALTED_STATE_NAME)
269  {
270  __GEN_COUT__ << "Already Initialized.. ignoring Initialize command." << std::endl;
271 
272  theStateMachine_.setErrorMessage("", false /*append*/); // clear error message
273  return SOAPUtilities::makeSOAPMessageReference(result);
274  }
275 
276  __GEN_COUTV__(command);
277  __GEN_COUTV__(currentState);
278 
279  if(command == "Halt" && currentState == "Initial")
280  {
281  __GEN_COUT__ << "Converting Halt command to Initialize, since currently in "
282  "Initialized state."
283  << std::endl;
284  command = "Initialize";
285  message = SOAPUtilities::makeSOAPMessageReference(command);
286  }
287 
288  // handle normal transitions here
289  try
290  {
291  if(!(asyncSoftFailureReceived_ && command == "Pause")) // only clear if not soft error
292  theStateMachine_.setErrorMessage("", false /*append*/); // clear error message
293 
294  iterationWorkFlag_ = false;
295  subIterationWorkFlag_ = false;
296  if(iterationIndex_ || subIterationIndex_)
297  {
298  __GEN_COUT__ << command << " iteration " << iterationIndex_ << "." << subIterationIndex_ << __E__;
299  toolbox::Event::Reference event(new toolbox::Event(command, this));
300 
301  // call inheriting transition function based on last state and command
302  {
303  // e.g. transitionConfiguring(event);
304  __GEN_COUT__ << "Iterating on the transition function from " << currentState << " through " << lastIterationCommand_ << __E__;
305 
306  auto itFrom = stateTransitionFunctionTable_.find(lastIterationState_);
307  if(itFrom == stateTransitionFunctionTable_.end())
308  {
309  __GEN_SS__ << "Cannot find transition function from '" << currentState << "' with transition '" << lastIterationCommand_ << "!'" << __E__;
310  __GEN_COUT_ERR__ << ss.str();
311  XCEPT_RAISE(toolbox::fsm::exception::Exception, ss.str());
312  }
313 
314  auto itTransition = itFrom->second.find(lastIterationCommand_);
315  if(itTransition == itFrom->second.end())
316  {
317  __GEN_SS__ << "Cannot find transition function from '" << currentState << "' with transition '" << lastIterationCommand_ << "!'" << __E__;
318  __GEN_COUT_ERR__ << ss.str();
319  XCEPT_RAISE(toolbox::fsm::exception::Exception, ss.str());
320  }
321 
322  (this->*(itTransition->second))(event); // call the transition function
323  }
324  }
325  else
326  {
327  // save the lookup parameters for the last function to be called for the case
328  // of additional iterations
329  lastIterationState_ = theStateMachine_.getCurrentState();
330  lastIterationCommand_ = command;
331 
332  theStateMachine_.execTransition(command, message);
333  }
334 
335  if(subIterationWorkFlag_) // sub-iteration has priority over 'Working'
336  {
337  __GEN_COUTV__(subIterationWorkFlag_);
338  result = command + "SubIterate"; // indicate another sub-iteration back to Gateway
339  }
340  else if(iterationWorkFlag_)
341  {
342  __GEN_COUTV__(iterationWorkFlag_);
343  result = command + "Iterate"; // indicate another iteration back to Gateway
344  }
345  }
346  catch(toolbox::fsm::exception::Exception& e)
347  {
348  __GEN_SS__ << "Run Control Message Handling Failed: " << e.what() << " " << theStateMachine_.getErrorMessage() << __E__;
349  __GEN_COUT_ERR__ << ss.str();
350  theStateMachine_.setErrorMessage(ss.str());
351 
352  result = command + " " + RunControlStateMachine::FAILED_STATE_NAME + ": " + theStateMachine_.getErrorMessage();
353  }
354  catch(...)
355  {
356  __GEN_SS__ << "Run Control Message Handling encountered an unknown error." << theStateMachine_.getErrorMessage() << __E__;
357  __GEN_COUT_ERR__ << ss.str();
358  theStateMachine_.setErrorMessage(ss.str());
359 
360  result = command + " " + RunControlStateMachine::FAILED_STATE_NAME + ": " + theStateMachine_.getErrorMessage();
361  }
362 
363  RunControlStateMachine::theProgressBar_.step();
364 
365  currentState = theStateMachine_.getCurrentStateName();
366 
367  if(currentState == RunControlStateMachine::FAILED_STATE_NAME)
368  {
369  result = command + " " + RunControlStateMachine::FAILED_STATE_NAME + ": " + theStateMachine_.getErrorMessage();
370  __GEN_COUT_ERR__ << "Unexpected Failure state for " << theStateMachine_.getStateMachineName() << " is " << currentState << std::endl;
371  __GEN_COUT_ERR__ << "Error message was as follows: " << theStateMachine_.getErrorMessage() << std::endl;
372  }
373 
374  RunControlStateMachine::theProgressBar_.step();
375 
376  if(!iterationWorkFlag_ && !subIterationWorkFlag_)
377  theProgressBar_.complete();
378  else
379  {
380  __GEN_COUTV__(theProgressBar_.read());
381  __GEN_COUTV__(theProgressBar_.isComplete());
382  }
383 
384  __GEN_COUT__ << "Ending state for " << theStateMachine_.getStateMachineName() << " is " << currentState << std::endl;
385  __GEN_COUT__ << "result = " << result << std::endl;
386  lastIterationResult_ = result;
387  return SOAPUtilities::makeSOAPMessageReference(result);
388 }