otsdaq  v2_05_02_indev
XmlDocument.cc
1 #include "otsdaq/XmlUtilities/XmlDocument.h"
2 #include "otsdaq/Macros/CoutMacros.h"
3 #include "otsdaq/Macros/MessageTools.h"
4 #include "otsdaq/MessageFacility/MessageFacility.h"
5 #include "otsdaq/XmlUtilities/ConvertFromXML.h"
6 #include "otsdaq/XmlUtilities/ConvertToXML.h"
7 
8 #include <stdexcept>
9 #include <xercesc/dom/DOM.hpp>
10 #include <xercesc/dom/DOMDocument.hpp>
11 #include <xercesc/dom/DOMDocumentType.hpp>
12 #include <xercesc/dom/DOMElement.hpp>
13 #include <xercesc/dom/DOMImplementation.hpp>
14 #include <xercesc/dom/DOMImplementationLS.hpp>
15 #include <xercesc/dom/DOMImplementationRegistry.hpp>
16 #include <xercesc/parsers/XercesDOMParser.hpp>
17 //#include <xercesc/dom/DOMLSSerializer.hpp>
18 //#include <xercesc/dom/DOMLSOutput.hpp>
19 #include <xercesc/dom/DOMNodeIterator.hpp>
20 #include <xercesc/dom/DOMNodeList.hpp>
21 #include <xercesc/dom/DOMText.hpp>
22 #include <xercesc/validators/common/Grammar.hpp>
23 
24 #include <xercesc/parsers/XercesDOMParser.hpp>
25 #include <xercesc/util/XMLUni.hpp>
26 #include <xercesc/util/XercesDefs.hpp>
27 
28 #include <xercesc/framework/LocalFileFormatTarget.hpp>
29 #include <xercesc/util/OutOfMemoryException.hpp>
30 
31 #include <boost/regex.hpp>
32 
33 #include <iostream>
34 #include <list>
35 #include <sstream>
36 
37 #include <errno.h>
38 #include <sys/stat.h>
39 #include <sys/types.h>
40 #include <unistd.h>
41 
42 using namespace ots;
43 
44 //==============================================================================
45 XmlDocument::XmlDocument(std::string rootName) : rootTagName_(rootName)
46 {
47  //__COUT__ << "in" << std::endl;
48  initDocument();
49 
50  rootElement_ = theDocument_->getDocumentElement();
51  //__COUT__ << "out" << std::endl;
52 }
53 
54 //==============================================================================
55 XmlDocument::XmlDocument(const XmlDocument& doc) : rootTagName_(doc.rootTagName_)
56 {
57  //__COUT__ << "in" << std::endl;
58  *this = doc;
59  //__COUT__ << "out" << std::endl;
60 }
61 
62 //==============================================================================
63 XmlDocument& XmlDocument::operator=(const XmlDocument& doc)
64 {
65  __COUT__ << "in" << std::endl;
66  initDocument();
67  rootElement_ = theDocument_->getDocumentElement();
68  recursiveElementCopy(doc.rootElement_, rootElement_);
69  __COUT__ << "out" << std::endl;
70  return *this;
71 }
72 
73 //==============================================================================
74 XmlDocument::~XmlDocument(void)
75 {
76  //__COUT__ << "Xml Destructor" << std::endl;
77  terminatePlatform();
78 }
79 
80 //==============================================================================
81 void XmlDocument::initDocument(void)
82 {
83  initPlatform();
84 
85  theImplementation_ = xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("Core"));
86 
87  if (theImplementation_)
88  {
89  try
90  {
91  theDocument_ = theImplementation_->createDocument(CONVERT_TO_XML("http://www.w3.org/2001/XMLSchema-instance"), // root
92  // element
93  // namespace
94  // URI.
95  CONVERT_TO_XML(rootTagName_), // root element name
96  0); // theDocument_ type object (DTD).
97  }
98  catch (const xercesc::OutOfMemoryException&)
99  {
100  XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
101  }
102  catch (const xercesc::DOMException & e)
103  {
104  XERCES_STD_QUALIFIER cerr << "DOMException code is: " << e.code << XERCES_STD_QUALIFIER endl;
105  }
106  catch (const xercesc::XMLException & e)
107  {
108  __COUT__ << "Error Message: " << XML_TO_CHAR(e.getMessage()) << std::endl;
109  }
110  catch (...)
111  {
112  XERCES_STD_QUALIFIER cerr << "An error occurred creating the theDocument_" << XERCES_STD_QUALIFIER endl;
113  }
114  }
115  else
116  XERCES_STD_QUALIFIER cerr << "Requested theImplementation_ is not supported" << XERCES_STD_QUALIFIER endl;
117  darioXMLStyle_ = false;
118  isALeaf_[true] = "true";
119  isALeaf_[false] = "false";
120 }
121 
122 //==============================================================================
123 void XmlDocument::initPlatform(void)
124 {
125  try
126  {
127  xercesc::XMLPlatformUtils::Initialize(); // Initialize Xerces infrastructure
128  //__COUT__ << "Initialized new
129  // theDocument_" << std::endl;
130  }
131  catch (xercesc::XMLException & e)
132  {
133  __COUT__ << "XML toolkit initialization error: " << XML_TO_CHAR(e.getMessage()) << std::endl;
134  }
135 }
136 
137 //==============================================================================
138 void XmlDocument::terminatePlatform(void)
139 {
140  try
141  {
142  //__COUT__ << "Releasing the document" << std::endl;
143  theDocument_->release();
144  //__COUT__ << "document released" << std::endl;
145  }
146  catch (...)
147  {
148  XERCES_STD_QUALIFIER cerr << "An error occurred destroying the theDocument_" << XERCES_STD_QUALIFIER endl;
149  }
150 
151  try
152  {
153  xercesc::XMLPlatformUtils::Terminate(); // Terminate after release of memory
154  }
155  catch (xercesc::XMLException & e)
156  {
157  __COUT__ << "XML toolkit teardown error: " << XML_TO_CHAR(e.getMessage()) << std::endl;
158  // XMLString::release(&message);
159  }
160 }
161 
162 //==============================================================================
163 // addTextElementToParent
164 // add to parent by pointer to parent
165 // returns pointer to element that is added
166 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName, std::string childText, xercesc::DOMElement* parent)
167 {
168  if (parent == 0)
169  {
170  __SS__ << "Illegal Null Parent Pointer!" << __E__;
171  __SS_THROW__;
172  // return 0;
173  }
174  xercesc::DOMElement* child = nullptr;
175  try
176  {
177  child = theDocument_->createElement(CONVERT_TO_XML(childName));
178  }
179  catch (xercesc::DOMException & e)
180  {
181  __COUT__ << "Can't use the name: " << childName << " to create the child element because the exception says: " << XML_TO_CHAR(e.getMessage())
182  << ". Very likely you have a name that starts with a number and that's "
183  "not allowed!"
184  << std::endl;
185  }
186  parent->appendChild(child);
187 
188  try
189  {
190  child->appendChild(theDocument_->createTextNode(CONVERT_TO_XML(childText)));
191  }
192  catch (...) // sometimes see TranscodingException
193  {
194  __COUT_ERR__ << "Error caught attempting to create a text node for this text: " << childText << ". Converting instead to 'Illegal text..'" << std::endl;
195  child->appendChild(theDocument_->createTextNode(CONVERT_TO_XML("Illegal text content blocked.")));
196  }
197 
198  return child;
199 }
200 
201 //==============================================================================
202 // addTextElementToParent
203 // add to parent by instance number of parent name
204 // returns pointer to element that is added
205 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName, std::string childText, std::string parentName, unsigned int parentIndex)
206 {
207  xercesc::DOMNodeList* nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
208 
209  if (parentIndex >= nodeList->getLength())
210  {
211  __COUT__ << "WARNING: Illegal parent index attempted in tags with name: " << parentName << ", index: " << parentIndex << std::endl;
212  return 0; // illegal index attempted
213  }
214 
215  return addTextElementToParent(childName, childText, (xercesc::DOMElement*)(nodeList->item(parentIndex)));
216 }
217 
218 //==============================================================================
219 void XmlDocument::copyDocument(const xercesc::DOMDocument* toCopy, xercesc::DOMDocument* copy)
220 {
221  recursiveElementCopy(toCopy->getDocumentElement(), copy->getDocumentElement());
222 }
223 
224 //==============================================================================
225 void XmlDocument::recursiveElementCopy(const xercesc::DOMElement* toCopy, xercesc::DOMElement* copy)
226 {
227  xercesc::DOMNodeList* nodeListToCopy = toCopy->getChildNodes(); // get all children of the list to copy
228  xercesc::DOMNode* iNode;
229  xercesc::DOMDocument* copyDocument = copy->getOwnerDocument();
230  for (unsigned int i = 0; i < nodeListToCopy->getLength(); i++)
231  {
232  iNode = nodeListToCopy->item(i);
233  xercesc::DOMElement* child = copyDocument->createElement(iNode->getNodeName());
234  copy->appendChild(child);
235  if (child->getFirstChild() != NULL)
236  {
237  if (iNode->getFirstChild() != 0 &&
238  iNode->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE) // if has a text node first, insert as value
239  // attribute
240  {
241  child->appendChild(copyDocument->createTextNode(child->getFirstChild()->getNodeValue()));
242  }
243  recursiveElementCopy((xercesc::DOMElement*)(iNode), child);
244  }
245  }
246 }
247 
248 //==============================================================================
249 // XmlDocument::addElementToParent
250 // Add field/value element to XML doc at parent
251 // On Success, The child index of the added element with respect to the parent is
252 // returned and can be used to add
253 // children to the new element
254 // On Failure, return -1
255 /*
256 unsigned int XmlDocument::addElementToParent(std::string field, std::string value,
257 xercesc::DOMElement *parentEl, bool verbose)
258 {
259  DOMNodeList *nodeList = parentEl->getChildNodes(); //get all children
260 
261  if(verbose)
262  {
263  //display parent info
264  //__COUT__ << "Parent Name: " << XML_TO_CHAR(parentEl->getNodeName()) << " Field:
265 " << field << " Value: " << value << std::endl; if( parentEl->getFirstChild() != NULL &&
266 parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE)
267  __COUT__ << "Parent's First Child Node Value: " <<
268 XML_TO_CHAR(parentEl->getFirstChild()->getNodeValue()) << std::endl;
269  }
270 
271  //add field/value element
272  DOMElement *newEl = theDocument_->createElement(CONVERT_TO_XML(field));
273  parentEl->appendChild(newEl);
274 
275  DOMText* valueStr = theDocument_->createTextNode(CONVERT_TO_XML(value));
276  newEl->appendChild(valueStr);
277 
278  if( parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() ==
279 DOMNode::TEXT_NODE) return nodeList->getLength() - 2; //return child index among parent's
280 children, not counting first child text node return nodeList->getLength() - 1; //return
281 child index among parent's children
282 }
283 */
284 //==============================================================================
285 // XmlDocument::addDataElement
286 // Add field/value element to XML doc at parent which is returned from
287 // getElementsByTagName(parentName), entry number parentNameIndex
288 // On Success, The child index of the added element with respect to the parent is
289 // returned and can be used to add
290 // children to the new element
291 // On Failure, return -1
292 /*
293 unsigned int XmlDocument::addDataElement ( std::string field, std::string value,
294 std::string parentName, unsigned int parentNameIndex)
295 {
296  DOMNodeList *nodeList =
297 theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
298 
299  if(parentNameIndex >= nodeList->getLength()) {
300  __COUT__ << "illegal parent index attempted in tags with name: " << parentName <<
301 ", index: " << parentNameIndex << std::endl; return -1; //illegal index attempted
302  }
303 
304  return addElementToParent(field,value,(DOMElement*)(nodeList->item(parentNameIndex)));
305 }
306 */
307 //==============================================================================
308 // XmlDocument::addDataElement
309 // Add field/value element to XML doc at parentIndexArray (with depth of parent indicated
310 // by parentIndexArraySize) If parentIndexArray = NULL, element is added with <DATA>
311 // parent otherwise, parentIndexArray indicates the parent within the node list for
312 //<DATA> where the element will be added
313 // On Success, The child index of the added element with respect to the parent is
314 // returned and can be used to add
315 // children to the new element
316 // On Failure, return -1
317 /*
318 unsigned int XmlDocument::addDataElement ( std::string field, std::string value, unsigned
319 int *parentIndexArray, unsigned int parentIndexArraySize)
320 {
321 
322  //__COUT__ << "field: " << field << ", value: " << value << ", parent: " <<
323 parentIndexArraySize << std::endl;
324 
325  DOMElement *parentEl = dataElement; // initialize parent to <DATA>
326 
327  if(parentIndexArray) //if there passed an array find parent relative to data element
328  {
329  //__COUT__ << "Using Parent Index Array" << std::endl;
330 
331  DOMNodeList *nodeList;
332 
333  //iterate through nested parents based on parentIndexArray
334  unsigned int tmpi,cntNotTxt;
335  for(unsigned int i=0;i<parentIndexArraySize;++i)
336  {
337  nodeList = parentEl->getChildNodes(); //get all children
338  cntNotTxt = 0;
339 
340  //get cntNotTxt to proper non text node
341  for(tmpi=0;tmpi<nodeList->getLength();++tmpi)
342  {
343  if(((DOMElement*)(nodeList->item(tmpi)))->getNodeType() ==
344 DOMNode::TEXT_NODE) continue; //skip text nodes
345 
346  if(cntNotTxt == parentIndexArray[i]) break; //at proper parent node!
347  ++cntNotTxt; //else look for next
348  }
349 
350  //in theory, only first child can be text - ignore text node children
351  //if(parentEl->getFirstChild() != NULL &&
352 parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) ++tmpi;
353 
354  if(tmpi >= nodeList->getLength()) {
355  __COUT__ << "illegal child index attempted in nested parents: " <<
356 parentIndexArray[i] << ", depth: " << i << ", tmpi: " << tmpi << std::endl; return -1;
357 //illegal child index attempted in nested parents
358  }
359 
360  parentEl = (DOMElement*)(nodeList->item(tmpi));
361  }
362  }
363 
364  return addElementToParent(field,value,parentEl);
365 }
366 */
367 //==============================================================================
368 // XmlDocument::addXmlData
369 // Append <DATA> from xmldoc to this XML doc
370 // On Success, The child index within <DATA> of the first element is returned
371 // On Failure, return -1
372 /*
373 unsigned int XmlDocument::addXmlData (XmlDocument *xmldoc)
374 {
375  //
376 
377  int retIndex = dataElement->getChildNodes()->getLength(); //will be index of first
378 appended data element
379 
380  //add all first level child elements of data and recurse on them
381  DOMNodeList *nodeList = xmldoc->dataElement->getChildNodes(); //get all children
382 within data for(unsigned int i = 0; i<nodeList->getLength();++i)
383  {
384  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node
385 children continue;
386 
387  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),dataElement);
388  }
389 
390  return retIndex;
391 }
392 */
393 //==============================================================================
394 // XmlDocument::recursiveAddElementToParent
395 // add currEl and its children tree to parentEl
396 /*
397 void XmlDocument::recursiveAddElementToParent (DOMElement *currEl, DOMElement *parentEl)
398 {
399 std::string field, value = "";
400 
401  //char *tmpField =
402  field = XML_TO_CHAR(currEl->getNodeName());//XML_TO_CHAR(currEl->getNodeName());
403  //field = tmpField;
404  //XMLString::release( &tmpField );
405 
406  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() ==
407 DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute value =
408 escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
409 
410  //insert currEl
411  addElementToParent(field,value,parentEl);
412 
413  //insert rest of currEl tree
414  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children of currEl
415  for(unsigned int i = 0; i<nodeList->getLength();++i)
416  {
417  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node
418 children continue;
419 
420  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),currEl);
421  }
422 }
423 */
424 //==============================================================================
425 // XmlDocument::outputXmlDocument
426 // recurse through XML theDocument_ and std out and output to stream parameter if not
427 // null
428 void XmlDocument::outputXmlDocument(std::ostringstream* out, bool dispStdOut)
429 {
430  recursiveOutputXmlDocument(theDocument_->getDocumentElement(), out, dispStdOut);
431 }
432 
433 //==============================================================================
434 void XmlDocument::setDocument(xercesc::DOMDocument* doc) { theDocument_ = doc; }
435 //==============================================================================
436 // XmlDocument::recursiveOutputXmlDocument
437 // recursively printout XML theDocument_ to std out and output stream if not null
438 void XmlDocument::recursiveOutputXmlDocument(xercesc::DOMElement* currEl, std::ostringstream* out, bool dispStdOut, std::string tabStr)
439 {
440  // open field tag
441  if (dispStdOut)
442  {
443  __COUT__ << tabStr << "<" << XML_TO_CHAR(currEl->getNodeName());
444  }
445  if (out)
446  {
447  *out << tabStr << "<" << XML_TO_CHAR(currEl->getNodeName());
448  }
449 
450  // insert value if text node child
451  if (currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE) // if has a text node first, insert as value
452  // attribute
453  {
454  if (dispStdOut)
455  std::cout << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
456  if (out)
457  *out << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
458  }
459 
460  xercesc::DOMNodeList* nodeList = currEl->getChildNodes(); // get all children
461 
462  // close opening field tag
463  if (dispStdOut)
464  std::cout << ((nodeList->getLength() == 0 || (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))
465  ? "/"
466  : "")
467  << ">" << std::endl;
468  if (out)
469  *out << ((nodeList->getLength() == 0 || (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)) ? "/"
470  : "")
471  << ">" << std::endl;
472 
473  // insert children
474  std::string newTabStr = tabStr + "\t";
475  for (unsigned int i = 0; i < nodeList->getLength(); ++i)
476  if (nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE) // ignore text node children
477  recursiveOutputXmlDocument((xercesc::DOMElement*)(nodeList->item(i)), out, dispStdOut, newTabStr);
478 
479  // close tag if children
480  if (nodeList->getLength() > 1 || (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() != xercesc::DOMNode::TEXT_NODE))
481  {
482  if (dispStdOut)
483  __COUT__ << tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
484  if (out)
485  *out << tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
486  }
487 }
488 
489 //==============================================================================
490 // XmlDocument::getDataElement
491 // returns the value for field found occurance number of times
492 // returns empty std::string "" if field was not found
493 /*
494 std::string XmlDocument::getDataElement (const std::string field, const unsigned int
495 occurance)
496 {
497  unsigned int count = 0;
498  return recursiveFindElement(theDocument_->getDocumentElement(),field,occurance,count);
499 }
500 */
501 //==============================================================================
502 // XmlDocument::recursiveFindElement
503 // recursively searches and returns the value for field found occurance number of times
504 /*
505 std::string XmlDocument::recursiveFindElement (DOMElement *currEl, const std::string
506 field, const unsigned int occurance, unsigned int &count)
507 {
508  if (XML_TO_CHAR(currEl->getNodeName()) == field && occurance == count++) //found,
509 done!!
510  {
511  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() ==
512 DOMNode::TEXT_NODE) //if has a text node first, return as value attribute return
513 escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())); else return "";
514 //empty value attribute
515  }
516 
517  std::string retStr;
518  //look through children recursively
519  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
520  for(unsigned int i = 0; i<nodeList->getLength();++i)
521  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node
522 children
523  {
524  retStr = recursiveFindElement
525 ((DOMElement*)(nodeList->item(i)),field,occurance,count); if(retStr != "") return retStr;
526 //found among children already, done
527  //else continue search within children recursively
528  }
529  return ""; //nothing found
530 }
531 */
532 //==============================================================================
533 // XmlDocument::getAllDataElements
534 // returns all of the values found for the field in a vector
535 // if none found vector will have size 0
536 /*
537 std::vector<std::string> XmlDocument::getAllDataElements (std::string field)
538 {
539  vector<string> retVec;
540 
541  recursiveFindAllElements(theDocument_->getDocumentElement(),field,&retVec);
542 
543  return retVec;
544 }
545 */
546 //==============================================================================
547 // XmlDocument::recursiveFindElement
548 // recursively searches and returns the value for field found occurance number of times
549 /*
550 void XmlDocument::recursiveFindAllElements (DOMElement *currEl, const std::string
551 field,std::vector<std::string> *retVec)
552 {
553  if (XML_TO_CHAR(currEl->getNodeName()) == field &&
554  currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() ==
555 DOMNode::TEXT_NODE) //if has a text node first, return as value attribute
556  retVec->push_back(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
557 
558 
559  //look through children recursively
560  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
561  for(unsigned int i = 0; i<nodeList->getLength();++i)
562  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node
563 children recursiveFindAllElements ((DOMElement*)(nodeList->item(i)),field,retVec);
564 }
565 */
566 //==============================================================================
567 // XmlDocument::escapeString
568 // convert quotes to html quote characters &apos; = ' and &quot; = "
569 // remove new line characters
570 // and (if !allowWhiteSpace) remove white space (so that read from file white space
571 // artifact removed)
572 //
573 // convert &amp; = &
574 // if(allowWhiteSpace) convert \t to 8 &#160; spaces and \n to <br>
575 std::string XmlDocument::escapeString(std::string inString, bool allowWhiteSpace)
576 {
577  bool doit = false;
578 
579  unsigned int ws = -1;
580  char htmlTmp[6];
581 
582  for (unsigned int i = 0; i < inString.length(); i++)
583  if (inString[i] != ' ')
584  {
585  if (doit)
586  __COUT__ << inString[i] << ":" << (int)inString[i] << ":" << inString << std::endl;
587 
588  // remove new lines and unprintable characters
589  if (inString[i] == '\r' || inString[i] == '\n' || // remove new line chars
590  inString[i] == '\t' || // remove tabs
591  inString[i] < 32 || // remove un-printable characters (they mess up xml
592  // interpretation)
593  (inString[i] > char(126) && inString[i] < char(161))) // this is aggravated by the bug in
594  // MFextensions (though Eric says he fixed on
595  // 8/24/2016) Note: greater than 255 should be
596  // impossible if by byte (but there are html
597  // chracters in 300s and 8000s)
598  {
599  if ( // maintain new lines and tabs
600  inString[i] == '\n')
601  {
602  if (allowWhiteSpace)
603  {
604  sprintf(htmlTmp, "&#%3.3d", inString[i]);
605  inString.insert(i, htmlTmp); // insert html str sequence
606  inString.replace(i + 5, 1, 1, ';'); // replace special character with ;
607  i += 6; // skip to next char to check
608  --i;
609  }
610  else // translate to ' '
611  inString[i] = ' ';
612  }
613  else if ( // maintain new lines and tabs
614  inString[i] == '\t')
615  {
616  if (allowWhiteSpace)
617  {
618  if (0)
619  {
620  // tab = 8 spaces
621  sprintf(htmlTmp, "&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160");
622  inString.insert(i, htmlTmp); // insert html str sequence
623  inString.replace(i + 47, 1, 1, ';'); // replace special character with ;
624  i += 48; // skip to next char to check
625  --i;
626  }
627  else // tab = 0x09
628  {
629  sprintf(htmlTmp, "&#009");
630  inString.insert(i, htmlTmp); // insert html str sequence
631  inString.replace(i + 5, 1, 1, ';'); // replace special character with ;
632  i += 6; // skip to next char to check
633  --i;
634  }
635  }
636  else // translate to ' '
637  inString[i] = ' ';
638  }
639  else
640  {
641  inString.erase(i, 1); // erase character
642  --i; // step back so next char to check is correct
643  }
644  if (doit)
645  __COUT__ << inString << std::endl;
646  continue;
647  }
648 
649  if (doit)
650  __COUT__ << inString << std::endl;
651 
652  // replace special characters
653  if (inString[i] == '\"' || inString[i] == '\'')
654  {
655  inString.insert(i,
656  (inString[i] == '\'') ? "&apos" : "&quot"); // insert HTML name before quotes
657  inString.replace(i + 5, 1, 1, ';'); // replace special character with ;
658  i += 5; // skip to next char to check
659  //__COUT__ << inString << std::endl;
660  }
661  else if (inString[i] == '&')
662  {
663  inString.insert(i, "&amp"); // insert HTML name before special character
664  inString.replace(i + 4, 1, 1, ';'); // replace special character with ;
665  i += 4; // skip to next char to check
666  }
667  else if (inString[i] == '<' || inString[i] == '>')
668  {
669  inString.insert(i,
670  (inString[i] == '<') ? "&lt" : "&gt"); // insert HTML name before special character
671  inString.replace(i + 3, 1, 1, ';'); // replace special character with ;
672  i += 3; // skip to next char to check
673  }
674  else if (inString[i] >= char(161) && inString[i] <= char(255)) // printable special characters
675  {
676  sprintf(htmlTmp, "&#%3.3d", inString[i]);
677  inString.insert(i, htmlTmp); // insert html number sequence
678  inString.replace(i + 5, 1, 1, ';'); // replace special character with ;
679  i += 5; // skip to next char to check
680  }
681 
682  if (doit)
683  __COUT__ << inString << std::endl;
684 
685  ws = i; // last non white space char
686  }
687  else if (allowWhiteSpace) // keep white space if allowed
688  {
689  if (i - 1 == ws)
690  continue; // dont do anything for first white space
691 
692  // for second white space add 2, and 1 from then
693  if (0 && i - 2 == ws)
694  {
695  inString.insert(i, "&#160;"); // insert html space
696  i += 6; // skip to point at space again
697  }
698  inString.insert(i, "&#160"); // insert html space
699  inString.replace(i + 5, 1, 1, ';'); // replace special character with ;
700  i += 5; // skip to next char to check
701  // ws = i;
702  }
703 
704  if (doit)
705  __COUT__ << inString.size() << " " << ws << std::endl;
706 
707  // inString.substr(0,ws+1);
708 
709  if (doit)
710  __COUT__ << inString.size() << " " << inString << std::endl;
711 
712  if (allowWhiteSpace) // keep all white space
713  return inString;
714  // else trim trailing white space
715 
716  if (ws == (unsigned int)-1)
717  return ""; // empty std::string since all white space
718  return inString.substr(0, ws + 1); // trim right white space
719 }
720 
721 //==============================================================================
722 // XmlDocument::recursiveRemoveChild
723 // remove child and all of child's sub-tree from parent
724 void XmlDocument::recursiveRemoveChild(xercesc::DOMElement* childEl, xercesc::DOMElement* parentEl)
725 {
726  // release child's children first
727  xercesc::DOMNodeList* nodeList = childEl->getChildNodes(); // get all children within data
728  for (unsigned int i = 0; i < nodeList->getLength(); ++i)
729  recursiveRemoveChild((xercesc::DOMElement*)(nodeList->item(nodeList->getLength() - 1 - i)), childEl);
730 
731  // then release child
732  parentEl->removeChild(childEl);
733  childEl->release();
734 }
735 
736 //==============================================================================
737 // XmlDocument::saveXmlDocument
738 // wrapper for private outputXML
739 // Warning: filePath must be accessible or program will crash!
740 void XmlDocument::saveXmlDocument(std::string filePath)
741 {
742  __COUT__ << "Saving theDocument_ to file: " << filePath << std::endl;
743  // Return the first registered theImplementation_ that has the desired features. In
744  // this case, we are after a DOM theImplementation_ that has the LS feature... or
745  // Load/Save. DOMImplementation *theImplementation_ =
746  // DOMImplementationRegistry::getDOMImplementation(L"LS");
747  xercesc::DOMImplementation* saveImplementation = xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("LS"));
748 
749  //__COUT__ << "XERCES Version: " << _XERCES_VERSION << std::endl;
750 
751 #if _XERCES_VERSION >= 30000
752 
753  //__COUT__ << "making file" << filePath << std::endl;
754  // Create a DOMLSSerializer which is used to serialize a DOM tree into an XML
755  // theDocument_.
756  xercesc::DOMLSSerializer* serializer = ((xercesc::DOMImplementationLS*)saveImplementation)->createLSSerializer();
757 
758  // Make the output more human readable by inserting line feeds.
759  if (serializer->getDomConfig()->canSetParameter(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true))
760  serializer->getDomConfig()->setParameter(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
761 
762  // The end-of-line sequence of characters to be used in the XML being written out.
763  serializer->setNewLine(CONVERT_TO_XML("\r\n"));
764 
765  // Convert the path into Xerces compatible XMLCh*.
766  // XMLCh *tempFilePath = const_cast<XMLCh*>(CONVERT_TO_XML(filePath));
767 
768  // Specify the target for the XML output.
769  xercesc::XMLFormatTarget* formatTarget;
770  try
771  {
772  // formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
773  formatTarget = new xercesc::LocalFileFormatTarget(filePath.c_str());
774  }
775  catch (...)
776  {
777  __COUT__ << "Inaccessible file path: " << filePath << std::endl;
778  serializer->release();
779  // xercesc::XMLString::release(&tempFilePath);
780 
781  return;
782  }
783 
784  // Create a new empty output destination object.
785  xercesc::DOMLSOutput* output = ((xercesc::DOMImplementationLS*)saveImplementation)->createLSOutput();
786 
787  // Set the stream to our target.
788  output->setByteStream(formatTarget);
789  // Write the serialized output to the destination.
790  serializer->write(theDocument_, output);
791  serializer->release();
792  // xercesc::XMLString::release(&tempFilePath);
793  delete formatTarget;
794 #else
795 
796  xercesc::DOMWriter* serializer = ((xercesc::DOMImplementationLS*)saveImplementation)->createDOMWriter();
797  serializer->setFeature(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
798 
799  /*
800  Choose a location for the serialized output. The 3 options are:
801  1) StdOutFormatTarget (std output stream - good for debugging)
802  2) MemBufFormatTarget (to Memory)
803  3) LocalFileFormatTarget (save to file)
804  (Note: You'll need a different header file for each one)
805  */
806  // XMLFormatTarget* pTarget = new StdOutFormatTarget();
807  // Convert the path into Xerces compatible XMLCh*.
808  XMLCh* tempFilePath = xercesc::XMLString::transcode(filePath.c_str());
809  xercesc::XMLFormatTarget* formatTarget;
810  try
811  {
812  formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
813  }
814  catch (...)
815  {
816  __COUT__ << "Inaccessible file path: " << filePath << std::endl;
817  serializer->release();
818  xercesc::XMLString::release(&tempFilePath);
819  return;
820  }
821 
822  // Write the serialized output to the target.
823 
824  serializer->writeNode(formatTarget, *theDocument_);
825  serializer->release();
826  xercesc::XMLString::release(&tempFilePath);
827  delete formatTarget;
828 #endif
829 
830  // Cleanup.
831  //__COUT__ << "delete format target" << std::endl;
832 
833 #if _XERCES_VERSION >= 30000
834 
835  //__COUT__ << "delete output0" << std::endl;
836  output->release();
837  //__COUT__ << "delete output1" << std::endl;
838 
839 #endif
840 }
841 
842 //==============================================================================
843 bool XmlDocument::loadXmlDocument(std::string filePath)
844 {
845  __COUT__ << "Loading theDocument_ from file: " << filePath << std::endl;
846 
847  struct stat fileStatus;
848 
849  if (stat(filePath.c_str(), &fileStatus) != 0)
850  {
851  __COUT__ << "File not accessible." << std::endl;
852  return false;
853  }
854 
855  terminatePlatform();
856  initPlatform();
857 
858  xercesc::XercesDOMParser* parser = new xercesc::XercesDOMParser;
859  parser->setValidationScheme(xercesc::XercesDOMParser::Val_Auto);
860  parser->setDoNamespaces(true);
861  parser->setDoSchema(true);
862  parser->useCachedGrammarInParse(false);
863 
864  try
865  {
866  parser->parse(filePath.c_str());
867 
868  // theDocument_ memory object owned by the parent parser object
869  theDocument_ = parser->adoptDocument(); // instead of getDocument() so parser
870  // will not free theDocument_ when
871  // released
872 
873  // Get the top-level element: Name is "root". No attributes for "root"
874  rootElement_ = theDocument_->getDocumentElement();
875  if (!rootElement_)
876  throw(std::runtime_error("empty XML theDocument_"));
877  }
878  catch (xercesc::XMLException & e)
879  {
880  __COUT__ << "Error parsing file." << std::endl;
881  return false;
882  }
883  delete parser;
884 
885  return true;
886 }
887 // clang-format off
888 //============================================================================
889 void XmlDocument::setAnchors(std::string fSystemPath,
890  std::string fRootPath)
891 {
892  fSystemPath_ = fSystemPath;
893  fRootPath_ = fRootPath;
894 }
895 
896 //============================================================================
897 void XmlDocument::makeDirectoryBinaryTree(std::string fSystemPath,
898  std::string fRootPath,
899  int indent,
900  xercesc::DOMElement* anchorNode)
901 {
902  DIR* dir;
903  struct dirent* entry;
904 
905  std::string newFullPath = "";
906  char fchar = '.';
907  char schar = '.';
908 
909  fSystemPath_ = fSystemPath;
910  fRootPath_ = fRootPath;
911 
912  std::string fullPathName = fSystemPath_ +
913  std::string("/") +
914  fRootPath_ +
915  std::string("/") +
916  fFoldersPath_;
917 
918  if (!anchorNode) anchorNode = rootElement_;
919 
920  if (!(dir = opendir(fullPathName.c_str()))) return;
921 
922  while ((entry = readdir(dir)) != NULL)
923  {
924  std::string sName = std::string(entry->d_name);
925  fchar = sName.at(0);
926  if (sName.size() == 2) schar = sName.at(1);
927  if (((sName.size() == 1) && fchar == '.') ||
928  ((sName.size() == 2) && schar == '.'))
929  {
930  continue; // do not consider . and .. pseudo-folders
931  }
932 
933  if (entry->d_type == DT_DIR)
934  {
935  fThisFolderPath_ = std::string(entry->d_name);
936  newFullPath = fSystemPath_ +
937  fRootPath +
938  std::string("/") +
939  fThisFolderPath_;
940  if (hierarchyPaths_.size() > 0) STDLINE(std::string("Before push_back: ") + hierarchyPaths_.back(), std::string(ACGreen) + std::string(ACReverse));
941  hierarchyPaths_.push_back(std::string(entry->d_name) + std::string(""));
942  fFoldersPath_ += hierarchyPaths_.back() + "/";
943  STDLINE(std::string("Before push_back: ") + hierarchyPaths_.back(), std::string(ACRed) + std::string(ACReverse));
944  xercesc::DOMElement* node = this->populateBinaryTreeNode(
945  anchorNode,
946  std::string(entry->d_name),
947  indent,
948  false
949  );
950  this->makeDirectoryBinaryTree(fSystemPath, fRootPath, indent + 1, node);
951  STDLINE(std::string("Before popBack: ") + hierarchyPaths_.back(), std::string(ACGreen) + std::string(ACReverse));
952  if (hierarchyPaths_.size() > 0) hierarchyPaths_.pop_back();
953  if (hierarchyPaths_.size() > 0)
954  {
955  fFoldersPath_ = hierarchyPaths_.back() + "/";
956  }
957  else
958  {
959  fFoldersPath_ = "/";
960  }
961 
962  STDLINE(std::string("After popBack: ") + fFoldersPath_, std::string(ACRed) + std::string(ACReverse));
963  }
964  else
965  {
966  newFullPath = fSystemPath_ + std::string("/") + std::string(entry->d_name);
967  boost::smatch what;
968  boost::regex re{ ".*\\.root$" };
969  if (boost::regex_search(newFullPath, what, re))
970  {
971  fFileName_ = std::string(entry->d_name);
972  STDLINE(std::string("fFileName: ") + fFileName_, std::string(ACCyan) + std::string(ACReverse));
973  /*xercesc::DOMElement* node = */this->populateBinaryTreeNode(
974  anchorNode,
975  fFileName_,
976  indent,
977  true
978  );
979  }
980  }
981  }
982  closedir(dir);
983 }
984 
985 //==========================================================================================
986 xercesc::DOMElement* XmlDocument::populateBinaryTreeNode(xercesc::DOMElement* anchorNode,
987  std::string name,
988  int indent,
989  bool isLeaf)
990 {
991  std::string nm = "unassigned";
992  xercesc::DOMElement* nodes = NULL;
993 
994  // if( isLeaf )
995  // {
996  // STDLINE("","") ;
997  // if( theNodes_.find(indent) != theNodes_.end() ) nodes = theNodes_.find(indent)->second ;
998  // if( theNames_.find(indent) != theNames_.end() ) nm = theNames_.find(indent)->second ;
999  // ss_.str("") ; ss_ << "Attaching " << name << " to " << nm << " size: " << theNames_.size();
1000  // STDLINE(ss_.str(),ACGreen) ;
1001  // }
1002  // else
1003  // {
1004  if (theNodes_.find(indent) != theNodes_.end()) // a new node
1005  {
1006  if (theNodes_.find(indent) != theNodes_.end()) nodes = theNodes_.find(indent)->second;
1007  if (theNames_.find(indent) != theNames_.end()) nm = theNames_.find(indent)->second;
1008  }
1009  else
1010  {
1011  nodes = theDocument_->createElement(xercesc::XMLString::transcode("nodes"));
1012  theNodes_[indent] = nodes;
1013  theNames_[indent] = name;
1014  anchorNode->appendChild(nodes);
1015  }
1016  // }
1017 
1018  xercesc::DOMElement* node = theDocument_->createElement(xercesc::XMLString::transcode("node"));
1019  nodes->appendChild(node);
1020 
1021  xercesc::DOMElement* nChilds = theDocument_->createElement(xercesc::XMLString::transcode("nChilds"));
1022  node->appendChild(nChilds);
1023 
1024  xercesc::DOMText* nChildsVal = theDocument_->createTextNode(xercesc::XMLString::transcode("x"));
1025  nChilds->appendChild(nChildsVal);
1026 
1027  xercesc::DOMElement* fSystemPathNode = theDocument_->createElement(xercesc::XMLString::transcode("fSystemPath"));
1028  node->appendChild(fSystemPathNode);
1029 
1030  xercesc::DOMText* fSystemPathVal = theDocument_->createTextNode(xercesc::XMLString::transcode(fSystemPath_.c_str()));
1031  fSystemPathNode->appendChild(fSystemPathVal);
1032 
1033  xercesc::DOMElement* fRootPathNode = theDocument_->createElement(xercesc::XMLString::transcode("fRootPath"));
1034  node->appendChild(fRootPathNode);
1035 
1036  xercesc::DOMText* fRootPathVal = theDocument_->createTextNode(xercesc::XMLString::transcode(fRootPath_.c_str()));
1037  fRootPathNode->appendChild(fRootPathVal);
1038 
1039  xercesc::DOMElement* fFoldersPathNode = theDocument_->createElement(xercesc::XMLString::transcode("fFoldersPath"));
1040  node->appendChild(fFoldersPathNode);
1041 
1042  xercesc::DOMText* foldersPathVal = theDocument_->createTextNode(xercesc::XMLString::transcode(fFoldersPath_.c_str()));
1043  fFoldersPathNode->appendChild(foldersPathVal);
1044 
1045  xercesc::DOMElement* fThisFolderPath = NULL;
1046  xercesc::DOMElement* fFileOrHistName = NULL;
1047  xercesc::DOMText* fileOrDirNameVal = NULL;
1048  xercesc::DOMText* thisFolderNameVal = NULL;
1049 
1050  fThisFolderPath = theDocument_->createElement(xercesc::XMLString::transcode("fDisplayName"));
1051 
1052  if (isLeaf)
1053  {
1054  fFileOrHistName = theDocument_->createElement(xercesc::XMLString::transcode("fFileName"));
1055  fileOrDirNameVal = theDocument_->createTextNode(xercesc::XMLString::transcode(name.c_str()));
1056  thisFolderNameVal = theDocument_->createTextNode(xercesc::XMLString::transcode(name.c_str()));
1057  ss_.str(""); ss_ << "name: " << ACRed << fThisFolderPath_ << ACPlain << "/" << ACGreen << name;
1058  STDLINE(ss_.str(), "");
1059  }
1060  else
1061  {
1062  std::string blank;
1063  fFileOrHistName = theDocument_->createElement(xercesc::XMLString::transcode("fFileName"));
1064  fileOrDirNameVal = theDocument_->createTextNode(xercesc::XMLString::transcode(blank.c_str()));
1065  thisFolderNameVal = theDocument_->createTextNode(xercesc::XMLString::transcode(fThisFolderPath_.c_str()));
1066  STDLINE(std::string("name : "), ACCyan);
1067  }
1068 
1069  node->appendChild(fFileOrHistName);
1070  fFileOrHistName->appendChild(fileOrDirNameVal);
1071 
1072  node->appendChild(fThisFolderPath);
1073  fThisFolderPath->appendChild(thisFolderNameVal);
1074 
1075  xercesc::DOMElement* leaf = theDocument_->createElement(xercesc::XMLString::transcode("leaf"));
1076  node->appendChild(leaf);
1077 
1078  xercesc::DOMText* leafVal = theDocument_->createTextNode(xercesc::XMLString::transcode(isALeaf_[isLeaf].c_str()));
1079  leaf->appendChild(leafVal);
1080 
1081  return node;
1082 }
1083 //==========================================================================================
1084 void XmlDocument::setDarioStyle(bool darioStyle)
1085 {
1086  darioXMLStyle_ = darioStyle;
1087 }
1088 // clang-format on
1089 //==============================================================================
1090 // XmlDocument::recursiveOutputXmlDocument
1091 // recursively printout XML theDocument_ to std out and output stream if not null
1092 /*
1093 void XmlDocument::recursiveFixTextFields(DOMElement *currEl)
1094 {
1095  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
1096 
1097  //recurse through children
1098  for(unsigned int i = 0; i<nodeList->getLength();++i)
1099  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //fix text nodes
1100  ((DOMElement*)(nodeList->item(i)))->setTextContent(CONVERT_TO_XML(
1101 //change text value to escaped version
1102  escapeString(XML_TO_CHAR(((DOMElement*)(nodeList->item(i)))->getNodeValue()))));
1103  else
1104  recursiveFixTextFields ((DOMElement*)(nodeList->item(i)));
1105 }
1106 */