LibOFX
ofx_sgml.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  ofx_sgml.cpp
3  -------------------
4  copyright : (C) 2002 by Benoit Grégoire
5  email : benoitg@coeus.ca
6 ***************************************************************************/
12 /***************************************************************************
13  * *
14  * This program is free software; you can redistribute it and/or modify *
15  * it under the terms of the GNU General Public License as published by *
16  * the Free Software Foundation; either version 2 of the License, or *
17  * (at your option) any later version. *
18  * *
19  ***************************************************************************/
20 
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 
25 #include <iostream>
26 #include <stdlib.h>
27 #include <string>
28 #include <cassert>
29 #include "ParserEventGeneratorKit.h"
30 #include "libofx.h"
31 #include "ofx_utilities.hh"
32 #include "messages.hh"
33 #include "ofx_containers.hh"
34 #include "ofx_sgml.hh"
35 
36 
/* Root container of the OFX document currently being parsed. It is created when the
   <OFX> element opens and deleted (and reset to NULL) when that element closes. */
OfxMainContainer * MainContainer = NULL;
/* Parser state declared elsewhere; updated by the OFXApplication callbacks below
   (entity_ptr in openEntityChange(), position on every element/data/error event). */
extern SGMLApplication::OpenEntityPtr entity_ptr;
extern SGMLApplication::Position position;
/* Substring searched for in OpenSP error messages to recognise errors that are
   counted separately and later subtracted from the total error count. */
static const std::string MESSAGE_NON_SGML_CHAR = "non SGML character";
41 
42 
45 class OFXApplication : public SGMLApplication
46 {
47 private:
48  OfxGenericContainer *curr_container_element;
49  OfxGenericContainer *tmp_container_element;
50  bool is_data_element;
51  std::string incoming_data;
52  LibofxContext * libofx_context;
53  unsigned errorCountToIgnore = 0;
54 
55 public:
56 
57  OFXApplication (LibofxContext * p_libofx_context)
58  {
59  MainContainer = NULL;
60  curr_container_element = NULL;
61  is_data_element = false;
62  libofx_context = p_libofx_context;
63  }
65  {
66  message_out(DEBUG, "Entering the OFXApplication's destructor");
67  }
68 
69  unsigned getErrorCountToIgnore() const { return errorCountToIgnore; }
70 
75  void startElement (const StartElementEvent & event)
76  {
77  std::string identifier = CharStringtostring (event.gi);
78  message_out(PARSER, "startElement event received from OpenSP for element " + identifier);
79 
80  position = event.pos;
81 
82  switch (event.contentType)
83  {
84  case StartElementEvent::empty:
85  message_out(ERROR, "StartElementEvent::empty\n");
86  break;
87  case StartElementEvent::cdata:
88  message_out(ERROR, "StartElementEvent::cdata\n");
89  break;
90  case StartElementEvent::rcdata:
91  message_out(ERROR, "StartElementEvent::rcdata\n");
92  break;
93  case StartElementEvent::mixed:
94  message_out(PARSER, "StartElementEvent::mixed");
95  is_data_element = true;
96  break;
97  case StartElementEvent::element:
98  message_out(PARSER, "StartElementEvent::element");
99  is_data_element = false;
100  break;
101  default:
102  message_out(ERROR, "Unknown SGML content type?!?!?!? OpenSP interface changed?");
103  }
104 
105  if (is_data_element == false)
106  {
107  /*------- The following are OFX entities ---------------*/
108 
109  if (identifier == "OFX")
110  {
111  message_out (PARSER, "Element " + identifier + " found");
112  MainContainer = new OfxMainContainer (libofx_context, curr_container_element, identifier);
113  curr_container_element = MainContainer;
114  }
115  else if (identifier == "STATUS")
116  {
117  message_out (PARSER, "Element " + identifier + " found");
118  curr_container_element = new OfxStatusContainer (libofx_context, curr_container_element, identifier);
119  }
120  else if (identifier == "STMTRS" ||
121  identifier == "CCSTMTRS" ||
122  identifier == "INVSTMTRS")
123  {
124  message_out (PARSER, "Element " + identifier + " found");
125  curr_container_element = new OfxStatementContainer (libofx_context, curr_container_element, identifier);
126  }
127  else if (identifier == "BANKTRANLIST" || identifier == "INVTRANLIST")
128  {
129  message_out (PARSER, "Element " + identifier + " found");
130  //BANKTRANLIST ignored, we will process it's attributes directly inside the STATEMENT,
131  if (curr_container_element && curr_container_element->type != "STATEMENT")
132  {
133  message_out(ERROR, "Element " + identifier + " found while not inside a STATEMENT container");
134  }
135  else
136  {
137  curr_container_element = new OfxPushUpContainer (libofx_context, curr_container_element, identifier);
138  }
139  }
140  else if (identifier == "STMTTRN")
141  {
142  message_out (PARSER, "Element " + identifier + " found");
143  if (curr_container_element && curr_container_element->type == "INVESTMENT")
144  {
145  //push up to the INVBANKTRAN OfxInvestmentTransactionContainer
146  curr_container_element = new OfxPushUpContainer (libofx_context, curr_container_element, identifier);
147  }
148  else
149  {
150  curr_container_element = new OfxBankTransactionContainer (libofx_context, curr_container_element, identifier);
151  }
152  }
153  else if (identifier == "BUYDEBT" ||
154  identifier == "BUYMF" ||
155  identifier == "BUYOPT" ||
156  identifier == "BUYOTHER" ||
157  identifier == "BUYSTOCK" ||
158  identifier == "CLOSUREOPT" ||
159  identifier == "INCOME" ||
160  identifier == "INVEXPENSE" ||
161  identifier == "JRNLFUND" ||
162  identifier == "JRNLSEC" ||
163  identifier == "MARGININTEREST" ||
164  identifier == "REINVEST" ||
165  identifier == "RETOFCAP" ||
166  identifier == "SELLDEBT" ||
167  identifier == "SELLMF" ||
168  identifier == "SELLOPT" ||
169  identifier == "SELLOTHER" ||
170  identifier == "SELLSTOCK" ||
171  identifier == "SPLIT" ||
172  identifier == "TRANSFER" ||
173  identifier == "INVBANKTRAN" )
174  {
175  message_out (PARSER, "Element " + identifier + " found");
176  curr_container_element = new OfxInvestmentTransactionContainer (libofx_context, curr_container_element, identifier);
177  }
178  /*The following is a list of OFX elements whose attributes will be processed by the parent container*/
179  else if (identifier == "INVBUY" ||
180  identifier == "INVSELL" ||
181  identifier == "INVTRAN" ||
182  identifier == "SECINFO" ||
183  identifier == "SECID" ||
184  identifier == "CURRENCY" ||
185  identifier == "ORIGCURRENCY")
186  {
187  message_out (PARSER, "Element " + identifier + " found");
188  curr_container_element = new OfxPushUpContainer (libofx_context, curr_container_element, identifier);
189  }
190 
191  /* provide a parent for the account list response so its ACCTFROM can be recognized */
192  else if (identifier == "BANKACCTINFO" || identifier == "CCACCTINFO" || identifier == "INVACCTINFO")
193  {
194  message_out (PARSER, "Element " + identifier + " found");
195  curr_container_element = new OfxPushUpContainer (libofx_context, curr_container_element, identifier);
196  }
197 
198  /* The different types of accounts */
199  else if (identifier == "BANKACCTFROM" || identifier == "CCACCTFROM" || identifier == "INVACCTFROM")
200  {
201  message_out (PARSER, "Element " + identifier + " found");
202  /* check the container to avoid creating multiple statements for TRANSFERs */
203  if (curr_container_element &&
204  ( curr_container_element->type == "STATEMENT"
205  || curr_container_element->tag_identifier == "BANKACCTINFO"
206  || curr_container_element->tag_identifier == "CCACCTINFO"
207  || curr_container_element->tag_identifier == "INVACCTINFO"))
208  curr_container_element = new OfxAccountContainer (libofx_context, curr_container_element, identifier);
209  else
210  // no new account or statement for a <TRANSFER>
211  curr_container_element = new OfxDummyContainer (libofx_context, curr_container_element, identifier);
212  }
213  else if (identifier == "STOCKINFO" || identifier == "OPTINFO" ||
214  identifier == "DEBTINFO" || identifier == "MFINFO" || identifier == "OTHERINFO")
215  {
216  message_out (PARSER, "Element " + identifier + " found");
217  curr_container_element = new OfxSecurityContainer (libofx_context, curr_container_element, identifier);
218  }
219  /* The different types of balances */
220  else if (identifier == "LEDGERBAL" ||
221  identifier == "AVAILBAL" ||
222  identifier == "INVBAL")
223  {
224  message_out (PARSER, "Element " + identifier + " found");
225  curr_container_element = new OfxBalanceContainer (libofx_context, curr_container_element, identifier);
226  }
227  else if (identifier == "INVPOS")
228  {
229  message_out (PARSER, "Element " + identifier + " found");
230  curr_container_element = new OfxPositionContainer (libofx_context, curr_container_element, identifier);
231  }
232  else
233  {
234  /* We dont know this OFX element, so we create a dummy container */
235  curr_container_element = new OfxDummyContainer(libofx_context, curr_container_element, identifier);
236  }
237  }
238  else
239  {
242  if (identifier == "INV401K")
243  {
244  /* Minimal handler for this section to discard <DTASOF>, <DTSTART> and <DTEND> that need to be ignored */
245  message_out (PARSER, "Element " + identifier + " found");
246  curr_container_element = new OfxInv401kContainer (libofx_context, curr_container_element, identifier);
247  }
248  if (identifier == "INV401KBAL")
249  {
250  message_out (PARSER, "Element " + identifier + " found");
251  curr_container_element = new OfxBalanceContainer (libofx_context, curr_container_element, identifier);
252  }
253  else
254  {
255  /* The element was a data element. OpenSP will call one or several data() callback with the data */
256  message_out (PARSER, "Data element " + identifier + " found");
257  /* There is a bug in OpenSP 1.3.4, which won't send endElement Event for some elements, and will instead send an error like "document type does not allow element "MESSAGE" here". Incoming_data should be empty in such a case, but it will not be if the endElement event was skipped. So we empty it, so at least the last element has a chance of having valid data */
258  if (incoming_data != "")
259  {
260  message_out (ERROR, "startElement: incoming_data should be empty! You are probably using OpenSP <= 1.3.4. The following data was lost: " + incoming_data );
261  incoming_data.assign ("");
262  }
263  }
264  }
265  }
266 
271  void endElement (const EndElementEvent & event)
272  {
273  std::string identifier = CharStringtostring (event.gi);
274  bool end_element_for_data_element = is_data_element;
275  message_out(PARSER, "endElement event received from OpenSP for element " + identifier);
276 
277  position = event.pos;
278  if (curr_container_element == NULL)
279  {
280  message_out (ERROR, "Tried to close a " + identifier + " without a open element (NULL pointer)");
281  incoming_data.assign ("");
282  }
283  else //curr_container_element != NULL
284  {
285  if (end_element_for_data_element == true)
286  {
287  incoming_data = strip_whitespace(incoming_data);
288 
289  curr_container_element->add_attribute (identifier, incoming_data);
290  message_out (PARSER, "endElement: Added data '" + incoming_data + "' from " + identifier + " to " + curr_container_element->type + " container_element");
291  incoming_data.assign ("");
292  is_data_element = false;
293  }
294  else
295  {
296  if (identifier == curr_container_element->tag_identifier)
297  {
298  if (incoming_data != "")
299  {
300  message_out(ERROR, "End tag for non data element " + identifier + ", incoming data should be empty but contains: " + incoming_data + " DATA HAS BEEN LOST SOMEWHERE!");
301  }
302 
303  if (identifier == "OFX")
304  {
305  /* The main container is a special case */
306  tmp_container_element = curr_container_element;
307  curr_container_element = curr_container_element->getparent ();
308  if (curr_container_element == NULL)
309  {
310  //Defensive coding, this isn't supposed to happen
311  curr_container_element = tmp_container_element;
312  }
313  if (MainContainer != NULL)
314  {
315  MainContainer->gen_event();
316  delete MainContainer;
317  MainContainer = NULL;
318  curr_container_element = NULL;
319  message_out (DEBUG, "Element " + identifier + " closed, MainContainer destroyed");
320  }
321  else
322  {
323  message_out (DEBUG, "Element " + identifier + " closed, but there was no MainContainer to destroy (probably a malformed file)!");
324  }
325  }
326  else
327  {
328  tmp_container_element = curr_container_element;
329  curr_container_element = curr_container_element->getparent ();
330  if (MainContainer != NULL)
331  {
335  if (identifier == "CURRENCY" || identifier == "ORIGCURRENCY")
336  {
337  tmp_container_element->add_attribute (identifier, incoming_data);
338  message_out (DEBUG, "Element " + identifier + " closed, container " + tmp_container_element->type + " updated");
339  }
340  else
341  {
342  tmp_container_element->add_to_main_tree();
343  message_out (PARSER, "Element " + identifier + " closed, object added to MainContainer");
344  }
345  }
346  else
347  {
348  message_out (ERROR, "MainContainer is NULL trying to add element " + identifier);
349  }
350  }
351  }
352  else
353  {
354  message_out (ERROR, "Tried to close a " + identifier + " but a " + curr_container_element->type + " is currently open.");
355  }
356  }
357  }
358  }
359 
364  void data (const DataEvent & event)
365  {
366  std::string tmp;
367  position = event.pos;
368  AppendCharStringtostring (event.data, incoming_data);
369  message_out(PARSER, "data event received from OpenSP, incoming_data is now: " + incoming_data);
370  }
371 
376  void error (const ErrorEvent & event)
377  {
378  std::string message;
379  OfxMsgType error_type = ERROR;
380  const std::string eventMessage = CharStringtostring (event.message);
381 
382  position = event.pos;
383  message = message + "OpenSP parser: ";
384  switch (event.type)
385  {
386  case SGMLApplication::ErrorEvent::quantity:
387  message = message + "quantity (Exceeding a quantity limit):";
388  error_type = ERROR;
389  break;
390  case SGMLApplication::ErrorEvent::idref:
391  message = message + "idref (An IDREF to a non-existent ID):";
392  error_type = ERROR;
393  break;
394  case SGMLApplication::ErrorEvent::capacity:
395  message = message + "capacity (Exceeding a capacity limit):";
396  error_type = ERROR;
397  break;
398  case SGMLApplication::ErrorEvent::otherError:
399  // #60: If the SGML parser encounters a non-ascii char, it sends an error
400  // message, even though those characters are being forwarded just fine.
401  // Hence we count the occurrence of those errors and subtract it from the
402  // final number of errors.
403  if (eventMessage.find(MESSAGE_NON_SGML_CHAR) != std::string::npos) {
404  ++errorCountToIgnore;
405  message = message + "ignored character error:";
406  error_type = INFO;
407  } else {
408  message = message + "otherError (misc parse error):";
409  error_type = ERROR;
410  }
411  break;
412  case SGMLApplication::ErrorEvent::warning:
413  message = message + "warning (Not actually an error.):";
414  error_type = WARNING;
415  break;
416  case SGMLApplication::ErrorEvent::info:
417  message = message + "info (An informationnal message. Not actually an error):";
418  error_type = INFO;
419  break;
420  default:
421  message = message + "OpenSP sent an unknown error to LibOFX (You probably have a newer version of OpenSP):";
422  }
423  message = message + "\n" + eventMessage;
424  message_out (error_type, message);
425  }
426 
431  void openEntityChange (const OpenEntityPtr & para_entity_ptr)
432  {
433  message_out(DEBUG, "openEntityChange()\n");
434  entity_ptr = para_entity_ptr;
435 
436  };
437 
438 private:
439 };
440 
444 int ofx_proc_sgml(LibofxContext * libofx_context, int argc, char * const* argv)
445 {
446  message_out(DEBUG, "Begin ofx_proc_sgml()");
447  assert(argc >= 3);
448  message_out(DEBUG, argv[0]);
449  message_out(DEBUG, argv[1]);
450  message_out(DEBUG, argv[2]);
451 
452  ParserEventGeneratorKit parserKit;
453  parserKit.setOption (ParserEventGeneratorKit::showOpenEntities);
454  EventGenerator *egp = parserKit.makeEventGenerator (argc, argv);
455  egp->inhibitMessages (true); /* Error output is handled by libofx not OpenSP */
456  OFXApplication app(libofx_context);
457  unsigned originalErrorCount = egp->run (app); /* Begin parsing */
458  unsigned nErrors = originalErrorCount - app.getErrorCountToIgnore(); // but ignore certain known errors that we want to ignore
459  delete egp; //Note that this is where bug is triggered
460  return nErrors > 0;
461 }
OfxGenericContainer::type
std::string type
Definition: ofx_containers.hh:41
OfxDummyContainer
A container to hold OFX SGML elements that LibOFX knows nothing about.
Definition: ofx_containers.hh:74
OfxMsgType
OfxMsgType
Definition: messages.hh:23
OfxMainContainer
The root container. Created by the <OFX> OFX element or by the export functions.
Definition: ofx_containers.hh:289
strip_whitespace
std::string strip_whitespace(const std::string para_string)
Sanitize a string coming from OpenSP.
Definition: ofx_utilities.cpp:153
OfxStatusContainer
Represents the <STATUS> OFX SGML entity.
Definition: ofx_containers.hh:105
ERROR
@ ERROR
Definition: messages.hh:41
OfxInvestmentTransactionContainer
Represents a bank or credit card transaction.
Definition: ofx_containers.hh:274
OfxMainContainer::gen_event
int gen_event()
Generate libofx.h events.
Definition: ofx_container_main.cpp:183
OfxAccountContainer
Represents a bank account or a credit card account.
Definition: ofx_containers.hh:178
entity_ptr
SGMLApplication::OpenEntityPtr entity_ptr
Definition: messages.cpp:33
OfxBankTransactionContainer
Represents a bank or credit card transaction.
Definition: ofx_containers.hh:263
OFXApplication::data
void data(const DataEvent &event)
Callback: Data from an OFX element.
Definition: ofx_sgml.cpp:364
ofx_utilities.hh
Various simple functions for type conversion & al.
OFXApplication::error
void error(const ErrorEvent &event)
Callback: SGML parse error.
Definition: ofx_sgml.cpp:376
OFXApplication::startElement
void startElement(const StartElementEvent &event)
Callback: Start of an OFX element.
Definition: ofx_sgml.cpp:75
OfxSecurityContainer
Represents a security, such as a stock or bond.
Definition: ofx_containers.hh:202
OfxGenericContainer
A generic container for an OFX SGML element. Every container inherits from OfxGenericContainer.
Definition: ofx_containers.hh:31
OfxGenericContainer::add_attribute
virtual void add_attribute(const std::string identifier, const std::string value)
Add data to a container object.
Definition: ofx_container_generic.cpp:57
LibofxContext
Definition: context.hh:23
ofx_containers.hh
LibOFX internal object code.
OfxInv401kContainer
A container to hold OFX SGML elements for <INV401K>
Definition: ofx_containers.hh:85
message_out
int message_out(OfxMsgType error_type, const std::string message)
Message output function.
Definition: messages.cpp:67
OFXApplication
This object is driven by OpenSP as it parses the SGML from the ofx file(s)
Definition: ofx_sgml.cpp:45
OfxBalanceContainer
Represents the <BALANCE>, <INVBAL> or <INV401KBAL> OFX SGML entity.
Definition: ofx_containers.hh:119
AppendCharStringtostring
void AppendCharStringtostring(const SGMLApplication::CharString source, std::string &dest)
Append an OpenSP CharString to an existing C++ STL string.
Definition: ofx_utilities.cpp:52
INFO
@ INFO
Definition: messages.hh:39
OfxGenericContainer::getparent
OfxGenericContainer * getparent()
Returns the parent container object (the one representing the containing OFX SGML element)
Definition: ofx_container_generic.cpp:63
OFXApplication::openEntityChange
void openEntityChange(const OpenEntityPtr &para_entity_ptr)
Callback: Receive internal OpenSP state.
Definition: ofx_sgml.cpp:431
OfxStatementContainer
Represents a statement for either a bank account or a credit card account.
Definition: ofx_containers.hh:155
ofx_proc_sgml
int ofx_proc_sgml(LibofxContext *libofx_context, int argc, char *const *argv)
Parses a DTD and OFX file(s)
Definition: ofx_sgml.cpp:444
OFXApplication::endElement
void endElement(const EndElementEvent &event)
Callback: End of an OFX element.
Definition: ofx_sgml.cpp:271
ofx_sgml.hh
OFX/SGML parsing functionality.
WARNING
@ WARNING
Definition: messages.hh:40
OfxPushUpContainer
A container to hold an OFX SGML element for which you want the parent to process its data elements.
Definition: ofx_containers.hh:96
OfxPositionContainer
Represents an investment position, such as a stock or bond.
Definition: ofx_containers.hh:222
position
SGMLApplication::Position position
Definition: messages.cpp:34
messages.hh
Message IO functionality.
OfxGenericContainer::tag_identifier
std::string tag_identifier
Definition: ofx_containers.hh:42
CharStringtostring
std::string CharStringtostring(const SGMLApplication::CharString source)
Convert OpenSP CharString to a C++ STL string.
Definition: ofx_utilities.cpp:40
PARSER
@ PARSER
Definition: messages.hh:42
DEBUG
@ DEBUG
Definition: messages.hh:32
OfxGenericContainer::add_to_main_tree
virtual int add_to_main_tree()
Add this container to the main tree.
Definition: ofx_container_generic.cpp:74