// CMakeDoxygenFilter.cpp
  1. /*=============================================================================
  2. Library: CTK
  3. Copyright (c) German Cancer Research Center,
  4. Division of Medical and Biological Informatics
  5. Licensed under the Apache License, Version 2.0 (the "License");
  6. you may not use this file except in compliance with the License.
  7. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. =============================================================================*/
  #include <assert.h>

  #include <cctype>
  #include <cstdio>
  #include <cstdlib>
  #include <fstream>
  #include <iostream>
  #include <string>
  //--------------------------------------
  // Utility classes and functions
  //--------------------------------------
  /**
   * \brief Character traits that ignore case when comparing characters.
   *
   * Everything not overridden here is inherited from std::char_traits<char>.
   * Used through the ci_string typedef below so that comparisons such as
   * <code>identifier == "set"</code> also match "SET" or "Set".
   */
  struct ci_char_traits : public std::char_traits<char>
  {
    /// \return true if \a c1 and \a c2 are equal when case is ignored.
    static bool eq(char c1, char c2)
    { return upper(c1) == upper(c2); }

    /// \return true if \a c1 and \a c2 differ when case is ignored.
    static bool ne(char c1, char c2)
    { return upper(c1) != upper(c2); }

    /// \return true if \a c1 orders before \a c2 when case is ignored.
    static bool lt(char c1, char c2)
    { return upper(c1) < upper(c2); }

    /// \return true if \a c1 orders after \a c2 when case is ignored.
    static bool gt(char c1, char c2)
    { return upper(c1) > upper(c2); }

    /**
     * Compares the first \a n characters of \a s1 and \a s2, ignoring case.
     *
     * \return -1, 0 or 1 if \a s1 orders before, equal to or after \a s2.
     */
    static int compare(const char* s1, const char* s2, std::size_t n)
    {
      for (; n > 0; --n, ++s1, ++s2)
      {
        if (lt(*s1, *s2)) return -1;
        if (gt(*s1, *s2)) return 1;
      }
      return 0;
    }

    /**
     * Searches the first \a n characters of \a s for \a a, ignoring case.
     *
     * \return Pointer to the first match, or a null pointer if there is none.
     *         basic_string::find() relies on the null pointer to report npos.
     */
    static const char* find(const char* s, std::size_t n, char a)
    {
      for (; n > 0; --n, ++s)
      {
        if (eq(*s, a)) return s;
      }
      return 0;
    }

  private:

    // toupper() is undefined for negative arguments other than EOF, so the
    // character is converted to unsigned char before being widened to int.
    static int upper(char c)
    { return std::toupper(static_cast<unsigned char>(c)); }
  };

  /// String type whose comparisons and searches are case-insensitive.
  typedef std::basic_string<char, ci_char_traits> ci_string;
  55. //--------------------------------------
  56. // Lexer
  57. //--------------------------------------
  58. class CMakeLexer
  59. {
  60. public:
  61. enum Token {
  62. TOK_EOF = -1,
  63. TOK_EOL = -2,
  64. // commands
  65. TOK_MACRO = -3, TOK_ENDMACRO = -4,
  66. TOK_FUNCTION = -5, TOK_ENDFUNCTION = -6,
  67. TOK_DOXYGEN_COMMENT = -7,
  68. TOK_SET = -8,
  69. TOK_STRING_LITERAL = -100,
  70. TOK_NUMBER_LITERAL = -102,
  71. // primary
  72. TOK_IDENTIFIER = -200
  73. };
  74. CMakeLexer(std::istream& is)
  75. : _lastChar(' '), _is(is), _line(1), _col(1)
  76. {}
  77. int getToken()
  78. {
  79. // skip whitespace
  80. while (isspace(_lastChar) && _lastChar != '\r' && _lastChar != '\n')
  81. {
  82. _lastChar = getChar();
  83. }
  84. if (isalpha(_lastChar) || _lastChar == '_')
  85. {
  86. _identifier = _lastChar;
  87. while (isalnum(_lastChar = getChar()) || _lastChar == '-' || _lastChar == '_')
  88. {
  89. _identifier += _lastChar;
  90. }
  91. if (_identifier == "set")
  92. return TOK_SET;
  93. if (_identifier == "function")
  94. return TOK_FUNCTION;
  95. if (_identifier == "macro")
  96. return TOK_MACRO;
  97. if (_identifier == "endfunction")
  98. return TOK_ENDFUNCTION;
  99. if (_identifier == "endmacro")
  100. return TOK_ENDMACRO;
  101. return TOK_IDENTIFIER;
  102. }
  103. if (isdigit(_lastChar))
  104. {
  105. // very lax!! number detection
  106. _identifier = _lastChar;
  107. while (isalnum(_lastChar = getChar()) || _lastChar == '.' || _lastChar == ',')
  108. {
  109. _identifier += _lastChar;
  110. }
  111. return TOK_NUMBER_LITERAL;
  112. }
  113. if (_lastChar == '#')
  114. {
  115. _lastChar = getChar();
  116. if (_lastChar == '!')
  117. {
  118. // found a doxygen comment marker
  119. _identifier.clear();
  120. _lastChar = getChar();
  121. while (_lastChar != EOF && _lastChar != '\n' && _lastChar != '\r')
  122. {
  123. _identifier += _lastChar;
  124. _lastChar = getChar();
  125. }
  126. return TOK_DOXYGEN_COMMENT;
  127. }
  128. // skip the comment
  129. while (_lastChar != EOF && _lastChar != '\n' && _lastChar != '\r')
  130. {
  131. _lastChar = getChar();
  132. }
  133. }
  134. if (_lastChar == '"')
  135. {
  136. _lastChar = getChar();
  137. _identifier.clear();
  138. while (_lastChar != EOF && _lastChar != '"')
  139. {
  140. _identifier += _lastChar;
  141. _lastChar = getChar();
  142. }
  143. // eat the closing "
  144. _lastChar = getChar();
  145. return TOK_STRING_LITERAL;
  146. }
  147. // don't eat the EOF
  148. if (_lastChar == EOF) return TOK_EOF;
  149. // don't eat the EOL
  150. if (_lastChar == '\r' || _lastChar == '\n')
  151. {
  152. if (_lastChar == '\r') _lastChar = getChar();
  153. if (_lastChar == '\n') _lastChar = getChar();
  154. return TOK_EOL;
  155. }
  156. // return the character as its ascii value
  157. int thisChar = _lastChar;
  158. _lastChar = getChar();
  159. return thisChar;
  160. }
  161. std::string getIdentifier() const
  162. {
  163. return std::string(_identifier.c_str());
  164. }
  165. int curLine() const
  166. { return _line; }
  167. int curCol() const
  168. { return _col; }
  169. int getChar()
  170. {
  171. int c = _is.get();
  172. updateLoc(c);
  173. return c;
  174. }
  175. private:
  176. void updateLoc(int c)
  177. {
  178. if (c == '\n' || c == '\r')
  179. {
  180. ++_line;
  181. _col = 1;
  182. }
  183. else
  184. {
  185. ++_col;
  186. }
  187. }
  188. ci_string _identifier;
  189. int _lastChar;
  190. std::istream& _is;
  191. int _line;
  192. int _col;
  193. };
  194. //--------------------------------------
  195. // Parser
  196. //--------------------------------------
  197. class CMakeParser
  198. {
  199. public:
  200. CMakeParser(std::istream& is, std::ostream& os)
  201. : _is(is), _os(os), _lexer(is), _curToken(CMakeLexer::TOK_EOF), _lastToken(CMakeLexer::TOK_EOF)
  202. { }
  203. int curToken()
  204. {
  205. return _curToken;
  206. }
  207. int nextToken()
  208. {
  209. _lastToken = _curToken;
  210. _curToken = _lexer.getToken();
  211. while (_curToken == CMakeLexer::TOK_EOL)
  212. {
  213. // Try to preserve lines in output to allow correct line number referencing by doxygen.
  214. _os << std::endl;
  215. _curToken = _lexer.getToken();
  216. }
  217. return _curToken;
  218. }
  219. void handleMacro()
  220. {
  221. if(!parseMacro())
  222. {
  223. // skip token for error recovery
  224. nextToken();
  225. }
  226. }
  227. void handleFunction()
  228. {
  229. if(!parseFunction())
  230. {
  231. // skip token for error recovery
  232. nextToken();
  233. }
  234. }
  235. void handleSet()
  236. {
  237. // SET(var ...) following a documentation block is assumed to be a variable declaration.
  238. if (_lastToken != CMakeLexer::TOK_DOXYGEN_COMMENT)
  239. {
  240. // No comment block before
  241. nextToken();
  242. } else if(!parseSet())
  243. {
  244. // skip token for error recovery
  245. nextToken();
  246. }
  247. }
  248. void handleDoxygenComment()
  249. {
  250. _os << "///" << _lexer.getIdentifier();
  251. nextToken();
  252. }
  253. void handleTopLevelExpression()
  254. {
  255. // skip token
  256. nextToken();
  257. }
  258. private:
  259. void printError(const char* str)
  260. {
  261. std::cerr << "Error: " << str << " (at line " << _lexer.curLine() << ", col " << _lexer.curCol() << ")";
  262. }
  263. bool parseMacro()
  264. {
  265. if (nextToken() != '(')
  266. {
  267. printError("Expected '(' after MACRO");
  268. return false;
  269. }
  270. nextToken();
  271. std::string macroName = _lexer.getIdentifier();
  272. if (curToken() != CMakeLexer::TOK_IDENTIFIER || macroName.empty())
  273. {
  274. printError("Expected macro name");
  275. return false;
  276. }
  277. _os << macroName << '(';
  278. if (nextToken() == CMakeLexer::TOK_IDENTIFIER)
  279. {
  280. _os << _lexer.getIdentifier();
  281. while (nextToken() == CMakeLexer::TOK_IDENTIFIER)
  282. {
  283. _os << ", " << _lexer.getIdentifier();
  284. }
  285. }
  286. if (curToken() != ')')
  287. {
  288. printError("Missing expected ')'");
  289. }
  290. else
  291. {
  292. _os << ");";
  293. }
  294. // eat the ')'
  295. nextToken();
  296. return true;
  297. }
  298. bool parseSet()
  299. {
  300. if (nextToken() != '(')
  301. {
  302. printError("Expected '(' after SET");
  303. return false;
  304. }
  305. nextToken();
  306. std::string variableName = _lexer.getIdentifier();
  307. if (curToken() != CMakeLexer::TOK_IDENTIFIER || variableName.empty())
  308. {
  309. printError("Expected variable name");
  310. return false;
  311. }
  312. _os << "CMAKE_VARIABLE " << variableName;
  313. nextToken();
  314. while ((curToken() == CMakeLexer::TOK_IDENTIFIER)
  315. || (curToken() == CMakeLexer::TOK_STRING_LITERAL)
  316. || (curToken() == CMakeLexer::TOK_NUMBER_LITERAL))
  317. {
  318. nextToken();
  319. }
  320. if (curToken() != ')')
  321. {
  322. printError("Missing expected ')'");
  323. }
  324. else
  325. {
  326. _os << ";";
  327. }
  328. // eat the ')'
  329. nextToken();
  330. return true;
  331. }
  332. bool parseFunction()
  333. {
  334. if (nextToken() != '(')
  335. {
  336. printError("Expected '(' after FUNCTION");
  337. return false;
  338. }
  339. nextToken();
  340. std::string funcName = _lexer.getIdentifier();
  341. if (curToken() != CMakeLexer::TOK_IDENTIFIER || funcName.empty())
  342. {
  343. printError("Expected function name");
  344. return false;
  345. }
  346. _os << funcName << '(';
  347. if (nextToken() == CMakeLexer::TOK_IDENTIFIER)
  348. {
  349. _os << _lexer.getIdentifier();
  350. while (nextToken() == CMakeLexer::TOK_IDENTIFIER)
  351. {
  352. _os << ", " << _lexer.getIdentifier();
  353. }
  354. }
  355. if (curToken() != ')')
  356. {
  357. printError("Missing expected ')'");
  358. }
  359. else
  360. {
  361. _os << ");";
  362. }
  363. // eat the ')'
  364. nextToken();
  365. return true;
  366. }
  367. std::istream& _is;
  368. std::ostream& _os;
  369. CMakeLexer _lexer;
  370. int _curToken;
  371. int _lastToken;
  372. };
  373. #define STRINGIFY(a) #a
  374. #define DOUBLESTRINGIFY(a) STRINGIFY(a)
  375. int main(int argc, char** argv)
  376. {
  377. assert(argc > 1);
  378. for (int i = 1; i < argc; ++i)
  379. {
  380. std::ifstream ifs(argv[i]);
  381. std::ostream& os = std::cout;
  382. #ifdef USE_NAMESPACE
  383. os << "namespace " << DOUBLESTRINGIFY(USE_NAMESPACE) << " {\n";
  384. #endif
  385. CMakeParser parser(ifs, os);
  386. parser.nextToken();
  387. while (ifs.good())
  388. {
  389. switch (parser.curToken())
  390. {
  391. case CMakeLexer::TOK_EOF:
  392. return ifs.get(); // eat EOF
  393. case CMakeLexer::TOK_MACRO:
  394. parser.handleMacro();
  395. break;
  396. case CMakeLexer::TOK_FUNCTION:
  397. parser.handleFunction();
  398. break;
  399. case CMakeLexer::TOK_SET:
  400. parser.handleSet();
  401. break;
  402. case CMakeLexer::TOK_DOXYGEN_COMMENT:
  403. parser.handleDoxygenComment();
  404. break;
  405. default:
  406. parser.handleTopLevelExpression();
  407. break;
  408. }
  409. }
  410. #ifdef USE_NAMESPACE
  411. os << "}\n";
  412. #endif
  413. }
  414. return EXIT_SUCCESS;
  415. }