/*=============================================================================

  Library: CTK

  Copyright (c) German Cancer Research Center,
    Division of Medical and Biological Informatics

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.

=============================================================================*/

#include <cstdlib>
#include <cctype>
#include <cstdio>
#include <string>
#include <fstream>
#include <iostream>
#include <assert.h>

//--------------------------------------
// Utility classes and functions
//--------------------------------------

// Character traits that compare characters case-insensitively.
// Everything not overridden here is inherited from std::char_traits<char>.
struct ci_char_traits : public std::char_traits<char>
{
  // Upper-cases a character. The detour through unsigned char is required:
  // passing a negative plain char to toupper() is undefined behaviour.
  static int fold(char c)
  { return std::toupper(static_cast<unsigned char>(c)); }

  static bool eq(char c1, char c2)
  { return fold(c1) == fold(c2); }

  static bool ne(char c1, char c2)
  { return fold(c1) != fold(c2); }

  static bool lt(char c1, char c2)
  { return fold(c1) <  fold(c2); }

  static bool gt(char c1, char c2)
  { return fold(c1) >  fold(c2); }

  // Compares the first n characters of s1 and s2 ignoring case.
  // Returns -1, 0 or 1.
  static int compare(const char* s1, const char* s2, std::size_t n)
  {
    for (; n > 0; --n, ++s1, ++s2)
    {
      if (lt(*s1, *s2)) return -1;
      if (gt(*s1, *s2)) return 1;
    }
    return 0;
  }

  // Returns a pointer to the first of the n characters at s that equals a
  // (ignoring case), or a null pointer when there is none. The character
  // traits contract requires null on a miss; returning the past-the-end
  // pointer would be read as a match by std::basic_string.
  static const char* find(const char* s, std::size_t n, char a)
  {
    for (; n > 0; --n, ++s)
    {
      if (eq(*s, a)) return s;
    }
    return 0;
  }
};

// Case-insensitive string; used so that keywords match in any letter case.
typedef std::basic_string<char, ci_char_traits> ci_string;

//--------------------------------------
// Lexer
//--------------------------------------

// Splits a character stream into the few token kinds the parser cares about.
// The lexer always holds one character of lookahead in _lastChar.
class CMakeLexer
{
public:

  // Negative values are token kinds; any other value returned by getToken()
  // is the character itself.
  enum Token {
    TOK_EOF = -1,
    TOK_EOL = -2,

    // commands
    TOK_MACRO = -3, TOK_ENDMACRO = -4,
    TOK_FUNCTION = -5, TOK_ENDFUNCTION = -6,
    TOK_DOXYGEN_COMMENT = -7,
    TOK_SET = -8,

    TOK_STRING_LITERAL = -100,
    TOK_NUMBER_LITERAL = -102,

    // primary
    TOK_IDENTIFIER = -200
  };

  // The lookahead starts out as a blank so that the first getToken() call
  // reads the first real character from the stream.
  CMakeLexer(std::istream& is)
    : _lastChar(' '), _is(is), _line(1), _col(1), _lastWasCR(false)
  {}

  // Reads and returns the next token. For identifiers, keywords, numbers,
  // string literals and doxygen comments the token text is available via
  // getIdentifier() afterwards.
  int getToken()
  {
    // skip whitespace, but leave line ends alone: they are tokens
    while (std::isspace(_lastChar) && _lastChar != '\r' && _lastChar != '\n')
    {
      _lastChar = getChar();
    }

    if (std::isalpha(_lastChar) || _lastChar == '_')
    {
      _identifier.assign(1, static_cast<char>(_lastChar));
      while (std::isalnum(_lastChar = getChar()) || _lastChar == '-' || _lastChar == '_')
      {
        _identifier += static_cast<char>(_lastChar);
      }

      // ci_string makes these comparisons case-insensitive
      if (_identifier == "set")
        return TOK_SET;
      if (_identifier == "function")
        return TOK_FUNCTION;
      if (_identifier == "macro")
        return TOK_MACRO;
      if (_identifier == "endfunction")
        return TOK_ENDFUNCTION;
      if (_identifier == "endmacro")
        return TOK_ENDMACRO;
      return TOK_IDENTIFIER;
    }

    if (std::isdigit(_lastChar))
    {
      // very lax!! number detection
      _identifier.assign(1, static_cast<char>(_lastChar));
      while (std::isalnum(_lastChar = getChar()) || _lastChar == '.' || _lastChar == ',')
      {
        _identifier += static_cast<char>(_lastChar);
      }
      return TOK_NUMBER_LITERAL;
    }

    if (_lastChar == '#')
    {
      _lastChar = getChar();
      if (_lastChar == '!')
      {
        // found a doxygen comment marker; keep the rest of the line
        _identifier.clear();

        _lastChar = getChar();
        while (_lastChar != EOF && _lastChar != '\n' && _lastChar != '\r')
        {
          _identifier += static_cast<char>(_lastChar);
          _lastChar = getChar();
        }
        return TOK_DOXYGEN_COMMENT;
      }

      // skip the comment; the line end (or EOF) is handled further below
      while (_lastChar != EOF && _lastChar != '\n' && _lastChar != '\r')
      {
        _lastChar = getChar();
      }
    }

    if (_lastChar == '"')
    {
      _lastChar = getChar();
      _identifier.clear();
      while (_lastChar != EOF && _lastChar != '"')
      {
        _identifier += static_cast<char>(_lastChar);
        _lastChar = getChar();
      }

      // eat the closing "
      _lastChar = getChar();
      return TOK_STRING_LITERAL;
    }

    // don't eat the EOF
    if (_lastChar == EOF) return TOK_EOF;

    // a line end is "\r", "\n" or "\r\n"; all three yield a single TOK_EOL
    if (_lastChar == '\r' || _lastChar == '\n')
    {
      if (_lastChar == '\r') _lastChar = getChar();
      if (_lastChar == '\n') _lastChar = getChar();
      return TOK_EOL;
    }

    // return the character as its ascii value
    const int thisChar = _lastChar;
    _lastChar = getChar();
    return thisChar;
  }

  // Text of the most recent identifier, keyword, number, string literal or
  // doxygen comment token.
  std::string getIdentifier() const
  {
    return std::string(_identifier.c_str());
  }

  // Line of the read position (1-based).
  int curLine() const
  { return _line; }

  // Column of the read position.
  int curCol() const
  { return _col; }

  // Reads one character from the stream and updates the line/column
  // bookkeeping. Returns EOF at the end of the stream.
  int getChar()
  {
    const int c = _is.get();
    updateLoc(c);
    return c;
  }

private:

  // Advances the line/column counters for the character c just read.
  void updateLoc(int c)
  {
    if (c == '\n' && _lastWasCR)
    {
      // second half of a "\r\n" pair: the line was already counted at '\r'
      _lastWasCR = false;
      return;
    }
    _lastWasCR = (c == '\r');

    if (c == '\n' || c == '\r')
    {
      ++_line;
      _col = 1;
    }
    else if (c != EOF)
    {
      ++_col;
    }
  }

  ci_string _identifier;
  int _lastChar;
  std::istream& _is;

  int _line;
  int _col;
  bool _lastWasCR;
};

//--------------------------------------
// Parser
//--------------------------------------

// Turns the token stream of a CMake file into declarations written to the
// output stream: macro/function signatures, documented SET() variables and
// "#!" comment lines (emitted with a "///" prefix).
class CMakeParser
{

public:

  CMakeParser(std::istream& is, std::ostream& os)
    : _is(is), _os(os), _lexer(is), _curToken(CMakeLexer::TOK_EOF), _lastToken(CMakeLexer::TOK_EOF)
  { }

  // The token the parser is currently looking at.
  int curToken()
  {
    return _curToken;
  }

  // Advances to the next token that is not a line end and returns it.
  int nextToken()
  {
    _lastToken = _curToken;
    _curToken = _lexer.getToken();
    while (_curToken == CMakeLexer::TOK_EOL)
    {
      // Try to preserve lines in output to allow correct line number referencing by doxygen.
      _os << '\n';
      _curToken = _lexer.getToken();
    }
    return _curToken;
  }

  // Called with the current token being TOK_MACRO.
  void handleMacro()
  {
    if(!parseMacro())
    {
      // skip token for error recovery
      nextToken();
    }
  }

  // Called with the current token being TOK_FUNCTION.
  void handleFunction()
  {
    if(!parseFunction())
    {
      // skip token for error recovery
      nextToken();
    }
  }

  // Called with the current token being TOK_SET.
  void handleSet()
  {
    // SET(var ...) following a documentation block is assumed to be a variable declaration.
    if (_lastToken != CMakeLexer::TOK_DOXYGEN_COMMENT)
    {
      // No comment block before
      nextToken();
    } else if(!parseSet())
    {
      // skip token for error recovery
      nextToken();
    }
  }

  // Called with the current token being TOK_DOXYGEN_COMMENT; writes the
  // comment text behind a "///" marker.
  void handleDoxygenComment()
  {
    _os << "///" << _lexer.getIdentifier();
    nextToken();
  }

  // Anything that is not of interest is skipped token by token.
  void handleTopLevelExpression()
  {
    // skip token
    nextToken();
  }

private:

  // Reports a parse problem on stderr together with the lexer position.
  // Each message ends its line so that consecutive errors stay readable.
  void printError(const char* str)
  {
    std::cerr << "Error: " << str << " (at line " << _lexer.curLine() << ", col " << _lexer.curCol() << ")\n";
  }

  // Shared body of parseMacro() and parseFunction(): expects
  // "( name arg arg ... )" and writes "name(arg, arg, ...);".
  // Returns false if the opening parenthesis or the name is missing.
  bool parseSignature(const char* missingParenMsg, const char* missingNameMsg)
  {
    if (nextToken() != '(')
    {
      printError(missingParenMsg);
      return false;
    }

    nextToken();
    const std::string name = _lexer.getIdentifier();
    if (curToken() != CMakeLexer::TOK_IDENTIFIER || name.empty())
    {
      printError(missingNameMsg);
      return false;
    }

    _os << name << '(';
    if (nextToken() == CMakeLexer::TOK_IDENTIFIER)
    {
      _os << _lexer.getIdentifier();
      while (nextToken() == CMakeLexer::TOK_IDENTIFIER)
      {
        _os << ", " << _lexer.getIdentifier();
      }
    }

    if (curToken() != ')')
    {
      printError("Missing expected ')'");
    }
    else
    {
      _os << ");";
    }

    // eat the ')'
    nextToken();
    return true;
  }

  bool parseMacro()
  {
    return parseSignature("Expected '(' after MACRO", "Expected macro name");
  }

  // Expects "( name value ... )" and writes "CMAKE_VARIABLE name;".
  // The values are consumed but not written.
  bool parseSet()
  {
    if (nextToken() != '(')
    {
      printError("Expected '(' after SET");
      return false;
    }

    nextToken();
    const std::string variableName = _lexer.getIdentifier();
    if (curToken() != CMakeLexer::TOK_IDENTIFIER || variableName.empty())
    {
      printError("Expected variable name");
      return false;
    }

    _os << "CMAKE_VARIABLE " << variableName;

    nextToken();
    while ((curToken() == CMakeLexer::TOK_IDENTIFIER)
           || (curToken() == CMakeLexer::TOK_STRING_LITERAL)
           || (curToken() == CMakeLexer::TOK_NUMBER_LITERAL))
    {
      nextToken();
    }

    if (curToken() != ')')
    {
      printError("Missing expected ')'");
    }
    else
    {
      _os << ";";
    }

    // eat the ')'
    nextToken();
    return true;
  }

  bool parseFunction()
  {
    return parseSignature("Expected '(' after FUNCTION", "Expected function name");
  }

  std::istream& _is;
  std::ostream& _os;
  CMakeLexer _lexer;
  int _curToken;
  int _lastToken;
};


#define STRINGIFY(a) #a
#define DOUBLESTRINGIFY(a) STRINGIFY(a)

// Filters every file named on the command line to standard output.
// Returns EXIT_FAILURE if no file was given or a file could not be opened.
int main(int argc, char** argv)
{
  if (argc < 2)
  {
    std::cerr << "Usage: " << (argc > 0 ? argv[0] : "<program>") << " <cmake-file>...\n";
    return EXIT_FAILURE;
  }

  int status = EXIT_SUCCESS;
  for (int i = 1; i < argc; ++i)
  {
    std::ifstream ifs(argv[i]);
    if (!ifs)
    {
      std::cerr << "Error: cannot open file '" << argv[i] << "'\n";
      status = EXIT_FAILURE;
      continue;
    }

    std::ostream& os = std::cout;

    #ifdef USE_NAMESPACE
    os << "namespace " << DOUBLESTRINGIFY(USE_NAMESPACE) << " {\n";
    #endif

    CMakeParser parser(ifs, os);
    parser.nextToken();

    // Loop on the token, not on the stream state: the lexer reads one
    // character ahead, so the stream reports EOF while the last token is
    // still waiting to be handled. Every handler consumes at least one token
    // and the lexer keeps returning TOK_EOF at the end, so this terminates.
    while (parser.curToken() != CMakeLexer::TOK_EOF)
    {
      switch (parser.curToken())
      {
      case CMakeLexer::TOK_MACRO:
        parser.handleMacro();
        break;
      case CMakeLexer::TOK_FUNCTION:
        parser.handleFunction();
        break;
      case CMakeLexer::TOK_SET:
        parser.handleSet();
        break;
      case CMakeLexer::TOK_DOXYGEN_COMMENT:
        parser.handleDoxygenComment();
        break;
      default:
        parser.handleTopLevelExpression();
        break;
      }
    }

    #ifdef USE_NAMESPACE
    os << "}\n";
    #endif
  }

  return status;
}