/*! \file \brief Делаем для теста раскраску плюсового кода в HTML - обработка препроцессора вынесена в библиотечный класс CcPreprocessorFilter #include "umba/umba.h" // #include "umba/tokenizer.h" #include "umba/assert.h" #include "umba/filename.h" #include "umba/filesys.h" #include "umba/tokenizer/token_filters.h" #include "umba/app_main.h" // #include "umba/tokenizer/lang/cpp.h" // #include "umba/debug_helpers.h" // #include "umba/text_position_info.h" #include "umba/iterator.h" #include "umba/the.h" // #include "marty_cpp/src_normalization.h" // #include <iostream> #include <map> #include <set> #include <unordered_set> #include <unordered_map> #include <deque> #include <sstream> #define USE_SIMPLE_NUMBER_SUFFIX_GLUING_FILTER // #define NUMBER_PRINTING_PRINT_PARSED_VALUE // #define PRINT_ONLY_NUMBERS // #define USE_TRY_CATCH //#define DUPLICATE_TO_STD_OUT #include "utils.h" // using std::cout; using std::cerr; /* color:blue } VSCode statements AF00DB keywords 0000FF known types 267F99 brackets 319331 comment 008000 highlighted str 008000 str A31515 .m .com { color:green } .m .str { color:#8b0000 } .m .lineQuote { color:#c1e5b9 } .m .lineQuote.level1 { color:#137900 } .m .lineQuote.level2 { color:#74b967 } .m .lineQuote.level3 { color:#9fd095 } .m .tagline { color:#d3acac; text-indent:5mm; padding-top:5mm } .m TABLE.formatter { margin:5px } .m TH.formatter { font-family:Verdana,Geneva,sans-serif; background- #include "css_style.h" std::string inputFilename; UMBA_MAIN() { // auto t1 = getCharClassTable(); // auto t2 = getTrieVector(); // auto t3 = getString(); // auto t4 = getIterator(); using namespace umba::tokenizer; std::vector<std::string> inputFiles; for(int argIdx=1; argIdx<argc; ++argIdx) { if (false) { } else { inputFiles.emplace_back(argv[argIdx]); } } if (umba::isDebuggerPresent() || inputFiles.empty()) { std::string cwd = umba::filesys::getCurrentDirectory(); std::cout << "Working Dir: " << cwd << "\n"; std::string rootPath; #if (defined(WIN32) || defined(_WIN32)) if (winhelpers::isProcessHasParentOneOf({"devenv"})) { // По умолчанию студия задаёт текущим каталогом На уровень выше от того, где лежит бинарник rootPath = umba::filename::makeCanonical(umba::filename::appendPath<std::string>(cwd, "..\\..\\..\\")); //argsParser.args.push_back("--batch-output-root=D:/temp/mdpp-test"); } else if (winhelpers::isProcessHasParentOneOf({"code"})) { // По умолчанию VSCode задаёт текущим каталогом тот, где лежит бинарник rootPath = umba::filename::makeCanonical(umba::filename::appendPath<std::string>(cwd, "..\\..\\..\\..\\")); //argsParser.args.push_back("--batch-output-root=C:/work/temp/mdpp-test"); } else { //rootPath = umba::filename::makeCanonical(umba::filename::appendPath<std::string>(cwd, "..\\..\\..\\")); } //#endif if (!rootPath.empty()) rootPath = umba::filename::appendPathSepCopy(rootPath); #endif inputFiles.clear(); // inputFiles.emplace_back(umba::filename::appendPath(rootPath, std::string("_libs/umba/preprocessor.h"))); // inputFiles.emplace_back(umba::filename::appendPath(rootPath, std::string("_libs/umba/the.h"))); // inputFiles.emplace_back(umba::filename::appendPath(rootPath, std::string("_libs/umba/stl_keil_initializer_list.h"))); // inputFiles.emplace_back(umba::filename::appendPath(rootPath, std::string("_libs/umba/stl_keil_type_traits.h"))); // inputFiles.emplace_back(umba::filename::appendPath(rootPath, std::string("_libs/umba/string_plus.h"))); // inputFiles.emplace_back(umba::filename::appendPath(rootPath, std::string("_libs/umba/rgbquad.h"))); // inputFiles.emplace_back(umba::filename::appendPath(rootPath, std::string("_libs/umba/"))); // inputFiles.emplace_back(umba::filename::appendPath(rootPath, std::string("_libs/marty_decimal/tests/src/regression_tests.cpp"))); // inputFiles.emplace_back(umba::filename::appendPath(rootPath, std::string("tests/tokenizer/number_ull.cpp"))); inputFiles.emplace_back(umba::filename::appendPath(rootPath, std::string("tests/suffix_gluing_sample.h"))); } // Пока с kind не определился, и пихаем в keywords всё std::unordered_map<std::string, int> cppKeywords; // std::unordered_map<std::string, int> cppPreprocessorKeywords; { // https://en.cppreference.com/w/cpp/keyword std::vector<std::string> allCppKeywords = { "alignas", "alignof", "and", "and_eq", "asm", "atomic_cancel", "atomic_commit", "atomic_noexcept", "auto" , "bitand", "bitor", "bool", "break" , "case", "catch", "char", "char8_t", "char16_t", "char32_t", "class", "compl", "concept", "const", "consteval" , "constexpr", "constinit", "const_cast", "continue", "co_await", "co_return", "co_yield" , "decltype", "default", "delete", "do", "double", "dynamic_cast" , "else", "enum", "explicit", "export", "extern" , "false", "float", "for", "friend" , "goto" , "if", "inline", "int" , "long", "mutable" , "namespace", "new", "noexcept", "not", "not_eq", "nullptr" , "operator", "or", "or_eq", "private", "protected", "public" , "reflexpr", "register", "reinterpret_cast", "requires", "return" , "short", "signed", "sizeof", "static", "static_assert", "static_cast", "struct", "switch", "synchronized" , "template", "this", "thread_local", "throw", "true", "try", "typedef", "typeid", "typename" , "union", "unsigned", "using", "virtual", "void", "volatile" , "wchar_t", "while", "xor", "xor_eq" // identifiers with special meaning , "final", "override", "transaction_safe", "transaction_safe_dynamic", "import", "module" }; for(const auto &kw : allCppKeywords) { cppKeywords[kw] = 0; } } auto tokenizerBuilder = umba::tokenizer::makeTokenizerBuilderCpp<char>(); using TokenizerBuilderType = decltype(tokenizerBuilder); //using tokenizer_type = std::decay<decltype(tokenizer)>; using tokenizer_type = typename TokenizerBuilderType::tokenizer_type; using InputIteratorType = typename tokenizer_type::iterator_type; using tokenizer_char_type = typename tokenizer_type::value_type; using messages_string_type = typename tokenizer_type::messages_string_type; using token_parsed_data_type = typename tokenizer_type::token_parsed_data_type; std::ostringstream oss; bool bOk = true; bool inPreprocessor = false; auto tokenHandler = [&]( auto &tokenizer , bool bLineStart, payload_type tokenType , InputIteratorType b, InputIteratorType e , token_parsed_data_type parsedData // std::basic_string_view<tokenizer_char_type> parsedData , messages_string_type &errMsg ) -> bool { UMBA_USED(parsedData); if (tokenType==UMBA_TOKENIZER_TOKEN_CTRL_RST || tokenType==UMBA_TOKENIZER_TOKEN_CTRL_FIN) return true; else if (tokenType==UMBA_TOKENIZER_TOKEN_CTRL_CC_PP_START) { oss << "<span class=\"pp\">"; #if defined(DUPLICATE_TO_STD_OUT) std::cout << "<span class=\"pp\">"; #endif inPreprocessor = true; return true; } else if (tokenType==UMBA_TOKENIZER_TOKEN_CTRL_CC_PP_END) { oss << "</span>"; #if defined(DUPLICATE_TO_STD_OUT) std::cout << "</span>"; #endif inPreprocessor = false; return true; } else if (tokenType==UMBA_TOKENIZER_TOKEN_CTRL_CC_PP_DEFINE) { return true; } else if (tokenType==UMBA_TOKENIZER_TOKEN_CTRL_CC_PP_INCLUDE) { return true; } if (tokenType&UMBA_TOKENIZER_TOKEN_CTRL_FLAG) { return true; // Управляющий токен, не надо выводить, никакой нагрузки при нём нет } if (tokenType==UMBA_TOKENIZER_TOKEN_IDENTIFIER) { auto identStr = umba::iterator::makeString(b, e); if (!inPreprocessor) { if (cppKeywords.find(identStr)!=cppKeywords.end()) tokenType = UMBA_TOKENIZER_TOKEN_KEYWORD_SET1_FIRST; } else // preprocessor { if (cppKeywords.find(identStr)!=cppKeywords.end()) tokenType = UMBA_TOKENIZER_TOKEN_KEYWORD_SET1_FIRST; } } printTokenHtml(oss, tokenType, b, e); if (tokenType>=UMBA_TOKENIZER_TOKEN_NUMBER_LITERAL_FIRST && tokenType<=UMBA_TOKENIZER_TOKEN_NUMBER_LITERAL_LAST) { oss.flush(); // https://en.cppreference.com/w/cpp/utility/variant/get_if // https://en.cppreference.com/w/cpp/utility/variant/get if (tokenType&UMBA_TOKENIZER_TOKEN_FLOAT_FLAG) { auto numericLiteralData = std::get<typename tokenizer_type::FloatNumericLiteralData>(parsedData); #if defined(NUMBER_PRINTING_PRINT_PARSED_VALUE) oss << " " << numericLiteralData.data << " "; if (numericLiteralData.fIntegerOverflow) oss << "integer part overflow "; if (numericLiteralData.fFractionalOverflow) oss << "floating part overflow "; #endif } else { auto numericLiteralData = std::get<typename tokenizer_type::IntegerNumericLiteralData>(parsedData); #if defined(NUMBER_PRINTING_PRINT_PARSED_VALUE) oss << " " << numericLiteralData.data << " "; if (numericLiteralData.fOverflow) oss << "overflow "; #endif } } return true; }; auto tokenizer = umba::tokenizer::makeTokenizerCpp( tokenizerBuilder , tokenHandler ); tokenizer.unexpectedHandler = [&](auto &tokenizer, InputIteratorType it, InputIteratorType itEnd, const char* srcFile, int srcLine) -> bool { printError(std::cout, inputFilename, UMBA_TOKENIZER_TOKEN_UNEXPECTED, it, itEnd, srcFile, srcLine); return false; }; tokenizer.reportUnknownOperatorHandler = [&](auto &tokenizer, InputIteratorType b, InputIteratorType e) { //cout << "Possible unknown operator: '" << umba::iterator::makeString(b, e) << "'\n"; UMBA_USED(b); UMBA_USED(e); }; tokenizer.reportStringLiteralMessageHandler = [&](auto &tokenizer, bool bErr, InputIteratorType it, const messages_string_type &msg) { UMBA_USED(bErr); UMBA_USED(it); UMBA_USED(msg); }; if (inputFiles.empty()) { std::cout << "No input files taken\n"; return 1; } #if defined(WIN32) || defined(_WIN32) marty_cpp::ELinefeedType outputLinefeed = marty_cpp::ELinefeedType::crlf; #else marty_cpp::ELinefeedType outputLinefeed = marty_cpp::ELinefeedType::lf; #endif for(const auto fn: inputFiles) { inputFilename = fn; std::string text; std::cout << "\nProcessing: '" << inputFilename << "'\n"; if (!umba::filesys::readFile(inputFilename, text)) { std::cout << "Failed to read input file\n"; continue; } if (text.empty()) { std::cout << "Input file is empty\n"; continue; } if (text.back()!='\n' && text.back()!='\r') { std::cout << "Warning: no linefeed at end of file\n"; } oss = std::ostringstream(); bOk = true; oss<<"<!DOCTYPE html>\n<html>\n<head>\n<meta charset=\"utf-8\"/>\n<style>\n" << cssStyle << "\n</style>\n</head>\n<body>\n<pre>\n"; #if defined(USE_TRY_CATCH) try { #endif auto itBegin = InputIteratorType(text.data(), text.size()); auto itEnd = InputIteratorType(); tokenizer.tokenizeInit(); InputIteratorType it = itBegin; for(; it!=itEnd && bOk; ++it) { if (!tokenizer.tokenize(it, itEnd)) { bOk = false; } } if (bOk) { bOk = tokenizer.tokenizeFinalize(itEnd); } //if (bOk) { oss<<"</pre>\n</body>\n</html>\n"; if (inputFilename.empty()) { std::cout << oss.str() << "\n"; } else { #if defined(DUPLICATE_TO_STD_OUT) std::cout << oss.str() << "\n"; #endif auto resultText = marty_cpp::converLfToOutputFormat(oss.str(), outputLinefeed); auto outputFilename = umba::filename::replaceExtention(inputFilename, std::string("html")); std::cout << "Writting output to '" << outputFilename << "' - "; if (!umba::filesys::writeFile(outputFilename, resultText, true /* overwrite */ )) std::cout << "Failed"; else std::cout << "Success"; std::cout << "\n"; } } #if defined(USE_TRY_CATCH) } catch(const std::exception &e) { #if defined(DUPLICATE_TO_STD_OUT) std::cout << oss.str() << "\n"; #endif // !!! Inform about exception auto resultText = marty_cpp::converLfToOutputFormat("!!!" + oss.str(), outputLinefeed); auto outputFilename = umba::filename::replaceExtention(inputFilename, std::string("html")); umba::filesys::writeFile(outputFilename, resultText, true /* overwrite */ ); std::cout << "!!!\n"; } #endif } return bOk ? 0 : 1; }