// ./stgen3 src dest 7.98s user 0.32s system 102% cpu 8.123 total #include #include #include #include #include #include #include #include #include #include #include "util.h" #include "markdown.h" namespace fs = std::filesystem; const std::string TEMPLATE_CODE_START = "{{"; const std::string TEMPLATE_CODE_END = "}}"; enum job_type { COPY_FILE = 1, MARKDOWN = 1 << 1, TEMPLATE = 1 << 2, DELETE_FILE = 1 << 3, MAKE_DIR = 1 << 4, WRITE_ARTICLE = 1 << 5, POSTPROCESS_HTML = 1 << 6 }; struct blog_item { job_type type; fs::path src; time_t post_date; std::unordered_map properties; }; class substitution_plugin { public: std::vector get_arguments(const std::string &invocation, int numargs); /* Must be globally unique: invocation name in the template */ virtual std::string hook_name() = 0; /* Must return lengh of replaced text */ virtual int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties) = 0; virtual ~substitution_plugin() = default; }; class s2_substitution_plugin : public substitution_plugin { public: int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties) override { return 0; } /* Must return lengh of replaced text */ virtual int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties, const std::unordered_map & pages) = 0; }; /* * Returns vector of arguments from string of the form " cmdname:arg1 :arg2 ". * Trims whitespace from around arguments so the result is * {"cmdname", "arg1", "arg2"} * * @numargs: the number of argument to get after the command name: in this case * 2. This is so the last argument can contain ':', since it just reads until * the end. * * if it cannot find numargs arguments, it finds as many arguments as it can * and retuns them, so it is neccessary to check the size of the returned * vector before use. */ std::vector substitution_plugin::get_arguments(const std::string &invocation, int numargs) { std::vector args; int next = invocation.find(":"); if (next == std::string::npos) { return {}; } args.push_back(invocation.substr(0, next)); int last = next + 1; for (int i = 1; i < numargs; i++) { next = invocation.find(":", last); if (next == std::string::npos) { spdlog::warn("get_arguments: not enough arguments"); break; } args.push_back(trim_whitespace(invocation.substr(last, next - last))); last = next + 1; } args.push_back(invocation.substr(last)); return args; } class file_transclude_plugin : public substitution_plugin { public: std::string hook_name() override {return "include";}; int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties) override { auto args = get_arguments(invocation, 1); std::string filename {args.at(1)}; fs::path path; std::string subst_file; if (properties.count("transclude base") && fs::exists(path = fs::path(properties.at("transclude_base")).append(filename))) { subst_file = read_file(path); } else if (fs::exists(path = fs::path(properties.at("current_directory")).append(filename))) { subst_file = read_file(path); } else if(fs::exists(path = fs::path(properties.at("source_root")).append(filename))) { subst_file= read_file(path); } else { return 0; } file_text.replace(start, end - start, subst_file); return subst_file.length(); } }; class mmd_snippet_transclude_plugin : public substitution_plugin { public: std::string hook_name() override {return "md";}; int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties) override { auto args = get_arguments(invocation, 1); fs::path path; mmd::markdown_parser p {}; if (args.size() == 2) { std::string subst_text = p.parse_to_html(args.at(1)); file_text.replace(start, end - start, subst_text); return subst_text.length(); } return 0; } }; class variable_transclude_plugin final : public substitution_plugin { public: std::string hook_name() override {return "";}; int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties) override { auto args = get_arguments(invocation, 1); if (args.size() != 2) { return 0; } std::string name {args.at(1)}; std::string subst_text = TEMPLATE_CODE_START + invocation + TEMPLATE_CODE_END; int rval = 0; if (properties.count(name)) { subst_text = properties.at(name); rval = subst_text.length(); } file_text.replace(start, end - start, subst_text); return rval; } }; class ifdef_plugin : public substitution_plugin { protected: struct do_replace { std::string::size_type first; std::string::size_type second; std::string body; bool name_exists; bool error; }; do_replace should_substitute(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties) { do_replace val {}; val.first = invocation.find(":") + 1; val.second = invocation.find(":", val.first); auto args = get_arguments(invocation, 2); if (args.size() != 3) { val.error = true; return val; } std::string name {args.at(1)}; // std::string name = invocation.substr(val.first, val.second - val.first); std::string subst_text = ""; val.body = args.at(2); // invocation.substr(val.second + 1); if (properties.count(name)) { val.name_exists = true; } return val; } public: std::string hook_name() override {return "ifdef";}; int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties) override { do_replace val = should_substitute(start, end, invocation, file_text, properties); std::string subst_text = ""; if (val.error) { spdlog::warn("Bad ifdef syntax."); } else { if (val.name_exists) { subst_text = val.body; } } file_text.replace(start, end - start, subst_text); return subst_text.length(); } }; class comment_plugin : public substitution_plugin { public: std::string hook_name() override {return "#";}; int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties) override { std::string subst_text = "\n"; file_text.replace(start, end - start, subst_text); return subst_text.length(); // success substituting 0 } }; class ifndef_plugin : public ifdef_plugin { public: std::string hook_name() override {return "ifndef";}; int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties) override { do_replace val = should_substitute(start, end, invocation, file_text, properties); std::string subst_text = ""; if (val.error) { spdlog::warn("Bad ifdef syntax."); } else { if (!val.name_exists) { subst_text = val.body; } } file_text.replace(start, end - start, subst_text); return subst_text.length(); } }; class file_index_plugin : public substitution_plugin { public: struct post_entry { fs::path path; time_t date; std::string title; std::multimap sub_directories; }; std::set exclude_filenames {}; mmd::markdown_parser parser {}; file_index_plugin() {} file_index_plugin(std::set exclude) : exclude_filenames(exclude) {} /** * Recursively get a sorted unordered_map of directories. * * Directory must exist. Does not handle any errors. * */ std::multimap get_directory_list(const fs::path &dir, const std::unordered_map &properties) { // todo support custom depth std::multimap entries; for (auto &p : fs::directory_iterator(dir)) { // skip conditions if (file_ext(p.path()) == "template") { // skip templates continue; } if (file_ext(p.path()) == "draft") { // skip drafts continue; } if (p.path().filename().string().at(0) == '.') { // skip hidden files continue; } if (exclude_filenames.contains(p.path().filename())) { continue; } if (p.is_directory()) { auto m = get_directory_list(p, properties); std::string title = p.path().filename(); title += "/"; if (!m.empty()) { post_entry smallest_entry = m.begin()->second; entries.insert({smallest_entry.date, {p, smallest_entry.date, title, m}}); } else { time_t t = to_time_t(fs::last_write_time(p)); entries.insert({t, {p, t, title}}); } } else { std::string file = read_file(fs::path(p)); if (parser.get_property(file, "noindex")) { continue; } std::optional date_time = parser.get_property(file, "date"); std::optional article_title = parser.get_property(file, "title"); struct post_entry post; // add date if (date_time) { date::sys_seconds timepoint; std::chrono::file_clock f; std::istringstream in {*date_time}; in >> date::parse(properties.at("date-in-format"), timepoint); auto t = std::chrono::system_clock::to_time_t(timepoint); post = {p, t}; } else { fs::file_time_type t = fs::last_write_time(p); auto syst = to_time_t(t); post = {p, syst}; } // add title if (article_title) { post.title = *article_title; } else { post.title = post.path.filename(); } entries.insert({post.date, post}); } } return entries; } public: std::string hook_name() override {return "postlist";}; int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties) override { int rval = 0; auto args = get_arguments(invocation, 1); std::string relpath = args.at(1); fs::path path; if (relpath.at(0) == '/') { // path relative to site root path = fs::path(properties.at("source_root")).append(relpath.substr(1)); } else { // path relative to current directory path = fs::path(properties.at("current_directory")).append(relpath); } std::string markdown_index = ""; if (fs::exists(path)) { auto m = get_directory_list(path,properties); for (auto entry = m.rbegin(); entry != m.rend(); entry++) { std::ostringstream ss; if (entry->second.date != time_t {0}) { date::to_stream(ss, properties.at("date-out-format").c_str(), std::chrono::system_clock::from_time_t(entry->second.date)); } ss << "\n: "; std::string url = stgen::compute_url(entry->second.path, properties); std::string title = entry->second.title; if (fs::is_directory(entry->second.path)) { ss << "[" << title << "](" << url << ")\n\n"; } else { ss << "[" << title << "](" << url << ")\n\n"; } markdown_index += ss.str(); } } else { spdlog::warn("build index: path not exist: {}", path.string()); } rval = markdown_index.length(); if (rval) { file_text.replace(start, end - start, markdown_index); } return rval; } }; class feed_builder { public: enum feed_format { ATOM, RSS }; private: feed_format type = ATOM; tinyxml2::XMLDocument feed; tinyxml2::XMLElement *e_feed; public: feed_builder(const std::string &url, const std::string &title, const std::vector &author_names, feed_format type) : type(type) { using namespace tinyxml2; auto decl = feed.NewDeclaration(); feed.InsertFirstChild(decl); if (ATOM == type) { e_feed = feed.NewElement("feed"); e_feed->SetAttribute("xmlns", "http://www.w3.org/2005/Atom"); feed.InsertEndChild(e_feed); } else if (RSS == type) { auto i_feed = feed.NewElement("rss"); i_feed->SetAttribute("version", 2.0); feed.InsertEndChild(i_feed); e_feed = i_feed->InsertNewChildElement("channel"); } auto e_title = e_feed->InsertNewChildElement("title"); e_title->SetText(title.c_str()); auto e_link = e_feed->InsertNewChildElement("link"); e_link->SetAttribute("rel", "self"); std::string uurl = url; e_link->SetAttribute("href", uurl.c_str()); if (ATOM == type) { auto e_updated = e_feed->InsertNewChildElement("updated"); /* shouldn't really be generation time: should be last modification time * - might be nice to check the diff against the old feed, or only * update when the site has been changed. */ auto timepoint = std::chrono::system_clock::now(); std::ostringstream ss; date::to_stream(ss, "%FT%TZ", timepoint); e_updated->SetText(ss.str().c_str()); auto e_author = e_feed->InsertNewChildElement("author"); for (auto author: author_names) { auto e_author_name = e_author->InsertNewChildElement("name"); e_author_name->SetText(author.c_str()); } auto e_feed_id = e_feed->InsertNewChildElement("id"); e_feed_id->SetText(uurl.c_str()); } else if (RSS == type) { auto e_updated = e_feed->InsertNewChildElement("pubDate"); /* shouldn't really be generation time: should be last modification time * - might be nice to check the diff against the old feed, or only * update when the site has been changed. */ auto timepoint = std::chrono::system_clock::now(); std::ostringstream ss; date::to_stream(ss, "%a, %d %b %Y %T %Z", timepoint); e_updated->SetText(ss.str().c_str()); auto e_feed_id = e_feed->InsertNewChildElement("guid"); e_feed_id->SetText(uurl.c_str()); auto e_descr = e_feed->InsertNewChildElement("description"); e_descr->SetText(title.c_str()); } } tinyxml2::XMLElement *add_article(std::string const &url, std::string const &title, time_t updated) { tinyxml2::XMLElement * article; if (ATOM == type) { article = e_feed->InsertNewChildElement("entry"); auto id = article->InsertNewChildElement("id"); id->SetText(url.c_str()); auto a_title = article->InsertNewChildElement("title"); a_title->SetText(title.c_str()); auto a_updated = article->InsertNewChildElement("updated"); auto timepoint = std::chrono::system_clock::from_time_t(updated); std::ostringstream ss; date::to_stream(ss, "%FT%TZ", timepoint); a_updated->SetText(ss.str().c_str()); auto link = article->InsertNewChildElement("link"); link->SetAttribute("rel", "alternate"); link->SetAttribute("href", url.c_str()); } else if (RSS == type) { article = e_feed->InsertNewChildElement("item"); auto a_title = article->InsertNewChildElement("title"); a_title->SetText(title.c_str()); auto link = article->InsertNewChildElement("link"); link->SetAttribute("rel", "alternate"); link->SetAttribute("href", url.c_str()); } return article; } tinyxml2::XMLElement *add_article(std::string const &url, std::string const &title, time_t updated, time_t published) { auto article = add_article(url, title, updated); std::string key; if (ATOM == type) { key = "published"; auto timepoint = std::chrono::system_clock::from_time_t(published); std::ostringstream ss; date::to_stream(ss, "%FT%TZ", timepoint); auto a_published = article->InsertNewChildElement(key.c_str()); a_published->SetText(ss.str().c_str()); } else if (RSS == type) { key = "pubDate"; auto timepoint = std::chrono::system_clock::from_time_t(published); std::ostringstream ss; date::to_stream(ss, "%a, %d %b %Y %T %Z", timepoint); auto a_published = article->InsertNewChildElement(key.c_str()); a_published->SetText(ss.str().c_str()); } return article; } std::string str() { tinyxml2::XMLPrinter p; feed.Print(&p); return std::string {p.CStr()}; } tinyxml2::XMLElement *add_article(std::string const &url, std::string const &title, const std::string &category, time_t updated, time_t published) { auto article = add_article(url, title, updated, published); auto e_category = article->InsertNewChildElement("category"); if (ATOM == type) { e_category->SetAttribute("term", category.c_str()); } else if (RSS == type) { e_category->SetText(category.c_str()); } return article; } tinyxml2::XMLElement *add_article(std::string const &url, std::string const &title, const std::string &category, time_t updated, time_t published, std::string content) { auto article = add_article(url, title, category, updated, published); if (ATOM == type) { auto e_content = article->InsertNewChildElement("content"); e_content->SetText(content.c_str()); e_content->SetAttribute("type", "html"); } else if (RSS == type) { auto e_content = article->InsertNewChildElement("description"); e_content->SetText(content.c_str()); e_content->SetAttribute("type", "html"); } return article; } }; std::multimap get_sorted_post_list(const std::string &cs_directories, const std::unordered_map &properties, const std::unordered_map & pages) { std::vector paths; std::string::size_type first = 0; auto relpath = cs_directories; std::string::size_type next = relpath.find(","); fs::path path; do { if (next == std::string::npos) next = relpath.length(); std::string spath = relpath.substr(first, next - first); if (spath.at(0) == '/') { // path relative to site root path = fs::canonical(fs::path(properties.at("source_root")).append(spath.substr(1))); } else { // path relative to current directory path = fs::canonical(fs::path(properties.at("current_directory")).append(spath)); } paths.push_back(path); first = next + 1; next = relpath.find(",", first); } while (first < relpath.length()); std::multimap feed_items; for (std::string dir: paths) { for (auto &page: pages) { if (!(page.second.type & (job_type::TEMPLATE | job_type::MARKDOWN))) { continue; } if (page.first.filename() == "index.html" || file_ext(page.first.filename()) == "xml") { continue; } if (page.second.src.string() == properties.at("current_file")) { continue; } if (page.second.src.string().find(dir) != std::string::npos) { // this is a page to add feed_items.insert({page.second.post_date, page.second}); } } } return feed_items; } class rss_feed_plugin : public s2_substitution_plugin { public: std::string hook_name() override {return "feed";}; int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties, const std::unordered_map & pages) override { auto args = get_arguments(invocation, 2); std::string type = args.at(1); std::string relpath = args.at(2); feed_builder::feed_format format; if (type == "rss") { format = feed_builder::feed_format::RSS; } else if (type == "atom") { format = feed_builder::feed_format::ATOM; } auto feed_items = get_sorted_post_list(relpath, properties, pages); feed_builder f {stgen::compute_url(properties.at("current_file"), properties), properties.at("name"), {properties.at("author")}, format}; for (auto entry = feed_items.rbegin(); entry != feed_items.rend(); entry++) { // write into rss feed std::string category = entry->second.src.parent_path().filename().string(); std::string title; if (entry->second.properties.count("title")) title = entry->second.properties.at("title"); else title = entry->second.src.filename().string(); std::string content; if (entry->second.properties.count("original")) { content = entry->second.properties.at("original"); } else { // fall back to full generated page. content = entry->second.properties.at("body"); } f.add_article(stgen::compute_url(entry->second.src, properties), title, category, entry->second.post_date, entry->second.post_date, content); } std::string text = f.str(); file_text.replace(start, end - start, text); return text.length(); } }; class microblog_plugin : public s2_substitution_plugin { public: std::string hook_name() override {return "microblog";}; int perform_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties, const std::unordered_map & pages) override { auto args = get_arguments(invocation, 1); std::string relpath = args.at(1); std::multimap feed_items = get_sorted_post_list(relpath, properties, pages); std::string text = ""; // feed_builder f {compute_url(properties.at("current_file"), properties), properties.at("name"), {properties.at("author")}}; for (auto entry = feed_items.rbegin(); entry != feed_items.rend(); entry++) { // write into rss feed std::string category = entry->second.src.parent_path().filename().string(); std::string content; if (entry->second.properties.count("original")) { content = entry->second.properties.at("original"); } else { // fall back to full generated page. content = entry->second.properties.at("body"); } std::ostringstream ss {}; auto timepoint = std::chrono::system_clock::from_time_t(entry->second.post_date); date::to_stream(ss, properties.at("date-out-format").c_str(), timepoint); text += "\n
"; text += ""; text += "
"; if (entry->second.properties.count("title")) { text += "\n

" + entry->second.properties.at("title") + "

\n"; } text += content; text += "
\n"; } file_text.replace(start, end - start, text); return text.length(); } }; class templater { std::unordered_map substitution_commands {}; std::unordered_map s2_substitution_commands {}; const std::optional> NO_PAGES {}; public: templater() { /* substitution_commands[variable_transclude_plugin::hook_name] = new variable_transclude_plugin {}; substitution_commands[file_index_plugin::hook_name] = new file_index_plugin{}; substitution_commands[ifdef_plugin::hook_name] = new ifdef_plugin{}; substitution_commands[ifndef_plugin::hook_name] = new ifndef_plugin{}; substitution_commands[file_transclude_plugin::hook_name] = new file_transclude_plugin{}; substitution_commands[mmd_snippet_transclude_plugin::hook_name] = new mmd_snippet_transclude_plugin{}; s2_substitution_commands[rss_feed_plugin::hook_name] = new rss_feed_plugin{}; s2_substitution_commands[microblog_plugin::hook_name] = new microblog_plugin{}; */ } templater(std::vector s1, std::vector s2) { for (auto s : s1) { substitution_commands.insert({s->hook_name(), s}); } for (auto s : s2) { s2_substitution_commands.insert({s->hook_name(), s}); } } struct done_subtitution_options { int num; bool recurse = false; }; done_subtitution_options do_substitution(int start, int end, const std::string &invocation, std::string &file_text, const std::unordered_map &properties, const std::optional> &pages) { std::string command_name = invocation.substr(0, invocation.find(":")); bool recurse = true; if (command_name == "feed") recurse = false; if (substitution_commands.count(command_name)) { int num = substitution_commands.at(command_name)->perform_substitution( start, end, invocation, file_text, properties); return {num, recurse}; } if (pages) { if (s2_substitution_commands.count(command_name)) { int num = s2_substitution_commands.at(command_name)->perform_substitution( start, end, invocation, file_text, properties, *pages); return {num, recurse}; } } spdlog::warn("substplugin error: {}", invocation); return {}; } void run_substitution_plugins(std::string &text, const std::unordered_map &properties) { run_substitution_plugins(text, properties, NO_PAGES, true); } void run_substitution_plugins_once(std::string &text, const std::unordered_map &properties) { run_substitution_plugins(text, properties, NO_PAGES, false); } void run_substitution_plugins(std::string &text, const std::unordered_map &properties, const std::optional> &pages) { run_substitution_plugins(text, properties,pages,true); } void run_substitution_plugins(std::string &text, const std::unordered_map &properties, const std::optional> &pages, bool allow_recursion) { std::string::size_type next = text.find(TEMPLATE_CODE_START, 0); for (auto next = text.find(TEMPLATE_CODE_START); next != std::string::npos; next = text.find(TEMPLATE_CODE_START, next)) { // allow escaping inclusion if (next > 0 && text.at(next - 1) == '\\') { next = text.find(TEMPLATE_CODE_END, next); continue; } std::string::size_type end = text.find(TEMPLATE_CODE_END, next); std::string::size_type next_start = text.find(TEMPLATE_CODE_START, next + TEMPLATE_CODE_START.length()); if (end == std::string::npos) { return; } int search_from = next + TEMPLATE_CODE_START.length(); int loops = 0; while (next_start < end) { // we found a nested tag // {{ifdef:tag:hello world {{tag}} }} // next_start = text.find(TEMPLATE_CODE_START, search_from); end = text.find(TEMPLATE_CODE_END, search_from); // spdlog::warn("searching {}", text.substr(search_from, end)); // spdlog::warn("found start {} end {}", next_start - search_from, end - search_from); if (end == std::string::npos) { spdlog::warn("Reached end of file when looking for closing template tag: {}", properties.at("current_file")); return; } if (next_start == std::string::npos) { break; } search_from = end + TEMPLATE_CODE_END.length(); } int ss = next + TEMPLATE_CODE_START.length(); std::string invocation = text.substr(ss, end - ss); int ff = invocation.find_first_not_of(" \n\t"); if (ff != std::string::npos) { int ll = invocation.find_last_not_of(" \n\t"); invocation = invocation.substr(ff, ll - ff + 1); } end += TEMPLATE_CODE_END.length(); auto subst = do_substitution(next, end, invocation, text, properties, pages); //next += subst.num; if (!subst.num) { // unsuccesful next += TEMPLATE_CODE_START.length(); // spdlog::info("Substitution failed, {} in {}", nvocation, properties.at("current_file")); } else if (!allow_recursion || !subst.recurse) { // do not recurse into substituted content next += subst.num; } // at this point next needs to point to the start of the text just // substituted in } } ~templater() { for (auto e : substitution_commands) { delete e.second; } for (auto e: s2_substitution_commands) { delete e.second; } } };