Browse Source

improve threading

master
alistair 1 year ago
parent
commit
c982d3df77
  1. 14
      source/lib.cpp
  2. 4
      source/lib.hpp
  3. 93
      source/main.cpp

14
source/lib.cpp

@ -63,7 +63,7 @@ auto get_functions( @@ -63,7 +63,7 @@ auto get_functions(
}
source_file::source_file(const std::string &filename, TSParser *parser)
source_file::source_file(const std::string &filename, std::optional<const std::string> filetext, TSParser *parser)
: full_file_path(filename),
file_extension(std::filesystem::path(filename).extension().string()),
file_name(std::filesystem::path(filename).filename().string()),
@ -76,12 +76,17 @@ source_file::source_file(const std::string &filename, TSParser *parser) @@ -76,12 +76,17 @@ source_file::source_file(const std::string &filename, TSParser *parser)
std::ifstream in(path, std::ifstream::ate | std::ifstream::binary);
bytes = static_cast<uint32_t>(in.tellg());
if (filetext) {
full_source = *filetext;
} else {
full_source = "";
}
if (!(is_header || is_c_source)) {
return;
}
full_source = read_file(filename);
tree = ts_parser_parse_string(parser, nullptr, full_source.c_str(),
static_cast<uint32_t>(full_source.size()));
@ -171,9 +176,6 @@ source_file::~source_file() { @@ -171,9 +176,6 @@ source_file::~source_file() {
}
}
source_file::source_file(const std::string &filename)
: source_file(filename, get_new_parser()){}
std::string source_file::as_string() {
return fmt::format(
"{}\n{} bytes, {} lines\ncomments {}: {} lines, {} bytes\nfunctions {}: {} lines, {} bytes",

4
source/lib.hpp

@ -66,8 +66,8 @@ class functions_counter { @@ -66,8 +66,8 @@ class functions_counter {
class source_file {
public:
source_file(const std::string &filename);
source_file(const std::string &filename, TSParser *parser);
source_file(const std::string &filename, std::optional<const std::string> filetext, TSParser *parser);
source_file(const source_file& a);
~source_file();

93
source/main.cpp

@ -25,9 +25,13 @@ @@ -25,9 +25,13 @@
#include "db.hpp"
#include <queue>
/// One unit of work placed on the queue: a file to analyze together with its
/// contents when the producer has already read them.
struct file_job {
/// Path of the file to process.
std::filesystem::path path;
/// Pre-read file contents; left empty when the producer did not read the file
/// (the directory walk in main only reads ".c" and ".h" files).
std::optional<std::string> text;
};
struct work_queue {
std::queue<std::filesystem::path> queue;
std::queue<file_job> queue;
std::mutex queue_mutex;
std::mutex results_mutex;
std::list<source_file> results;
@ -48,7 +52,7 @@ struct work_queue { @@ -48,7 +52,7 @@ struct work_queue {
closed = true;
}
void add_files(std::vector<std::filesystem::path> path) {
void add_files(std::vector<file_job> path) {
std::lock_guard<std::mutex> lock (queue_mutex);
for (const auto &p: path) {
queue.push(p);
@ -60,10 +64,11 @@ struct work_queue { @@ -60,10 +64,11 @@ struct work_queue {
return queue.empty() && closed;
}
std::vector<std::filesystem::path> pop_files(int batch) {
std::vector<file_job> pop_files(int batch) {
std::lock_guard<std::mutex> lock (queue_mutex);
std::vector<std::filesystem::path> paths {};
std::vector<file_job> paths {};
paths.reserve(batch);
for (int i = 0; i < batch && !queue.empty(); i++) {
paths.emplace_back(queue.front());
queue.pop();
@ -106,24 +111,31 @@ struct work_queue { @@ -106,24 +111,31 @@ struct work_queue {
};
bool
do_work_one(std::shared_ptr<work_queue> queue, int batch, TSParser *parser)
{
if (queue->done()) {
return false;
}
void do_work(std::shared_ptr<work_queue> queue, int batch) {
auto files = queue->pop_files(batch);
std::list<source_file> results;
for (const auto &path: files) {
results.emplace_back(path.path, path.text, parser);
}
while (true) {
if (queue->done()) {
return;
}
queue->add_results(results);
return true;
}
auto files = queue->pop_files(batch);
std::list<source_file> results;
for (const auto &path: files) {
std::stringstream s;
s << std::this_thread::get_id();
results.emplace_back(path);
}
queue->add_results(results);
/// Worker loop used as the thread entry point: obtains one parser from
/// get_new_parser() and reuses it for every call to do_work_one() until that
/// call returns false.
/// @param queue shared work queue that jobs are taken from and results added to
/// @param batch maximum number of jobs handed to each do_work_one() call
/// NOTE(review): the parser obtained here is not released in this function —
/// confirm whether cleanup is handled elsewhere.
void do_work(std::shared_ptr<work_queue> queue, int batch) {
auto parser = get_new_parser();
while (do_work_one(queue, batch, parser)) {
; // intentionally empty: all work happens inside do_work_one()
}
}
@ -138,6 +150,7 @@ auto main(int argc, char **argv) -> int @@ -138,6 +150,7 @@ auto main(int argc, char **argv) -> int
->default_value("database.db"))
("i,input", "The file or direcotry to analyze", cxxopts::value<std::string>())
("t,threads", "The number of threads to use", cxxopts::value<int>()->default_value("1"))
("b,batch", "The number of files to process each batch", cxxopts::value<int>()->default_value("100"))
;
auto result = options.parse(argc, argv);
@ -162,20 +175,25 @@ auto main(int argc, char **argv) -> int @@ -162,20 +175,25 @@ auto main(int argc, char **argv) -> int
if (std::filesystem::is_directory(path)) {
std::vector<std::thread> threads {};
int nthreads = result["threads"].as<int>();
int batch = 1 + static_cast<int>(queue->results_size()) / nthreads;
batch = 100;
int nthreads = result["threads"].as<int>() - 1;
if (nthreads < 0) nthreads = 0;
int batch = result["batch"].as<int>();
if (batch < 1) batch = 1;
for (int i = 0; i < nthreads; i++) {
threads.emplace_back(do_work, queue, batch);
}
/** populate the queue */
std::vector<std::filesystem::path> files;
std::vector<file_job> files;
for (const std::filesystem::directory_entry& dir_entry :
std::filesystem::recursive_directory_iterator(path))
{
files.emplace_back(dir_entry.path());
if (files.size() > 100) {
file_job job = {dir_entry.path(), {}};
if (dir_entry.path().extension() == ".c" || dir_entry.path().extension() == ".h") {
job.text = read_file(dir_entry.path().string());
}
files.emplace_back(job);
if (files.size() >= static_cast<size_t>(batch)) {
queue->add_files(files);
files.clear();
}
@ -184,16 +202,13 @@ auto main(int argc, char **argv) -> int @@ -184,16 +202,13 @@ auto main(int argc, char **argv) -> int
queue->close_queue();
files.clear();
auto parser = get_new_parser();
while (true) {
if (queue->done()) {
break;
}
while (do_work_one(queue, batch, parser)) {
auto size = queue->results_size();
if (size > 10000) {
queue->wait_store_results(database);
queue->store_results(database);
}
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
for (auto &t: threads) {
@ -201,18 +216,20 @@ auto main(int argc, char **argv) -> int @@ -201,18 +216,20 @@ auto main(int argc, char **argv) -> int
}
queue->wait_store_results(database);
} else {
auto s = source_file(path.string());
fmt::print("{}\n", s.as_string());
if (path.extension() == ".c" || path.extension() == ".h") {
auto s = source_file(path.string(), read_file(path.string()), get_new_parser());
fmt::print("{}\n", s.as_string());
std::string line {};
std::string line {};
while (line != "exit") {
fmt::print("query > ");
std::getline(std::cin, line);
if (line != "exit") {
run_query(s, line);
}
while (line != "exit") {
fmt::print("query > ");
std::getline(std::cin, line);
if (line != "exit") {
run_query(s, line);
}
}
}
}

Loading…
Cancel
Save