|
|
|
@ -25,9 +25,13 @@
@@ -25,9 +25,13 @@
|
|
|
|
|
#include "db.hpp" |
|
|
|
|
#include <queue> |
|
|
|
|
|
|
|
|
|
/**
 * One unit of work for the file-processing queue.
 *
 * Kept as a plain aggregate (no constructors) so it can be brace-initialised
 * as `file_job{path, text}` or `file_job{path, {}}`.
 */
struct file_job {
    /** Location of the file this job refers to. */
    std::filesystem::path path;

    /** Contents of the file when they were loaded up front; empty otherwise. */
    std::optional<std::string> text;
};
|
|
|
|
|
|
|
|
|
struct work_queue { |
|
|
|
|
std::queue<std::filesystem::path> queue; |
|
|
|
|
std::queue<file_job> queue; |
|
|
|
|
std::mutex queue_mutex; |
|
|
|
|
std::mutex results_mutex; |
|
|
|
|
std::list<source_file> results; |
|
|
|
@ -48,7 +52,7 @@ struct work_queue {
@@ -48,7 +52,7 @@ struct work_queue {
|
|
|
|
|
closed = true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void add_files(std::vector<std::filesystem::path> path) { |
|
|
|
|
void add_files(std::vector<file_job> path) { |
|
|
|
|
std::lock_guard<std::mutex> lock (queue_mutex); |
|
|
|
|
for (const auto &p: path) { |
|
|
|
|
queue.push(p); |
|
|
|
@ -60,10 +64,11 @@ struct work_queue {
@@ -60,10 +64,11 @@ struct work_queue {
|
|
|
|
|
return queue.empty() && closed; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
std::vector<std::filesystem::path> pop_files(int batch) { |
|
|
|
|
std::vector<file_job> pop_files(int batch) { |
|
|
|
|
std::lock_guard<std::mutex> lock (queue_mutex); |
|
|
|
|
|
|
|
|
|
std::vector<std::filesystem::path> paths {}; |
|
|
|
|
std::vector<file_job> paths {}; |
|
|
|
|
paths.reserve(batch); |
|
|
|
|
for (int i = 0; i < batch && !queue.empty(); i++) { |
|
|
|
|
paths.emplace_back(queue.front()); |
|
|
|
|
queue.pop(); |
|
|
|
@ -106,24 +111,31 @@ struct work_queue {
@@ -106,24 +111,31 @@ struct work_queue {
|
|
|
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
do_work_one(std::shared_ptr<work_queue> queue, int batch, TSParser *parser)
|
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
if (queue->done()) { |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void do_work(std::shared_ptr<work_queue> queue, int batch) { |
|
|
|
|
auto files = queue->pop_files(batch); |
|
|
|
|
std::list<source_file> results; |
|
|
|
|
for (const auto &path: files) { |
|
|
|
|
results.emplace_back(path.path, path.text, parser); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
while (true) { |
|
|
|
|
if (queue->done()) { |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
queue->add_results(results); |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
auto files = queue->pop_files(batch); |
|
|
|
|
std::list<source_file> results; |
|
|
|
|
for (const auto &path: files) { |
|
|
|
|
std::stringstream s; |
|
|
|
|
s << std::this_thread::get_id(); |
|
|
|
|
results.emplace_back(path); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
queue->add_results(results); |
|
|
|
|
void do_work(std::shared_ptr<work_queue> queue, int batch) { |
|
|
|
|
|
|
|
|
|
auto parser = get_new_parser(); |
|
|
|
|
|
|
|
|
|
while (do_work_one(queue, batch, parser)) { |
|
|
|
|
; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -138,6 +150,7 @@ auto main(int argc, char **argv) -> int
@@ -138,6 +150,7 @@ auto main(int argc, char **argv) -> int
|
|
|
|
|
->default_value("database.db")) |
|
|
|
|
("i,input", "The file or direcotry to analyze", cxxopts::value<std::string>()) |
|
|
|
|
("t,threads", "The number of threads to use", cxxopts::value<int>()->default_value("1")) |
|
|
|
|
("b,batch", "The number of files to process each batch", cxxopts::value<int>()->default_value("100")) |
|
|
|
|
; |
|
|
|
|
|
|
|
|
|
auto result = options.parse(argc, argv); |
|
|
|
@ -162,20 +175,25 @@ auto main(int argc, char **argv) -> int
@@ -162,20 +175,25 @@ auto main(int argc, char **argv) -> int
|
|
|
|
|
if (std::filesystem::is_directory(path)) { |
|
|
|
|
|
|
|
|
|
std::vector<std::thread> threads {}; |
|
|
|
|
int nthreads = result["threads"].as<int>(); |
|
|
|
|
int batch = 1 + static_cast<int>(queue->results_size()) / nthreads; |
|
|
|
|
batch = 100; |
|
|
|
|
int nthreads = result["threads"].as<int>() - 1; |
|
|
|
|
if (nthreads < 0) nthreads = 0; |
|
|
|
|
int batch = result["batch"].as<int>(); |
|
|
|
|
if (batch < 1) batch = 1; |
|
|
|
|
for (int i = 0; i < nthreads; i++) { |
|
|
|
|
threads.emplace_back(do_work, queue, batch); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** populate the queue */ |
|
|
|
|
std::vector<std::filesystem::path> files; |
|
|
|
|
std::vector<file_job> files; |
|
|
|
|
for (const std::filesystem::directory_entry& dir_entry :
|
|
|
|
|
std::filesystem::recursive_directory_iterator(path)) |
|
|
|
|
{ |
|
|
|
|
files.emplace_back(dir_entry.path()); |
|
|
|
|
if (files.size() > 100) { |
|
|
|
|
file_job job = {dir_entry.path(), {}}; |
|
|
|
|
if (dir_entry.path().extension() == ".c" || dir_entry.path().extension() == ".h") { |
|
|
|
|
job.text = read_file(dir_entry.path().string()); |
|
|
|
|
} |
|
|
|
|
files.emplace_back(job); |
|
|
|
|
if (files.size() >= static_cast<size_t>(batch)) { |
|
|
|
|
queue->add_files(files); |
|
|
|
|
files.clear(); |
|
|
|
|
} |
|
|
|
@ -184,16 +202,13 @@ auto main(int argc, char **argv) -> int
@@ -184,16 +202,13 @@ auto main(int argc, char **argv) -> int
|
|
|
|
|
queue->close_queue(); |
|
|
|
|
files.clear(); |
|
|
|
|
|
|
|
|
|
auto parser = get_new_parser(); |
|
|
|
|
|
|
|
|
|
while (true) { |
|
|
|
|
if (queue->done()) { |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
while (do_work_one(queue, batch, parser)) { |
|
|
|
|
auto size = queue->results_size(); |
|
|
|
|
if (size > 10000) { |
|
|
|
|
queue->wait_store_results(database); |
|
|
|
|
queue->store_results(database); |
|
|
|
|
} |
|
|
|
|
std::this_thread::sleep_for(std::chrono::milliseconds(200)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for (auto &t: threads) { |
|
|
|
@ -201,18 +216,20 @@ auto main(int argc, char **argv) -> int
@@ -201,18 +216,20 @@ auto main(int argc, char **argv) -> int
|
|
|
|
|
} |
|
|
|
|
queue->wait_store_results(database); |
|
|
|
|
} else { |
|
|
|
|
auto s = source_file(path.string()); |
|
|
|
|
fmt::print("{}\n", s.as_string()); |
|
|
|
|
if (path.extension() == ".c" || path.extension() == ".h") { |
|
|
|
|
auto s = source_file(path.string(), read_file(path.string()), get_new_parser()); |
|
|
|
|
fmt::print("{}\n", s.as_string()); |
|
|
|
|
|
|
|
|
|
std::string line {}; |
|
|
|
|
std::string line {}; |
|
|
|
|
|
|
|
|
|
while (line != "exit") { |
|
|
|
|
fmt::print("query > "); |
|
|
|
|
std::getline(std::cin, line); |
|
|
|
|
if (line != "exit") { |
|
|
|
|
run_query(s, line); |
|
|
|
|
} |
|
|
|
|
while (line != "exit") { |
|
|
|
|
fmt::print("query > "); |
|
|
|
|
std::getline(std::cin, line); |
|
|
|
|
if (line != "exit") { |
|
|
|
|
run_query(s, line); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|