You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
231 lines
7.2 KiB
231 lines
7.2 KiB
import sqlite3 |
|
from treeplot import DataFrame |
|
from treeplot import TreeFrame |
|
from collections import OrderedDict |
|
from collections import defaultdict |
|
|
|
# read from shared memory db created in R
# Pre-processed statistics database produced by the R pipeline
# (contains e.g. the stats_by_file and delta_stats_files_time tables).
DB_FILE = "../proj/shared-processed-database.db"
# Original, unprocessed commit database (contains the full_deltas table).
DB_FILE_ORIG = "../proj/database.db"
|
|
|
|
|
def get_from_db(query="SELECT * FROM stats_by_file", db_file=DB_FILE):
    """Load per-file churn stats into a dict of DataFrame records.

    Each query row is expected to be (path, additions, deletions).  Rows
    with zero total churn are skipped.  The dict is keyed by the path
    split into its components; the record's weight is the fraction of
    the churn that is additions.
    """
    print("Reading from db")
    with sqlite3.connect(db_file) as connection:
        cursor = connection.cursor()

        frames = {}
        for path, additions, deletions in cursor.execute(query):
            total = additions + deletions
            if total <= 0:
                continue
            parts = tuple(path.split("/"))
            frames[parts] = DataFrame(parts, total, additions / total)

        print(list(frames.items())[:5])
        print("Loaded data", len(frames))
        return frames
|
|
|
|
|
def transfer_weight_from_db(tm: TreeFrame, query="SELECT * FROM stats_by_file", db_file=DB_FILE):
    """Attach per-file addition/deletion counts from the stats db onto *tm*.

    Each query row is (path, additions, deletions).  The counts are stored
    on the matching tree node, summed up the tree via cumsum_attribute, and
    each node's weight is set proportional to its churn relative to its size.
    """
    with sqlite3.connect(db_file) as con:
        cur = con.cursor()

        loaded = 0
        for path, additions, deletions in cur.execute(query):
            key = tuple(path.split("/"))
            item: TreeFrame = tm.locate_at_key(key)
            if item is None:
                print("key not found", key)
                continue
            item.attribs["adds"] = additions
            item.attribs["dels"] = deletions
            loaded += 1

    print("Loaded data", loaded)
    tm.cumsum_attribute("adds", into_attribute="additions")
    tm.cumsum_attribute("dels", into_attribute="deletions")

    max_size = 0
    max_delta = 0
    for node in tm.items():
        node.attribs["total"] = node.attribs["additions"] + node.attribs["deletions"]
        if node.attribs["total"] > max_delta:
            max_delta = node.attribs["total"]
        if node.size > max_size:
            max_size = node.size

    # Guard: a tree with no churn at all would otherwise divide by zero below.
    if max_delta == 0:
        for node in tm.items():
            node.weight = 0
        return

    for node in tm.items():
        # NOTE(review): assumes node.size > 0 for every node — confirm in TreeFrame.
        node.weight = node.attribs["total"] * (max_size / max_delta) / node.size
        print(node.weight)
|
|
|
def union(*items):
    """Merge any number of arguments into one set.

    A set argument is unioned directly; a non-set argument is treated as a
    collection of iterables, each of which is unioned element-wise.  Used as
    the ``op`` for cumsum_attribute over author sets.
    """
    merged = set()
    for item in items:
        if isinstance(item, set):
            merged.update(item)
        else:
            for member in item:
                merged.update(member)
    return merged
|
|
|
def transfer_author_weight_from_db(tm: TreeFrame):
    """Aggregate per-author commit statistics from the raw delta table onto *tm*.

    For every row of full_deltas the owning tree node collects the author's
    email, a commit count, and the addition/deletion line counts.  Per-node
    values are then summed up the tree, and each node's weight becomes the
    number of distinct authors that touched it directly.
    """
    db_file = DB_FILE_ORIG
    query = "select author_name, author_email, commit_time, new_file_name, addition_lines, deletion_lines from " \
            "full_deltas"

    # Per-node accumulators for rows that hit this node directly.
    # (Numeric ones start at 0 so the += below never KeyErrors.)
    for node in tm.items():
        node.attribs["this_authors"] = set()
        node.attribs["this_commits"] = 0
        node.attribs["adds"] = 0
        node.attribs["dels"] = 0

    with sqlite3.connect(db_file) as con:
        cur = con.cursor()

        count = 0
        for author_name, author_email, commit_time, path, additions, deletions in cur.execute(query):
            key = tuple(path.split("/"))
            item: TreeFrame = tm.locate_at_key(key)

            # Deleted/renamed files may no longer exist in the tree:
            # fall back to the closest existing ancestor directory.
            while len(key) > 1 and item is None:
                key = key[0:-1]
                item = tm.locate_at_key(key)

            if item is None:
                print("key not found", key)
                continue

            # Accumulate — the original assigned here, silently dropping
            # every row for a file except the last one.
            item.attribs["adds"] += additions
            item.attribs["dels"] += deletions
            item.attribs["this_authors"].add(author_email)
            item.attribs["this_commits"] += 1
            count += 1

    tm.cumsum_attribute("this_commits", into_attribute="commits")
    tm.cumsum_attribute("this_authors", into_attribute="authors", op=union)
    tm.cumsum_attribute("adds", into_attribute="additions")
    tm.cumsum_attribute("dels", into_attribute="deletions")

    for node in tm.items():
        node.attribs["num_authors"] = len(node.attribs.get("authors", ()))
        node.weight = len(node.attribs["this_authors"])

    print("done")
|
|
|
|
|
|
|
|
|
def transfer_author_weight_from_db_days(tm: TreeFrame):
    """Load weekly addition/deletion counts per file onto the nodes of *tm*.

    Fills each node's ``additions_date`` / ``deletions_date`` dicts, keyed by
    week date, and resets every node weight to 0.  Returns the distinct week
    dates in first-seen order (the query is already ordered by week).
    """
    db_file = DB_FILE
    query = "select * from delta_stats_files_time order by `floor_date(as.Date(date), unit = \"week\")`"

    all_dates = OrderedDict()  # used as an insertion-ordered set of dates
    for node in tm.items():
        node.attribs["additions_date"] = defaultdict(int)
        node.attribs["deletions_date"] = defaultdict(int)

    with sqlite3.connect(db_file) as con:
        cur = con.cursor()

        count = 0
        for path, date, additions, deletions in cur.execute(query):
            key = tuple(path.split("/"))
            item: TreeFrame = tm.locate_at_key(key)

            # Fall back to the closest existing ancestor for vanished files.
            while len(key) > 1 and item is None:
                key = key[0:-1]
                item = tm.locate_at_key(key)

            if item is None:
                print("key not found", key)
                continue

            all_dates[date] = None
            item.attribs["additions_date"][date] += additions
            item.attribs["deletions_date"][date] += deletions
            count += 1

    for node in tm.items():
        node.weight = 0

    print("done")
    return list(all_dates.keys())
|
|
|
|
|
def transfer_source_analysis_from_db(tm: TreeFrame):
    """Attach static source-analysis metrics (lines, comments, functions, …)
    from the tree-sitter analysis database onto the matching nodes of *tm*,
    then sum the line-based metrics up the tree and zero all weights."""
    db_file = "../ts/build/database.db"
    query = "select * from source_file;"

    metric_keys = ["is_source", "is_header", "lines", "bytes", "comments",
                   "comment_lines", "functions", "function_lines"]
    for node in tm.items():
        for k in metric_keys:
            node.attribs[k] = 0

    with sqlite3.connect(db_file) as con:
        cur = con.cursor()

        for row in cur.execute(query):
            print(row)
            # `byte_count` renamed from `bytes` to stop shadowing the builtin.
            (kid, path, extension, is_source, is_header, lines, byte_count,
             comments, comment_lines, functions, function_lines) = row

            # Strip the local checkout prefix so paths line up with tree keys.
            key = tuple(path.replace("/run/media/alistair/storj/linux/", "").split("/"))
            item: TreeFrame = tm.locate_at_key(key)

            # Fall back to the closest existing ancestor directory.
            while len(key) > 1 and item is None:
                print("Actual key not found", path)
                key = key[0:-1]
                item = tm.locate_at_key(key)

            if item is None:
                print("key not found", key, path)
                continue
            print("Key found: ", key)

            item.attribs["is_source"] += is_source
            item.attribs["is_header"] += is_header
            item.attribs["lines"] += lines
            # Fix: "bytes" was initialized but never accumulated in the original.
            item.attribs["bytes"] += byte_count
            item.attribs["function_lines"] += function_lines
            item.attribs["comment_lines"] += comment_lines
            item.attribs["comments"] += comments
            item.attribs["functions"] += functions

    tm.cumsum_attribute("lines")
    tm.cumsum_attribute("comments")
    tm.cumsum_attribute("comment_lines")
    tm.cumsum_attribute("functions")
    tm.cumsum_attribute("function_lines")
    for node in tm.items():
        node.weight = 0

    print("done")