You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

231 lines
7.2 KiB

import sqlite3
from collections import OrderedDict
from collections import defaultdict
from contextlib import closing

from treeplot import DataFrame
from treeplot import TreeFrame
# read from shared memory db created in R
DB_FILE = "../proj/shared-processed-database.db"
DB_FILE_ORIG = "../proj/database.db"
def get_from_db(query="SELECT * FROM stats_by_file", db_file=DB_FILE):
    """Load per-file change stats into a dict of DataFrame records.

    Each row of *query* is expected to yield (path, additions, deletions).
    Rows with no changes (additions + deletions == 0) are skipped, both
    because they carry no information and because the weight ratio would
    divide by zero.

    Returns a dict mapping the path split into a tuple of components to
    ``DataFrame(path_tuple, size, weight)`` where size is total churn and
    weight is the fraction of that churn that was additions.
    """
    print("Reading from db")
    data = {}
    # sqlite3's context manager only scopes the transaction — it does NOT
    # close the connection, so wrap in closing() to avoid leaking it.
    with closing(sqlite3.connect(db_file)) as con:
        cur = con.cursor()
        for path, additions, deletions in cur.execute(query):
            size = additions + deletions
            if size > 0:
                weight = additions / size
                key = tuple(path.split("/"))
                data[key] = DataFrame(key, size, weight)
    print(list(data.items())[0:5])
    print("Loaded data", len(data))
    return data
def transfer_weight_from_db(tm: TreeFrame, query="SELECT * FROM stats_by_file", db_file=DB_FILE):
    """Attach addition/deletion counts from the stats db to tm and reweight.

    Each query row (path, additions, deletions) is stored on the node found
    at the path's key; counts are then rolled up the tree and every node's
    weight is set to its total churn scaled so the largest churn maps onto
    the largest node size, normalised by the node's own size.
    """
    loaded = 0
    # closing() because sqlite3's context manager does not close connections.
    with closing(sqlite3.connect(db_file)) as con:
        cur = con.cursor()
        for path, additions, deletions in cur.execute(query):
            key = tuple(path.split("/"))
            item: TreeFrame = tm.locate_at_key(key)
            if item is None:
                print("key not found", key)
                continue
            item.attribs["adds"] = additions
            item.attribs["dels"] = deletions
            loaded += 1
    # Report rows actually attached (the original counted an always-empty dict).
    print("Loaded data", loaded)
    # Roll leaf counts up through parent directories.
    tm.cumsum_attribute("adds", into_attribute="additions")
    tm.cumsum_attribute("dels", into_attribute="deletions")
    max_size = 0
    max_delta = 0
    for node in tm.items():
        node.attribs["total"] = node.attribs["additions"] + node.attribs["deletions"]
        max_delta = max(max_delta, node.attribs["total"])
        max_size = max(max_size, node.size)
    if max_delta == 0:
        # No churn anywhere: scaling would divide by zero; leave weights as-is.
        return
    scale = max_size / max_delta
    for node in tm.items():
        node.weight = node.attribs["total"] * scale / node.size
def union(*items):
    """Union the given arguments into one set.

    A set argument is merged directly; any other argument is treated as an
    iterable of iterables, each of which is merged element-wise.
    """
    merged = set()
    for part in items:
        if isinstance(part, set):
            merged |= part
        else:
            for sub in part:
                merged.update(sub)
    return merged
def transfer_author_weight_from_db(tm: TreeFrame):
    """Attach per-author commit stats from the raw commit db to tm.

    For every delta row, the file's node (or its nearest existing ancestor,
    since files may have been renamed or deleted) accumulates churn, the
    author's email, and a commit count. Counts and author sets are then
    rolled up the tree, and each node's weight is set to the number of
    distinct authors who touched that exact node.
    """
    db_file = DB_FILE_ORIG
    query = "select author_name, author_email, commit_time, new_file_name, addition_lines, deletion_lines from " \
            "full_deltas"
    for node in tm.items():
        node.attribs["this_authors"] = set()
        node.attribs["this_commits"] = 0
    count = 0
    # closing() because sqlite3's context manager does not close connections.
    with closing(sqlite3.connect(db_file)) as con:
        cur = con.cursor()
        for author_name, author_email, commit_time, path, additions, deletions in cur.execute(query):
            key = tuple(path.split("/"))
            item: TreeFrame = tm.locate_at_key(key)
            # Walk up to the nearest ancestor that exists in the tree.
            while len(key) > 1 and item is None:
                key = key[0:-1]
                item = tm.locate_at_key(key)
            if item is None:
                print("key not found", key)
                continue
            # Accumulate: full_deltas yields one row per (commit, file), so a
            # plain assignment here would keep only the last commit's churn.
            item.attribs["adds"] = item.attribs.get("adds", 0) + additions
            item.attribs["dels"] = item.attribs.get("dels", 0) + deletions
            item.attribs["this_authors"].add(author_email)
            item.attribs["this_commits"] += 1
            count += 1
    tm.cumsum_attribute("this_commits", into_attribute="commits")
    tm.cumsum_attribute("this_authors", into_attribute="authors", op=union)
    tm.cumsum_attribute("adds", into_attribute="additions")
    tm.cumsum_attribute("dels", into_attribute="deletions")
    for node in tm.items():
        # "authors" may be missing on nodes the cumsum never reached.
        node.attribs["num_authors"] = len(node.attribs.get("authors", ()))
    for node in tm.items():
        node.weight = len(node.attribs["this_authors"])
    print("done")
def transfer_author_weight_from_db_days(tm: TreeFrame):
    """Attach weekly addition/deletion histograms to every node of tm.

    Each node receives ``additions_date`` / ``deletions_date`` defaultdicts
    keyed by the (week-floored) date from the R-generated table. Rows whose
    path is missing fall back to the nearest existing ancestor. All node
    weights are reset to 0.

    Returns the distinct dates in query order (sorted by week).
    """
    db_file = DB_FILE
    # Column name is the literal R expression the table was generated with.
    query = "select * from delta_stats_files_time order by `floor_date(as.Date(date), unit = \"week\")`"
    all_dates = OrderedDict()  # insertion-ordered "set" of dates seen
    for node in tm.items():
        node.attribs["additions_date"] = defaultdict(int)
        node.attribs["deletions_date"] = defaultdict(int)
    # closing() because sqlite3's context manager does not close connections.
    with closing(sqlite3.connect(db_file)) as con:
        cur = con.cursor()
        for path, date, additions, deletions in cur.execute(query):
            key = tuple(path.split("/"))
            item: TreeFrame = tm.locate_at_key(key)
            # Walk up to the nearest ancestor that exists in the tree.
            while len(key) > 1 and item is None:
                key = key[0:-1]
                item = tm.locate_at_key(key)
            if item is None:
                print("key not found", key)
                continue
            all_dates[date] = None
            item.attribs["additions_date"][date] += additions
            item.attribs["deletions_date"][date] += deletions
    for node in tm.items():
        node.weight = 0
    print("done")
    return list(all_dates.keys())
def transfer_source_analysis_from_db(tm: TreeFrame, path_prefix="/run/media/alistair/storj/linux/"):
    """Merge per-file source metrics from the tree-sitter analysis db into tm.

    Reads the ``source_file`` table, strips *path_prefix* (the absolute
    checkout location the analysis ran against) from each stored path, and
    accumulates the metrics onto the matching node or its nearest existing
    ancestor. Line/comment/function counts are then rolled up the tree and
    all node weights are reset to 0.
    """
    db_file = "../ts/build/database.db"
    query = "select * from source_file;"
    metric_keys = ["is_source", "is_header", "lines", "bytes", "comments",
                   "comment_lines", "functions", "function_lines"]
    for node in tm.items():
        for k in metric_keys:
            node.attribs[k] = 0
    # closing() because sqlite3's context manager does not close connections.
    with closing(sqlite3.connect(db_file)) as con:
        cur = con.cursor()
        for row in cur.execute(query):
            (kid, path, extension, is_source, is_header, lines, num_bytes,
             comments, comment_lines, functions, function_lines) = row
            key = tuple(path.replace(path_prefix, "").split("/"))
            item: TreeFrame = tm.locate_at_key(key)
            # Walk up to the nearest ancestor that exists in the tree.
            while len(key) > 1 and item is None:
                print("Actual key not found", path)
                key = key[0:-1]
                item = tm.locate_at_key(key)
            if item is None:
                print("key not found", key, path)
                continue
            item.attribs["is_source"] += is_source
            item.attribs["is_header"] += is_header
            item.attribs["lines"] += lines
            # "bytes" was initialised to 0 on every node but never accumulated.
            item.attribs["bytes"] += num_bytes
            item.attribs["function_lines"] += function_lines
            item.attribs["comment_lines"] += comment_lines
            item.attribs["comments"] += comments
            item.attribs["functions"] += functions
    tm.cumsum_attribute("lines")
    tm.cumsum_attribute("comments")
    tm.cumsum_attribute("comment_lines")
    tm.cumsum_attribute("functions")
    tm.cumsum_attribute("function_lines")
    for node in tm.items():
        node.weight = 0
    print("done")