mirror of https://github.com/fork-maintainers/iceraven-browser
[fenix] Update visual-metric code.
This commit is contained in:
parent
ef885fe482
commit
46b74a49ee
@@ -27,12 +27,16 @@ from voluptuous import ALLOW_EXTRA, Required, Schema
 #: The directory where artifacts from this job will be placed.
 OUTPUT_DIR = Path("/", "builds", "worker", "artifacts")


 #: A job to process through visualmetrics.py
 @attr.s
 class Job:
     #: The name of the test.
     test_name = attr.ib(type=str)

+    #: The extra options for this job.
+    extra_options = attr.ib(type=str)
+
     #: json_path: The path to the ``browsertime.json`` file on disk.
     json_path = attr.ib(type=Path)

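For reference, a hypothetical Job instance under the new attribute set (paths and values invented). Note that attr.ib(type=...) only records metadata and does not validate, which is why main() can later pass a list of options here despite type=str:

from pathlib import Path

job = Job(
    test_name="amazon",
    extra_options=["cold"],  # a list in practice, despite type=str
    json_path=Path("browsertime-results/browsertime.json"),
    video_path=Path("browsertime-results/video1.mp4"),
)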
@@ -44,7 +48,11 @@ class Job:
 JOB_SCHEMA = Schema(
     {
         Required("jobs"): [
-            {Required("test_name"): str, Required("browsertime_json_path"): str}
+            {
+                Required("test_name"): str,
+                Required("browsertime_json_path"): str,
+                Required("extra_options"): [str],
+            }
         ],
         Required("application"): {Required("name"): str, "version": str},
         Required("extra_options"): [str],
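A minimal sketch of what the updated schema enforces with voluptuous (the payload below is invented): each job must now carry its own extra_options list, and omitting it fails validation.

from voluptuous import MultipleInvalid

good = {
    "jobs": [
        {
            "test_name": "amazon",
            "browsertime_json_path": "browsertime-results/browsertime.json",
            "extra_options": ["cold"],
        }
    ],
    "application": {"name": "fenix"},
    "extra_options": [],
}
JOB_SCHEMA(good)  # passes

try:
    JOB_SCHEMA({
        "jobs": [{"test_name": "amazon", "browsertime_json_path": "b.json"}],
        "application": {"name": "fenix"},
        "extra_options": [],
    })
except MultipleInvalid as e:
    print(e)  # e.g. "required key not provided @ data['jobs'][0]['extra_options']"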
@@ -80,7 +88,7 @@ def run_command(log, cmd):
     return e.returncode, e.output


-def append_result(log, suites, test_name, name, result):
+def append_result(log, suites, test_name, name, result, extra_options):
    """Appends a ``name`` metrics result in the ``test_name`` suite.

    Args:
@@ -98,10 +106,16 @@ def append_result(log, suites, test_name, name, result):
         log.error("Could not convert value", name=name)
         log.error("%s" % result)
         result = 0
-    if test_name not in suites:
-        suites[test_name] = {"name": test_name, "subtests": {}}

-    subtests = suites[test_name]["subtests"]
+    if test_name in suites and suites[test_name]["extraOptions"] != extra_options:
+        missing = set(extra_options) - set(suites[test_name]["extraOptions"])
+        test_name = test_name + "-".join(list(missing))
+
+    subtests = suites.setdefault(
+        test_name,
+        {"name": test_name, "subtests": {}, "extraOptions": extra_options}
+    )["subtests"]

     if name not in subtests:
         subtests[name] = {
             "name": name,
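To illustrate the new keying logic (invented data): results arriving for a known test_name but with different extraOptions are routed to a renamed suite rather than merged, and setdefault creates a suite on first sight. One quirk worth noting: "-".join over a single missing option inserts no dash at all.

suites = {"amazon": {"name": "amazon", "subtests": {}, "extraOptions": ["cold"]}}

test_name, extra_options = "amazon", ["cold", "live"]
if test_name in suites and suites[test_name]["extraOptions"] != extra_options:
    missing = set(extra_options) - set(suites[test_name]["extraOptions"])
    test_name = test_name + "-".join(list(missing))

subtests = suites.setdefault(
    test_name,
    {"name": test_name, "subtests": {}, "extraOptions": extra_options}
)["subtests"]

print(test_name)  # "amazonlive", a second, separate suite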
@@ -241,6 +255,8 @@ def main(log, args):
         jobs.append(
             Job(
                 test_name=job["test_name"],
+                extra_options=len(job["extra_options"]) > 0 and
+                job["extra_options"] or jobs_json["extra_options"],
                 json_path=browsertime_json_path,
                 video_path=browsertime_json_path.parent / video,
             )
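The len(...) > 0 and x or y construct above is the pre-ternary Python conditional idiom; because an empty list is falsy, it is equivalent to the simpler forms below (shown for clarity only, using the names from the hunk above):

extra_options = job["extra_options"] if job["extra_options"] else jobs_json["extra_options"]
# or, relying on the same truthiness:
extra_options = job["extra_options"] or jobs_json["extra_options"]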
@@ -273,45 +289,34 @@ def main(log, args):
             # Python 3.5 requires a str object (not 3.6+)
             res = json.loads(res.decode("utf8"))
             for name, value in res.items():
-                append_result(log, suites, job.test_name, name, value)
+                append_result(log, suites, job.test_name, name, value, job.extra_options)

     suites = [get_suite(suite) for suite in suites.values()]

     perf_data = {
         "framework": {"name": "browsertime"},
         "application": jobs_json["application"],
-        "type": "vismet",
+        "type": "pageload",
         "suites": suites,
     }
-    for entry in suites:
-        entry["extraOptions"] = jobs_json["extra_options"]

     # Try to get the similarity for all possible tests, this means that we
     # will also get a comparison of recorded vs. live sites to check
     # the on-going quality of our recordings.
-    similarity = None
-    if "android" in os.getenv("TC_PLATFORM", ""):
-        try:
-            from similarity import calculate_similarity
-            similarity = calculate_similarity(jobs_json, fetch_dir, OUTPUT_DIR, log)
-        except Exception:
-            log.info("Failed to calculate similarity score", exc_info=True)
-
-    if similarity:
-        suites[0]["subtests"].append({
-            "name": "Similarity3D",
-            "value": similarity[0],
-            "replicates": [similarity[0]],
-            "lowerIsBetter": False,
-            "unit": "a.u.",
-        })
-        suites[0]["subtests"].append({
-            "name": "Similarity2D",
-            "value": similarity[1],
-            "replicates": [similarity[1]],
-            "lowerIsBetter": False,
-            "unit": "a.u.",
-        })
+    try:
+        from similarity import calculate_similarity
+        for name, value in calculate_similarity(jobs_json, fetch_dir, OUTPUT_DIR).items():
+            if value is None:
+                continue
+            suites[0]["subtests"].append({
+                "name": name,
+                "value": value,
+                "replicates": [value],
+                "lowerIsBetter": False,
+                "unit": "a.u.",
+            })
+    except Exception:
+        log.info("Failed to calculate similarity score", exc_info=True)

     # Validates the perf data complies with perfherder schema.
     # The perfherder schema uses jsonschema so we can't use voluptuous here.
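Putting this hunk's changes together, a hypothetical resulting payload (all values invented) now carries type "pageload", per-suite extraOptions set by append_result, and any similarity metrics appended as ordinary subtests of the first suite:

perf_data = {
    "framework": {"name": "browsertime"},
    "application": {"name": "fenix", "version": "84.0"},
    "type": "pageload",
    "suites": [
        {
            "name": "amazon",
            "extraOptions": ["cold"],
            "subtests": [
                {"name": "SpeedIndex", "value": 1510},
                {"name": "Similarity2D", "value": 0.92341,
                 "replicates": [0.92341], "lowerIsBetter": False, "unit": "a.u."},
            ],
        }
    ],
}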
@@ -10,6 +10,7 @@ import os
 import pathlib
 import shutil
 import socket
+import structlog
 import tarfile
 import tempfile
 import urllib
@@ -19,8 +20,24 @@ from matplotlib import pyplot as plt
 from scipy.stats import spearmanr


-def open_data(file):
-    return cv2.VideoCapture(str(file))
+log = None
+
+
+# We add the `and` conditions to it later
+base_ad_query = {
+    "from": "task",
+    "limit": 1000,
+    "where": {
+        "and": []
+    },
+    "select": [
+        "action.start_time",
+        "run.name",
+        "task.artifacts",
+        "task.group.id",
+        "task.id"
+    ],
+}


 def socket_timeout(value=120):
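base_ad_query is a template: callers fill in where["and"] before submitting it. A sketch of the HTTP round trip it implies, using the endpoint named in the helper below; this code is illustrative, not the commit's:

import json
import urllib.request

def post_ad_query(query):
    req = urllib.request.Request(
        "http://activedata.allizom.org/query",
        data=json.dumps(query).encode("utf8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req, timeout=120) as resp:
        return json.loads(resp.read().decode("utf8"))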
@@ -38,8 +55,12 @@ def socket_timeout(value=120):
     return _socket_timeout


+def _open_data(file):
+    return cv2.VideoCapture(str(file))
+
+
 @socket_timeout(120)
-def query_activedata(query_json, log):
+def _query_activedata(query_json):
    """Used to run queries on active data."""
    active_data_url = "http://activedata.allizom.org/query"

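The socket_timeout decorator's body lies outside this diff; a plausible sketch consistent with the return _socket_timeout line shown above would set a default socket timeout for the duration of the wrapped call:

import functools
import socket

def socket_timeout(value=120):
    def _socket_timeout(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            old = socket.getdefaulttimeout()
            socket.setdefaulttimeout(value)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(old)
        return wrapper
    return _socket_timeout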
@@ -59,7 +80,7 @@ def query_activedata(query_json, log):


 @socket_timeout(120)
-def download(url, loc, log):
+def _download(url, loc):
    """Downloads from a url (with a timeout)."""
    log.info("Downloading %s" % url)
    try:
@@ -70,7 +91,7 @@ def download(url, loc, log):
     return True


-def get_frames(video):
+def _get_frames(video):
    """Gets all frames from a video into a list."""
    allframes = []
    while video.isOpened():
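The body of _get_frames' loop is elided in this hunk; the standard OpenCV pattern it wraps looks like the following (the break/release handling is an assumption, not shown by the diff):

import cv2

def read_all_frames(path):
    video = cv2.VideoCapture(str(path))
    frames = []
    while video.isOpened():
        ret, frame = video.read()
        if not ret:
            video.release()
            break
        frames.append(frame)
    return frames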
@@ -84,77 +105,11 @@ def get_frames(video):
     return allframes


-def calculate_similarity(jobs_json, fetch_dir, output, log):
-    """Calculates the similarity score against the last live site test.
-
-    The technique works as follows:
-        1. Get the last live site test.
-        2. For each 15x15 video pairings, build a cross-correlation matrix:
-            1. Get each of the videos and calculate their histograms
-               across the full videos.
-            2. Calculate the correlation coefficient between these two.
-        3. Average the cross-correlation matrix to obtain the score.
-
-    The 2D similarity score is the same, except that it builds a histogram
-    from the final frame instead of the full video.
-
-    For finding the last live site, we use active-data. We search for
-    PGO android builds since this metric is only available for live sites that
-    run on android in mozilla-central. Given that live sites currently
-    run on cron 3 days a week, then it's also reasonable to look for tasks
-    which have occurred before today and within the last two weeks at most.
-    But this is a TODO for future work, since we need to determine a better
-    way of selecting the last task (HG push logs?) - there's a lot that factors
-    into these choices, so it might require a multi-faceted approach.
-
-    Args:
-        jobs_json: The jobs JSON that holds extra information.
-        fetch_dir: The fetch directory that holds the new videos.
-        log: The logger.
-    Returns:
-        Two similarity scores (3D, 2D) as a float, or None if there was an issue.
-    """
-    app = jobs_json["application"]["name"]
-    test = jobs_json["jobs"][0]["test_name"]
-    splittest = test.split("-cold")
-
-    cold = ""
-    if len(splittest) > 0:
-        cold = ".*cold"
-        test = splittest[0]
-
-    # PGO vs. OPT shouldn't matter much, but we restrict it to PGO builds here
-    # for android, and desktop tests have the opt/pgo restriction removed
-    plat = os.getenv("TC_PLATFORM", "")
-    if "android" in plat:
-        plat = plat.replace("/opt", "/pgo")
-    else:
-        plat = plat.replace("/opt", "").replace("/pgo", "")
-    ad_query = {
-        "from": "task",
-        "limit": 1000,
-        "where": {
-            "and": [
-                {
-                    "regexp": {
-                        "run.name": ".*%s.*browsertime.*-live.*%s%s.*%s.*"
-                        % (plat, app, cold, test)
-                    }
-                },
-                {"not": {"prefix": {"run.name": "test-vismet"}}},
-                {"in": {"repo.branch.name": ["mozilla-central"]}},
-                {"gte": {"action.start_time": {"date": "today-week-week"}}},
-                {"lt": {"action.start_time": {"date": "today-1day"}}},
-                {"in": {"task.run.state": ["completed"]}},
-            ]
-        },
-        "select": ["action.start_time", "run.name", "task.artifacts"],
-    }
-
-    # Run the AD query and find the browsertime videos to download
+def _get_browsertime_results(query):
+    """Used to run an AD query and extract the browsertime results if they exist."""
     failed = False
     try:
-        data = query_activedata(ad_query, log)
+        data = _query_activedata(query)
     except Exception as e:
         log.info(str(e))
         failed = True
@@ -162,6 +117,7 @@ def calculate_similarity(jobs_json, fetch_dir, output, log):
         log.info("Couldn't get activedata data")
         return None

+    # Find the newest browsertime task
     log.info("Found %s datums" % str(len(data["action.start_time"])))
     maxind = np.argmax([float(t) for t in data["action.start_time"]])
     artifacts = data["task.artifacts"][maxind]
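The indexing here works because ActiveData returns columnar results: each selected field maps to a list aligned by row, so one argmax over the start times picks the newest row across every column. Sample data below is invented:

import numpy as np

data = {
    "action.start_time": ["1605000000", "1605086400"],
    "run.name": ["...-browsertime-tp6m-...", "...-browsertime-tp6m-..."],
    "task.artifacts": [
        [{"name": "browsertime-results.tgz", "url": "https://example.com/a.tgz"}],
        [{"name": "browsertime-results.tgz", "url": "https://example.com/b.tgz"}],
    ],
}
maxind = int(np.argmax([float(t) for t in data["action.start_time"]]))
artifacts = data["task.artifacts"][maxind]  # the newest task's artifact list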
@@ -171,13 +127,20 @@ def calculate_similarity(jobs_json, fetch_dir, output, log):
             btime_artifact = art["url"]
             break
     if not btime_artifact:
-        log.info("Can't find an older live site")
+        log.info("Can't find an older site test")
         return None

+    log.info("Comparing videos to TASK_GROUP=%s, TASK_ID=%s" % (
+        data["task.group.id"][maxind], data["task.id"][maxind]
+    ))
+
     # Download the browsertime videos and untar them
     tmpdir = tempfile.mkdtemp()
     loc = os.path.join(tmpdir, "tmpfile.tgz")
-    if not download(btime_artifact, loc, log):
+    if not _download(btime_artifact, loc):
         log.info(
             "Failed to download browsertime-results artifact from %s" % btime_artifact
         )
         return None
     tmploc = tempfile.mkdtemp()
     try:
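The body of the try: block is cut off by the next hunk; it presumably extracts the downloaded .tgz into tmploc, along these lines (a sketch using loc and tmploc from the context above, not the commit's code):

import tarfile

with tarfile.open(loc) as tar:
    tar.extractall(path=tmploc)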
@@ -191,22 +154,90 @@ def calculate_similarity(jobs_json, fetch_dir, output, log):
         )
         return None

-    # Find all the videos
-    oldmp4s = [str(f) for f in pathlib.Path(tmploc).rglob("*.mp4")]
-    log.info("Found %s old videos" % str(len(oldmp4s)))
-    newmp4s = [str(f) for f in pathlib.Path(fetch_dir).rglob("*.mp4")]
-    log.info("Found %s new videos" % str(len(newmp4s)))
+    return tmploc

-    # Finally, calculate the 2D/3D score
+
+def _data_from_last_task(label):
+    """Gets the data from the last PGO/OPT task with the same label.
+
+    We look for both OPT and PGO tasks. The difference
+    between them should be minimal. This method also provides
+    a way to compare recordings from this task to another
+    known task based on the TC_GROUP_ID environment variable.
+    """
+    label_opt = label.replace("/pgo", "/opt")
+    label_pgo = label.replace("/opt", "/pgo")
+
+    base_ad_query["where"]["and"] = [
+        {"in": {"task.run.state": ["completed"]}},
+        {"or": [
+            {"eq": {"run.name": label_pgo}},
+            {"eq": {"run.name": label_opt}}
+        ]}
+    ]
+
+    task_group_id = os.getenv("TC_GROUP_ID", "")
+    if task_group_id:
+        base_ad_query["where"]["and"].append(
+            {"eq": {"task.group.id": task_group_id}}
+        )
+    else:
+        base_ad_query["where"]["and"].extend([
+            {"in": {"repo.branch.name": ["mozilla-central"]}},
+            {"gte": {"action.start_time": {"date": "today-week-week"}}},
+        ])
+
+    return _get_browsertime_results(base_ad_query)
+
+
+def _data_from_last_live_task(label):
+    """Gets the data from the last live site PGO task."""
+    label_live = label.replace("/opt", "/pgo").replace("tp6m", "tp6m-live")
+
+    base_ad_query["where"]["and"] = [
+        {"in": {"repo.branch.name": ["mozilla-central"]}},
+        {"gte": {"action.start_time": {"date": "today-week-week"}}},
+        {"in": {"task.run.state": ["completed"]}},
+        {"eq": {"run.name": label_live}},
+    ]
+
+    return _get_browsertime_results(base_ad_query)
+
+
+def _get_similarity(old_videos_info, new_videos_info, output, prefix=""):
+    """Calculates a similarity score for two groupings of videos.
+
+    The technique works as follows:
+        1. Get the last live site test.
+        2. For each of the 15x15 video pairings, build a cross-correlation matrix:
+            1. Get each of the videos and calculate their histograms
+               across the full videos.
+            2. Calculate the correlation coefficient between these two.
+        3. Average the cross-correlation matrix to obtain the score.
+
+    The 2D similarity score is the same, except that it builds a histogram
+    from the final frame instead of the full video.
+
+    Args:
+        old_videos_info: List of old videos.
+        new_videos_info: List of new videos (from this task).
+        output: Location to output videos with low similarity scores.
+        prefix: Prefix a string to the output.
+    Returns:
+        Two similarity scores (3D, 2D) as a float.
+    """
     nhists = []
     nhists2d = []

-    total_vids = min(len(oldmp4s), len(newmp4s))
+    old_videos = [entry["data"] for entry in old_videos_info]
+    new_videos = [entry["data"] for entry in new_videos_info]
+
+    total_vids = min(len(old_videos), len(new_videos))
     xcorr = np.zeros((total_vids, total_vids))
     xcorr2d = np.zeros((total_vids, total_vids))

     for i in range(total_vids):
-        datao = np.asarray(get_frames(open_data(oldmp4s[i])))
+        datao = np.asarray(_get_frames(old_videos[i]))

         histo, _, _ = plt.hist(datao.flatten(), bins=255)
         histo2d, _, _ = plt.hist(datao[-1, :, :].flatten(), bins=255)
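In isolation, the per-pair computation that fills xcorr reduces to histogramming each video's pixels and correlating the two histograms. A self-contained sketch follows; Spearman correlation is an assumption based on similarity.py importing spearmanr, since the exact call sits in lines this diff does not touch:

import numpy as np
from scipy.stats import spearmanr

def histogram_correlation(frames_a, frames_b, bins=255):
    # Histogram all pixel values of each video, then correlate the histograms.
    hist_a, _ = np.histogram(np.asarray(frames_a).flatten(), bins=bins)
    hist_b, _ = np.histogram(np.asarray(frames_b).flatten(), bins=bins)
    rho, _ = spearmanr(hist_a, hist_b)
    return rho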
@@ -214,7 +245,7 @@ def calculate_similarity(jobs_json, fetch_dir, output, log):
         for j in range(total_vids):
             if i == 0:
                 # Only calculate the histograms once; it takes time
-                datan = np.asarray(get_frames(open_data(newmp4s[j])))
+                datan = np.asarray(_get_frames(new_videos[j]))

                 histn, _, _ = plt.hist(datan.flatten(), bins=255)
                 histn2d, _, _ = plt.hist(datan[-1, :, :].flatten(), bins=255)
@@ -237,15 +268,93 @@ def calculate_similarity(jobs_json, fetch_dir, output, log):
     log.info("Average 3D similarity: %s" % str(np.round(similarity, 5)))
     log.info("Average 2D similarity: %s" % str(np.round(similarity2d, 5)))

-    if similarity < 0.5:
-        # For really low correlations, output the worst video pairing
+    if np.round(similarity, 1) <= 0.7 or np.round(similarity2d, 1) <= 0.7:
+        # For low correlations, output the worst video pairing
         # so that we can visually see what the issue was
         minind = np.unravel_index(np.argmin(xcorr, axis=None), xcorr.shape)

-        oldvid = oldmp4s[minind[0]]
-        shutil.copyfile(oldvid, str(pathlib.Path(output, "old_video.mp4")))
+        oldvid = old_videos_info[minind[0]]["path"]
+        shutil.copyfile(oldvid, str(pathlib.Path(output, "%sold_video.mp4" % prefix)))

-        newvid = newmp4s[minind[1]]
-        shutil.copyfile(newvid, str(pathlib.Path(output, "new_video.mp4")))
+        newvid = new_videos_info[minind[1]]["path"]
+        shutil.copyfile(newvid, str(pathlib.Path(output, "%snew_video.mp4" % prefix)))

     return np.round(similarity, 5), np.round(similarity2d, 5)
+
+
+def calculate_similarity(jobs_json, fetch_dir, output):
+    """Calculates the similarity score for this task.
+
+    Here we use activedata to find the last live site that ran and
+    to find the last task (with the same label) that ran. Those two
+    tasks are then compared to the current one and 4 metrics are produced.
+
+    For live sites, we only calculate 2 of these metrics, since the
+    playback similarity is not applicable to them.
+
+    Args:
+        jobs_json: The jobs JSON that holds extra information.
+        fetch_dir: The fetch directory that holds the new videos.
+        output: The output directory.
+    Returns:
+        A dictionary containing up to 4 different metrics (their values default
+        to None if a metric couldn't be calculated):
+            PlaybackSimilarity: Similarity of the full playback to a live site test.
+            PlaybackSimilarity2D: - // - (but for the final frame only)
+            Similarity: Similarity of the test's video recording to its last run.
+            Similarity2D: - // - (but for the final frame only)
+    """
+    global log
+    log = structlog.get_logger()
+
+    label = os.getenv("TC_LABEL", "")
+    if not label:
+        log.info("TC_LABEL is undefined, cannot calculate similarity metrics")
+        return {}
+
+    # Get all the newest videos from this task
+    new_btime_videos = [
+        {"data": _open_data(str(f)), "path": str(f)}
+        for f in pathlib.Path(fetch_dir).rglob("*.mp4")
+    ]
+    log.info("Found %s new videos" % str(len(new_btime_videos)))
+
+    # Get the similarity against the last task
+    old_btime_res = _data_from_last_task(label)
+    old_sim = old_sim2d = None
+    if old_btime_res:
+        old_btime_videos = [
+            {"data": _open_data(str(f)), "path": str(f)}
+            for f in pathlib.Path(old_btime_res).rglob("*.mp4")
+        ]
+        log.info("Found %s old videos" % str(len(old_btime_videos)))
+
+        old_sim, old_sim2d = _get_similarity(
+            old_btime_videos, new_btime_videos, output
+        )
+    else:
+        log.info("Failed to find an older test task")
+
+    # Compare recordings to their live site variant if it exists
+    live_sim = live_sim2d = None
+    if "live" not in jobs_json["extra_options"]:
+        live_btime_res = _data_from_last_live_task(label)
+        if live_btime_res:
+            live_btime_videos = [
+                {"data": _open_data(str(f)), "path": str(f)}
+                for f in pathlib.Path(live_btime_res).rglob("*.mp4")
+            ]
+            log.info("Found %s live videos" % str(len(live_btime_videos)))
+
+            live_sim, live_sim2d = _get_similarity(
+                live_btime_videos, new_btime_videos, output, prefix="live_"
+            )
+        else:
+            log.info("Failed to find a live site variant")
+
+    return {
+        "PlaybackSimilarity": live_sim,
+        "PlaybackSimilarity2D": live_sim2d,
+        "Similarity": old_sim,
+        "Similarity2D": old_sim2d,
+    }