diff --git a/taskcluster/ci/browsertime/kind.yml b/taskcluster/ci/browsertime/kind.yml index 3889b17dd..ab6fbbb87 100644 --- a/taskcluster/ci/browsertime/kind.yml +++ b/taskcluster/ci/browsertime/kind.yml @@ -37,7 +37,7 @@ job-defaults: subject: '[{product_name}] Raptor-Browsertime job "{task_name}" failed' to-addresses: [perftest-alerts@mozilla.com] default: {} - run-on-tasks-for: [] + run-on-tasks-for: [github-pull-request] treeherder: kind: test tier: 2 @@ -98,7 +98,7 @@ job-defaults: - linux64-ffmpeg-4.1.4 - linux64-geckodriver - linux64-minidump-stackwalk - - linux64-node + - linux64-node-16 jobs: tp6m: diff --git a/taskcluster/ci/docker-image/kind.yml b/taskcluster/ci/docker-image/kind.yml index 9be32ec33..1cdb97883 100644 --- a/taskcluster/ci/docker-image/kind.yml +++ b/taskcluster/ci/docker-image/kind.yml @@ -22,6 +22,3 @@ jobs: ui-tests: parent: base symbol: I(ui-tests) - visual-metrics: - parent: base - symbol: I(visual-metrics) diff --git a/taskcluster/ci/toolchain/gecko-derived.yml b/taskcluster/ci/toolchain/gecko-derived.yml index 4d3d7889a..a5dfff510 100644 --- a/taskcluster/ci/toolchain/gecko-derived.yml +++ b/taskcluster/ci/toolchain/gecko-derived.yml @@ -49,10 +49,10 @@ linux64-node: index-search: - gecko.cache.level-3.toolchains.v3.linux64-node-12.latest -visual-metrics: +linux64-node-16: attributes: - toolchain-artifact: public/visualmetrics.py - description: "Browsertime visual metrics analysis script" + toolchain-artifact: public/build/node.tar.zst + description: "Node.js toolchain" run: index-search: - - gecko.cache.level-3.content.v1.visual-metrics.latest + - gecko.cache.level-3.toolchains.v3.linux64-node-16.latest diff --git a/taskcluster/ci/visual-metrics/kind.yml b/taskcluster/ci/visual-metrics/kind.yml deleted file mode 100644 index 946b9b6fd..000000000 --- a/taskcluster/ci/visual-metrics/kind.yml +++ /dev/null @@ -1,51 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
---- -loader: fenix_taskgraph.loader.multi_dep:loader - -kind-dependencies: - - browsertime - - toolchain - -primary-dependency: - - browsertime - -group-by: attributes - -only-for-attributes: - - run-visual-metrics - -transforms: - - fenix_taskgraph.transforms.visual_metrics:transforms - - taskgraph.transforms.job:transforms - - taskgraph.transforms.task:transforms - -job-template: - attributes: - nightly: true - description: "Run visual metrics calculations on Raptor" - run-on-projects: [] - run-on-tasks-for: [] - worker-type: b-android - treeherder: - tier: 2 - kind: other - worker: - docker-image: {in-tree: visual-metrics} - max-run-time: 900 - artifacts: - - type: file - name: public/perfherder-data.json - path: /builds/worker/artifacts/perfherder-data.json - - type: file - name: public/summary.json - path: /builds/worker/artifacts/summary.json - fetches: - toolchain: - - visual-metrics - run: - using: run-task - command: /builds/worker/bin/run-visual-metrics.py -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport - checkout: false - run-as-root: true diff --git a/taskcluster/docker/visual-metrics/Dockerfile b/taskcluster/docker/visual-metrics/Dockerfile deleted file mode 100644 index ae216aed5..000000000 --- a/taskcluster/docker/visual-metrics/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -FROM $DOCKER_IMAGE_PARENT -MAINTAINER Gregory Mierzwinski - -# run-task expects to run as root -USER root - -RUN apt-get update -qq && \ - apt-get install -y \ - ffmpeg \ - imagemagick \ - pyssim \ - python \ - python-pil - -WORKDIR /builds/worker - -USER worker:worker - -COPY requirements.txt /builds/worker/requirements.txt -RUN pip3 install --require-hashes -r /builds/worker/requirements.txt && \ - rm /builds/worker/requirements.txt - -COPY similarity.py /builds/worker/bin/similarity.py -COPY run-visual-metrics.py /builds/worker/bin/run-visual-metrics.py -COPY performance-artifact-schema.json /builds/worker/performance-artifact-schema.json - -USER root -RUN chmod +x /builds/worker/bin/run-visual-metrics.py - -VOLUME /builds/worker/artifacts/ diff --git a/taskcluster/docker/visual-metrics/performance-artifact-schema.json b/taskcluster/docker/visual-metrics/performance-artifact-schema.json deleted file mode 100644 index aaf4312d0..000000000 --- a/taskcluster/docker/visual-metrics/performance-artifact-schema.json +++ /dev/null @@ -1,230 +0,0 @@ -{ - "definitions": { - "application_schema": { - "properties": { - "name": { - "title": "Application under performance test", - "enum": [ - "firefox", - "chrome", - "chrome-m", - "chromium", - "fennec", - "geckoview", - "refbrow", - "fenix" - ], - "maxLength": 10, - "type": "string" - }, - "version": { - "title": "Application's version", - "maxLength": 40, - "type": "string" - } - }, - "required": ["name"], - "type": "object" - }, - "framework_schema": { - "properties": { - "name": { - "title": "Framework name", - "type": "string" - } - }, - "type": "object" - }, - "subtest_schema": { - "properties": { - "name": { - "title": "Subtest name", - "type": "string" - }, - "publicName": { - "title": "Public subtest name", - "description": "Allows renaming test's name, without breaking existing performance data series", - "maxLength": 30, - "type": "string" - }, - "value": { - "description": "Summary value for subtest", - "title": "Subtest value", - "type": "number", - "minimum": -1000000000000.0, - "maximum": 1000000000000.0 - }, - "unit": { - "title": "Measurement unit", - "type": "string", - "minLength": 1, - "maxLength": 20 - }, - 
"lowerIsBetter": { - "description": "Whether lower values are better for subtest", - "title": "Lower is better", - "type": "boolean" - }, - "shouldAlert": { - "description": "Whether we should alert", - "title": "Should alert", - "type": "boolean" - }, - "alertThreshold": { - "description": "% change threshold before alerting", - "title": "Alert threshold", - "type": "number", - "minimum": 0.0, - "maximum": 1000.0 - }, - "minBackWindow": { - "description": "Minimum back window to use for alerting", - "title": "Minimum back window", - "type": "number", - "minimum": 1, - "maximum": 255 - }, - "maxBackWindow": { - "description": "Maximum back window to use for alerting", - "title": "Maximum back window", - "type": "number", - "minimum": 1, - "maximum": 255 - }, - "foreWindow": { - "description": "Fore window to use for alerting", - "title": "Fore window", - "type": "number", - "minimum": 1, - "maximum": 255 - } - }, - "required": [ - "name", - "value" - ], - "type": "object" - }, - "suite_schema": { - "properties": { - "name": { - "title": "Suite name", - "type": "string" - }, - "publicName": { - "title": "Public suite name", - "description": "Allows renaming suite's name, without breaking existing performance data series", - "maxLength": 30, - "type": "string" - }, - "tags": { - "type": "array", - "title": "Free form tags, which ease the grouping & searching of performance tests", - "description": "Similar to extraOptions, except it does not break existing performance data series", - "items": { - "type": "string", - "pattern": "^[a-zA-Z0-9-]{1,24}$" - }, - "uniqueItems": true, - "maxItems": 14 - }, - "extraOptions": { - "type": "array", - "title": "Extra options used in running suite", - "items": { - "type": "string", - "maxLength": 100 - }, - "uniqueItems": true, - "maxItems": 8 - }, - "subtests": { - "items": { - "$ref": "#/definitions/subtest_schema" - }, - "title": "Subtests", - "type": "array" - }, - "value": { - "title": "Suite value", - "type": "number", - "minimum": -1000000000000.0, - "maximum": 1000000000000.0 - }, - "unit": { - "title": "Measurement unit", - "type": "string", - "minLength": 1, - "maxLength": 20 - }, - "lowerIsBetter": { - "description": "Whether lower values are better for suite", - "title": "Lower is better", - "type": "boolean" - }, - "shouldAlert": { - "description": "Whether we should alert on this suite (overrides default behaviour)", - "title": "Should alert", - "type": "boolean" - }, - "alertThreshold": { - "description": "% change threshold before alerting", - "title": "Alert threshold", - "type": "number", - "minimum": 0.0, - "maximum": 1000.0 - }, - "minBackWindow": { - "description": "Minimum back window to use for alerting", - "title": "Minimum back window", - "type": "integer", - "minimum": 1, - "maximum": 255 - }, - "maxBackWindow": { - "description": "Maximum back window to use for alerting", - "title": "Maximum back window", - "type": "integer", - "minimum": 1, - "maximum": 255 - }, - "foreWindow": { - "description": "Fore window to use for alerting", - "title": "Fore window", - "type": "integer", - "minimum": 1, - "maximum": 255 - } - }, - "required": [ - "name", - "subtests" - ], - "type": "object" - } - }, - "description": "Structure for submitting performance data as part of a job", - "id": "https://treeherder.mozilla.org/schemas/v1/performance-artifact.json#", - "properties": { - "application":{ - "$ref": "#/definitions/application_schema" - }, - "framework": { - "$ref": "#/definitions/framework_schema" - }, - "suites": { - "description": 
"List of suite-level data submitted as part of this structure", - "items": { - "$ref": "#/definitions/suite_schema" - }, - "title": "Performance suites", - "type": "array" - } - }, - "required": [ - "framework", - "suites" - ], - "title": "Perfherder Schema", - "type": "object" -} diff --git a/taskcluster/docker/visual-metrics/requirements.txt b/taskcluster/docker/visual-metrics/requirements.txt deleted file mode 100644 index 560a0d008..000000000 --- a/taskcluster/docker/visual-metrics/requirements.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Dependency hashes must be for python3.6 - -# Direct dependencies -attrs==19.1.0 --hash=sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79 -structlog==19.1.0 --hash=sha256:db441b81c65b0f104a7ce5d86c5432be099956b98b8a2c8be0b3fb3a7a0b1536 -voluptuous==0.11.5 --hash=sha256:303542b3fc07fb52ec3d7a1c614b329cdbee13a9d681935353d8ea56a7bfa9f1 -jsonschema==3.2.0 --hash=sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163 -numpy==1.18.3 --hash=sha256:a551d8cc267c634774830086da42e4ba157fa41dd3b93982bc9501b284b0c689 -scipy==1.4.1 --hash=sha256:386086e2972ed2db17cebf88610aab7d7f6e2c0ca30042dc9a89cf18dcc363fa -matplotlib==3.0.3 --hash=sha256:e8d1939262aa6b36d0c51f50a50a43a04b9618d20db31e6c0192b1463067aeef -opencv-python==4.2.0.34 --hash=sha256:dcb8da8c5ebaa6360c8555547a4c7beb6cd983dd95ba895bb78b86cc8cf3de2b - -# Transitive dependencies -importlib_metadata==1.1.0 --hash=sha256:e6ac600a142cf2db707b1998382cc7fc3b02befb7273876e01b8ad10b9652742 -more_itertools==8.0.0 --hash=sha256:a0ea684c39bc4315ba7aae406596ef191fd84f873d2d2751f84d64e81a7a2d45 -pyrsistent==0.15.6 --hash=sha256:f3b280d030afb652f79d67c5586157c5c1355c9a58dfc7940566e28d28f3df1b -six==1.12.0 --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c -zipp==0.6.0 --hash=sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335 -cycler==0.10.0 --hash=sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d -kiwisolver==1.1.0 --hash=sha256:400599c0fe58d21522cae0e8b22318e09d9729451b17ee61ba8e1e7c0346565c -pyparsing==2.4.7 --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b -python-dateutil==2.8.1 --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a -setuptools==46.1.3 --hash=sha256:4fe404eec2738c20ab5841fa2d791902d2a645f32318a7850ef26f8d7215a8ee diff --git a/taskcluster/docker/visual-metrics/run-visual-metrics.py b/taskcluster/docker/visual-metrics/run-visual-metrics.py deleted file mode 100755 index 7db40a8bc..000000000 --- a/taskcluster/docker/visual-metrics/run-visual-metrics.py +++ /dev/null @@ -1,496 +0,0 @@ -#!/usr/bin/env python3 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -"""Instrument visualmetrics.py to run in parallel.""" - -import argparse -import json -import logging -import os -import statistics -import subprocess -import sys -import tarfile -import time -from concurrent.futures import ProcessPoolExecutor -from functools import partial -from multiprocessing import cpu_count -from pathlib import Path - -import attr -import structlog -from jsonschema import validate -from voluptuous import ALLOW_EXTRA, Required, Schema - - -#: The max run time for a command (5 minutes) -MAX_TIME = 300 - - -#: The directory where artifacts from this job will be placed. 
-OUTPUT_DIR = Path("/", "builds", "worker", "artifacts") - - -#: A job to process through visualmetrics.py -@attr.s -class Job: - #: The name of the test. - test_name = attr.ib(type=str) - - #: A unique number for the job. - count = attr.ib(type=int) - - #: The tags for this job. - tags = attr.ib(type=str) - - #: The extra options for this job. - extra_options = attr.ib(type=str) - - #: If true, we allow 0's in the vismet results - accept_zero_vismet = attr.ib(type=bool) - - #: json_path: The path to the ``browsertime.json`` file on disk. - json_path = attr.ib(type=Path) - - #: video_path: The path of the video file on disk. - video_path = attr.ib(type=Path) - - -#: The schema for validating jobs. -JOB_SCHEMA = Schema( - { - Required("jobs"): [ - { - Required("test_name"): str, - Required("browsertime_json_path"): str, - Required("tags"): [str], - Required("extra_options"): [str], - Required("accept_zero_vismet"): bool, - } - ], - Required("application"): {Required("name"): str, "version": str}, - Required("extra_options"): [str], - } -) - -#: A partial schema for browsertime.json files. -BROWSERTIME_SCHEMA = Schema( - [{Required("files"): {Required("video"): [str]}}], extra=ALLOW_EXTRA -) - -SHOULD_ALERT = { - "ContentfulSpeedIndex": True, - "FirstVisualChange": True, - "LastVisualChange": True, - "PerceptualSpeedIndex": True, - "SpeedIndex": True, - "videoRecordingStart": False, -} - -with Path("/", "builds", "worker", "performance-artifact-schema.json").open() as f: - PERFHERDER_SCHEMA = json.loads(f.read()) - - -def run_command(log, cmd, job_count): - """Run a command using subprocess.check_output - - Args: - log: The structlog logger instance. - cmd: the command to run as a list of strings. - - Returns: - A tuple of the process' exit status and standard output. - """ - log.info("Running command", cmd=cmd) - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - lines = [] - res = None - start = time.time() - while time.time() - start <= MAX_TIME: - time.sleep(0.1) - output = process.stdout.readline() - if output == b"" and process.poll() is not None: - break - if output: - res = output.strip() - lines.append(res.decode("utf-8", "ignore")) - else: - time.sleep(5) - - if time.time() - start > MAX_TIME: - log.error( - "TEST-UNEXPECTED-FAIL | Timed out waiting for response from command", - cmd=cmd, - ) - return 1, "Timed out" - - rc = process.poll() - job_prefix = "[JOB-" + str(job_count) + "] " - for line in lines: - # Some output doesn't start with the levels because it comes - # from FFMPEG rather than the script itself - if line.startswith(("[INFO]", "[WARNING]", "[CRITICAL]", "[ERROR]")): - splitline = line.split(" - ") - level = splitline[0] - line = " - ".join(splitline[1:]) - else: - level = "[INFO]" - - newline = job_prefix + line - if level.strip() in ("[ERROR]", "[CRITICAL]"): - if rc == 0: - rc = 1 - log.error("TEST-UNEXPECTED-FAIL | " + newline) - elif level == "[WARNING]": - log.warning(newline) - else: - log.info(newline) - - return rc, res - - -def append_result(log, suites, test_name, name, result, tags, extra_options): - """Appends a ``name`` metrics result in the ``test_name`` suite. - - Args: - log: The structlog logger instance. - suites: A mapping containing the suites. - test_name: The name of the test. - name: The name of the metrics. - result: The value to append. 
- """ - if name.endswith("Progress"): - return - try: - result = int(result) - except ValueError: - log.error("Could not convert value", name=name) - log.error("%s" % result) - result = 0 - - orig_test_name = test_name - if test_name in suites and suites[test_name]["extraOptions"] != extra_options: - missing = set(extra_options) - set(suites[test_name]["extraOptions"]) - test_name = test_name + "-".join(list(missing)) - - subtests = suites.setdefault( - test_name, - { - "name": orig_test_name, - "tags": extra_options + tags + ["visual"], - "subtests": {}, - "extraOptions": extra_options, - }, - )["subtests"] - - if name not in subtests: - subtests[name] = { - "name": name, - "replicates": [result], - "lowerIsBetter": True, - "unit": "ms", - "shouldAlert": SHOULD_ALERT.get(name, False), - } - else: - subtests[name]["replicates"].append(result) - - -def compute_median(subtest): - """Adds in the subtest the ``value`` field, which is the average of all - replicates. - - Args: - subtest: The subtest containing all replicates. - - Returns: - The subtest. - """ - if "replicates" not in subtest: - return subtest - subtest["value"] = statistics.median(subtest["replicates"]) - return subtest - - -def get_suite(suite): - """Returns the suite with computed medians in its subtests. - - Args: - suite: The suite to convert. - - Returns: - The suite. - """ - suite["subtests"] = [ - compute_median(subtest) for subtest in suite["subtests"].values() - ] - return suite - - -def read_json(json_path, schema): - """Read the given json file and verify against the provided schema. - - Args: - json_path: Path of json file to parse. - schema: A callable to validate the JSON's schema. - - Returns: - The contents of the file at ``json_path`` interpreted as JSON. - """ - try: - with open(str(json_path), "r", encoding="utf-8", errors="ignore") as f: - data = json.load(f) - except Exception: - log.error("Could not read JSON file", path=json_path, exc_info=True) - raise - - log.info("Loaded JSON from file", path=json_path) - - try: - schema(data) - except Exception: - log.error("JSON failed to validate", exc_info=True) - raise - - return data - - -def main(log, args): - """Run visualmetrics.py in parallel. - - Args: - log: The structlog logger instance. - args: The parsed arguments from the argument parser. - - Returns: - The return code that the program will exit with. 
- """ - fetch_dir = os.getenv("MOZ_FETCHES_DIR") - if not fetch_dir: - log.error("Expected MOZ_FETCHES_DIR environment variable.") - return 1 - - fetch_dir = Path(fetch_dir) - - visualmetrics_path = fetch_dir / "visualmetrics.py" - if not visualmetrics_path.exists(): - log.error( - "Could not locate visualmetrics.py", expected_path=str(visualmetrics_path) - ) - return 1 - - browsertime_results_path = fetch_dir / "browsertime-results.tgz" - - try: - with tarfile.open(str(browsertime_results_path)) as tar: - tar.extractall(path=str(fetch_dir)) - except Exception: - log.error( - "Could not read/extract browsertime results archive", - path=browsertime_results_path, - exc_info=True, - ) - return 1 - log.info("Extracted browsertime results", path=browsertime_results_path) - - try: - jobs_json_path = fetch_dir / "browsertime-results" / "jobs.json" - jobs_json = read_json(jobs_json_path, JOB_SCHEMA) - except Exception: - log.error( - "Could not open the jobs.json file", path=jobs_json_path, exc_info=True - ) - return 1 - - jobs = [] - count = 0 - - for job in jobs_json["jobs"]: - browsertime_json_path = fetch_dir / job["browsertime_json_path"] - - try: - browsertime_json = read_json(browsertime_json_path, BROWSERTIME_SCHEMA) - except Exception: - log.error( - "Could not open a browsertime.json file", - path=browsertime_json_path, - exc_info=True, - ) - return 1 - - for site in browsertime_json: - for video in site["files"]["video"]: - count += 1 - name = job["test_name"] - if "alias" in site["info"] and site["info"]["alias"].strip() != "": - name = "%s.%s" % (name, site["info"]["alias"]) - jobs.append( - Job( - test_name=name, - tags=job["tags"], - extra_options=len(job["extra_options"]) > 0 - and job["extra_options"] - or jobs_json["extra_options"], - accept_zero_vismet=job["accept_zero_vismet"], - json_path=browsertime_json_path, - video_path=browsertime_json_path.parent / video, - count=count, - ) - ) - - failed_runs = 0 - suites = {} - - with ProcessPoolExecutor(max_workers=cpu_count()) as executor: - for job, result in zip( - jobs, - executor.map( - partial( - run_visual_metrics, - visualmetrics_path=visualmetrics_path, - options=args.visual_metrics_options, - ), - jobs, - ), - ): - returncode, res = result - if returncode != 0: - log.error( - "Failed to run visualmetrics.py", - video_path=job.video_path, - error=res, - ) - failed_runs += 1 - else: - for name, value in res.items(): - append_result( - log, - suites, - job.test_name, - name, - value, - job.tags, - job.extra_options, - ) - - suites = [get_suite(suite) for suite in suites.values()] - - perf_data = { - "framework": {"name": "browsertime"}, - "application": jobs_json["application"], - "type": "pageload", - "suites": suites, - } - - # TODO: Try to get the similarity for all possible tests, this means that we - # will also get a comparison of recorded vs. live sites to check the on-going - # quality of our recordings. - # Bug 1674927 - Similarity metric is disabled until we figure out - # why it had a huge increase in run time. - - # Validates the perf data complies with perfherder schema. - # The perfherder schema uses jsonschema so we can't use voluptuous here. - validate(perf_data, PERFHERDER_SCHEMA) - - raw_perf_data = json.dumps(perf_data) - with Path(OUTPUT_DIR, "perfherder-data.json").open("w") as f: - f.write(raw_perf_data) - # Prints the data in logs for Perfherder to pick it up. - log.info("PERFHERDER_DATA: %s" % raw_perf_data) - - # Lists the number of processed jobs, failures, and successes. 
- with Path(OUTPUT_DIR, "summary.json").open("w") as f: - json.dump( - { - "total_jobs": len(jobs), - "successful_runs": len(jobs) - failed_runs, - "failed_runs": failed_runs, - }, - f, - ) - - # If there's one failure along the way, we want to return > 0 - # to trigger a red job in TC. - return failed_runs - - -def run_visual_metrics(job, visualmetrics_path, options): - """Run visualmetrics.py on the input job. - - Returns: - A returncode and a string containing the output of visualmetrics.py - """ - cmd = [ - "/usr/bin/python", - str(visualmetrics_path), - "-vvv", - "--logformat", - "[%(levelname)s] - %(message)s", - "--video", - str(job.video_path), - ] - cmd.extend(options) - rc, res = run_command(log, cmd, job.count) - - if rc == 0: - # Python 3.5 requires a str object (not 3.6+) - res = json.loads(res.decode("utf8")) - - failed_tests = [] - if not job.accept_zero_vismet: - # Ensure that none of these values are at 0 which - # is indicative of a failling test - monitored_tests = [ - "contentfulspeedindex", - "lastvisualchange", - "perceptualspeedindex", - "speedindex", - ] - for metric, val in res.items(): - if metric.lower() in monitored_tests and val == 0: - failed_tests.append(metric) - - if failed_tests: - log.error( - "TEST-UNEXPECTED-FAIL | Some visual metrics have an erroneous value of 0." - ) - log.info("Tests which failed: %s" % str(failed_tests)) - rc += 1 - - return rc, res - - -if __name__ == "__main__": - logging.basicConfig(format="%(levelname)s - %(message)s", level=logging.INFO) - structlog.configure( - processors=[ - structlog.processors.format_exc_info, - structlog.dev.ConsoleRenderer(colors=False), - ], - logger_factory=structlog.stdlib.LoggerFactory(), - cache_logger_on_first_use=True, - ) - - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - - parser.add_argument( - "visual_metrics_options", - type=str, - metavar="VISUAL-METRICS-OPTIONS", - help="Options to pass to visualmetrics.py", - nargs="*", - ) - - args = parser.parse_args() - log = structlog.get_logger() - - try: - sys.exit(main(log, args)) - except Exception as e: - log.error("Unhandled exception: %s" % e, exc_info=True) - sys.exit(1) diff --git a/taskcluster/docker/visual-metrics/similarity.py b/taskcluster/docker/visual-metrics/similarity.py deleted file mode 100644 index f56e15875..000000000 --- a/taskcluster/docker/visual-metrics/similarity.py +++ /dev/null @@ -1,360 +0,0 @@ -#!/usr/bin/env python3 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-import cv2 -import json -import numpy as np -import os -import pathlib -import shutil -import socket -import structlog -import tarfile -import tempfile -import urllib - -from functools import wraps -from matplotlib import pyplot as plt -from scipy.stats import spearmanr - - -log = None - - -# We add the `and` conditions to it later -base_ad_query = { - "from": "task", - "limit": 1000, - "where": { - "and": [] - }, - "select": [ - "action.start_time", - "run.name", - "task.artifacts", - "task.group.id", - "task.id" - ], -} - - -def socket_timeout(value=120): - """Decorator for socket timeouts.""" - def _socket_timeout(func): - @wraps(func) - def __socket_timeout(*args, **kw): - old = socket.getdefaulttimeout() - socket.setdefaulttimeout(value) - try: - return func(*args, **kw) - finally: - socket.setdefaulttimeout(old) - return __socket_timeout - return _socket_timeout - - -def _open_data(file): - return cv2.VideoCapture(str(file)) - - -@socket_timeout(120) -def _query_activedata(query_json): - """Used to run queries on active data.""" - active_data_url = "http://activedata.allizom.org/query" - - req = urllib.request.Request(active_data_url) - req.add_header("Content-Type", "application/json") - jsondata = json.dumps(query_json) - - jsondataasbytes = jsondata.encode("utf-8") - req.add_header("Content-Length", len(jsondataasbytes)) - - log.info("Querying Active-data...") - response = urllib.request.urlopen(req, jsondataasbytes) - log.info("Status: %s" % {str(response.getcode())}) - - data = json.loads(response.read().decode("utf8").replace("'", '"'))["data"] - return data - - -@socket_timeout(120) -def _download(url, loc): - """Downloads from a url (with a timeout).""" - log.info("Downloading %s" % url) - try: - urllib.request.urlretrieve(url, loc) - except Exception as e: - log.info(str(e)) - return False - return True - - -def _get_frames(video): - """Gets all frames from a video into a list.""" - allframes = [] - while video.isOpened(): - ret, frame = video.read() - if ret: - # Convert to gray to simplify the process - allframes.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)) - else: - video.release() - break - return allframes - - -def _get_browsertime_results(query): - """Used to run an AD query and extract the browsertime results if they exist.""" - failed = False - try: - data = _query_activedata(query) - except Exception as e: - log.info(str(e)) - failed = True - if failed or not data: - log.info("Couldn't get activedata data") - return None - - # Find the newest browsertime task - log.info("Found %s datums" % str(len(data["action.start_time"]))) - maxind = np.argmax([float(t) for t in data["action.start_time"]]) - artifacts = data["task.artifacts"][maxind] - btime_artifact = None - for art in artifacts: - if "browsertime-results" in art["name"]: - btime_artifact = art["url"] - break - if not btime_artifact: - log.info("Can't find an older site test") - return None - - log.info("Comparing videos to TASK_GROUP=%s, TASK_ID=%s" % ( - data["task.group.id"][maxind], data["task.id"][maxind] - )) - - # Download the browsertime videos and untar them - tmpdir = tempfile.mkdtemp() - loc = os.path.join(tmpdir, "tmpfile.tgz") - if not _download(btime_artifact, loc): - log.info( - "Failed to download browsertime-results artifact from %s" % btime_artifact - ) - return None - tmploc = tempfile.mkdtemp() - try: - with tarfile.open(str(loc)) as tar: - tar.extractall(path=tmploc) - except Exception: - log.info( - "Could not read/extract old browsertime results archive", - path=loc, - exc_info=True, - 
) - return None - - return tmploc - - -def _data_from_last_task(label): - """Gets the data from the last PGO/OPT task with the same label. - - We look for both OPT and PGO tasks. The difference - between them should be minimal. This method also provides - a way to compare recordings from this task to another - known task based on the TC_GROUP_ID environment varible. - """ - label_opt = label.replace("/pgo", "/opt") - label_pgo = label.replace("/opt", "/pgo") - - base_ad_query["where"]["and"] = [ - {"in": {"task.run.state": ["completed"]}}, - {"or": [ - {"eq": {"run.name": label_pgo}}, - {"eq": {"run.name": label_opt}} - ]} - ] - - task_group_id = os.getenv("TC_GROUP_ID", "") - if task_group_id: - base_ad_query["where"]["and"].append( - {"eq": {"task.group.id": task_group_id}} - ) - else: - base_ad_query["where"]["and"].extend([ - {"in": {"repo.branch.name": ["mozilla-central"]}}, - {"gte": {"action.start_time": {"date": "today-week-week"}}}, - ]) - - return _get_browsertime_results(base_ad_query) - - -def _data_from_last_live_task(label): - """Gets the data from the last live site PGO task.""" - label_live = label.replace("/opt", "/pgo").replace("tp6m", "tp6m-live") - - base_ad_query["where"]["and"] = [ - {"in": {"repo.branch.name": ["mozilla-central"]}}, - {"gte": {"action.start_time": {"date": "today-week-week"}}}, - {"in": {"task.run.state": ["completed"]}}, - {"eq": {"run.name": label_live}}, - ] - - return _get_browsertime_results(base_ad_query) - - -def _get_similarity(old_videos_info, new_videos_info, output, prefix=""): - """Calculates a similarity score for two groupings of videos. - - The technique works as follows: - 1. Get the last live site test. - 2. For each 15x15 video pairings, build a cross-correlation matrix: - 1. Get each of the videos and calculate their histograms - across the full videos. - 2. Calculate the correlation coefficient between these two. - 3. Average the cross-correlation matrix to obtain the score. - - The 2D similarity score is the same, except that it builds a histogram - from the final frame instead of the full video. - - Args: - old_videos: List of old videos. - new_videos: List of new videos (from this task). - output: Location to output videos with low similarity scores. - prefix: Prefix a string to the output. - Returns: - Two similarity scores (3D, 2D) as a float. 
- """ - nhists = [] - nhists2d = [] - - old_videos = [entry["data"] for entry in old_videos_info] - new_videos = [entry["data"] for entry in new_videos_info] - - total_vids = min(len(old_videos), len(new_videos)) - xcorr = np.zeros((total_vids, total_vids)) - xcorr2d = np.zeros((total_vids, total_vids)) - - for i in range(total_vids): - datao = np.asarray(_get_frames(old_videos[i])) - - histo, _, _ = plt.hist(datao.flatten(), bins=255) - histo2d, _, _ = plt.hist(datao[-1, :, :].flatten(), bins=255) - - for j in range(total_vids): - if i == 0: - # Only calculate the histograms once; it takes time - datan = np.asarray(_get_frames(new_videos[j])) - - histn, _, _ = plt.hist(datan.flatten(), bins=255) - histn2d, _, _ = plt.hist(datan[-1, :, :].flatten(), bins=255) - - nhists.append(histn) - nhists2d.append(histn2d) - else: - histn = nhists[j] - histn2d = nhists2d[j] - - rho, _ = spearmanr(histn, histo) - rho2d, _ = spearmanr(histn2d, histo2d) - - xcorr[i, j] = rho - xcorr2d[i, j] = rho2d - - similarity = np.mean(xcorr) - similarity2d = np.mean(xcorr2d) - - log.info("Average 3D similarity: %s" % str(np.round(similarity, 5))) - log.info("Average 2D similarity: %s" % str(np.round(similarity2d, 5))) - - if np.round(similarity, 1) <= 0.7 or np.round(similarity2d, 1) <= 0.7: - # For low correlations, output the worst video pairing - # so that we can visually see what the issue was - minind = np.unravel_index(np.argmin(xcorr, axis=None), xcorr.shape) - - oldvid = old_videos_info[minind[0]]["path"] - shutil.copyfile(oldvid, str(pathlib.Path(output, "%sold_video.mp4" % prefix))) - - newvid = new_videos_info[minind[1]]["path"] - shutil.copyfile(newvid, str(pathlib.Path(output, "%snew_video.mp4" % prefix))) - - return np.round(similarity, 5), np.round(similarity2d, 5) - - -def calculate_similarity(jobs_json, fetch_dir, output): - """Calculates the similarity score for this task. - - Here we use activedata to find the last live site that ran and - to find the last task (with the same label) that ran. Those two - tasks are then compared to the current one and 4 metrics are produced. - - For live sites, we only calculate 2 of these metrics, since the - playback similarity is not applicable to it. - - Args: - jobs_json: The jobs JSON that holds extra information. - fetch_dir: The fetch directory that holds the new videos. - output: The output directory. - Returns: - A dictionary containing up to 4 different metrics (their values default - to None if a metric couldn't be calculated): - PlaybackSimilarity: Similarity of the full playback to a live site test. - PlaybackSimilarity2D: - // - (but for the final frame only) - Similarity: Similarity of the tests video recording to its last run. 
- Similarity2D: - // - (but for the final frame only) - """ - global log - log = structlog.get_logger() - - label = os.getenv("TC_LABEL", "") - if not label: - log.info("TC_LABEL is undefined, cannot calculate similarity metrics") - return {} - - # Get all the newest videos from this task - new_btime_videos = [ - {"data": _open_data(str(f)), "path": str(f)} - for f in pathlib.Path(fetch_dir).rglob("*.mp4") - ] - log.info("Found %s new videos" % str(len(new_btime_videos))) - - # Get the similarity against the last task - old_btime_res = _data_from_last_task(label) - old_sim = old_sim2d = None - if old_btime_res: - old_btime_videos = [ - {"data": _open_data(str(f)), "path": str(f)} - for f in pathlib.Path(old_btime_res).rglob("*.mp4") - ] - log.info("Found %s old videos" % str(len(old_btime_videos))) - - old_sim, old_sim2d = _get_similarity( - old_btime_videos, new_btime_videos, output - ) - else: - log.info("Failed to find an older test task") - - # Compare recordings to their live site variant if it exists - live_sim = live_sim2d = None - if "live" not in jobs_json["extra_options"]: - live_btime_res = _data_from_last_live_task(label) - if live_btime_res: - live_btime_videos = [ - {"data": _open_data(str(f)), "path": str(f)} - for f in pathlib.Path(live_btime_res).rglob("*.mp4") - ] - log.info("Found %s live videos" % str(len(live_btime_videos))) - - live_sim, live_sim2d = _get_similarity( - live_btime_videos, new_btime_videos, output, prefix="live_" - ) - else: - log.info("Failed to find a live site variant") - - return { - "PlaybackSimilarity": live_sim, - "PlaybackSimilarity2D": live_sim2d, - "Similarity": old_sim, - "Similarity2D": old_sim2d, - } diff --git a/taskcluster/fenix_taskgraph/transforms/browsertime.py b/taskcluster/fenix_taskgraph/transforms/browsertime.py index bce1a4e96..a18dc8527 100644 --- a/taskcluster/fenix_taskgraph/transforms/browsertime.py +++ b/taskcluster/fenix_taskgraph/transforms/browsertime.py @@ -134,8 +134,8 @@ def build_browsertime_task(config, tasks): run_visual_metrics = task.pop("run-visual-metrics", False) if run_visual_metrics: task["run"]["command"].append("--browsertime-video") + task["run"]["command"].append("--browsertime-visualmetrics") task["run"]["command"].append("--browsertime-no-ffwindowrecorder") - task["attributes"]["run-visual-metrics"] = True # Build taskcluster group and symol task["treeherder"]["symbol"] = "Btime(%s)" % symbol diff --git a/taskcluster/fenix_taskgraph/transforms/visual_metrics.py b/taskcluster/fenix_taskgraph/transforms/visual_metrics.py deleted file mode 100644 index fdec58a70..000000000 --- a/taskcluster/fenix_taskgraph/transforms/visual_metrics.py +++ /dev/null @@ -1,91 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -""" -Generate labels for tasks without names, consistently. -Uses attributes from `primary-dependency`. -""" - -from taskgraph.transforms.base import TransformSequence - -transforms = TransformSequence() - -SYMBOL = "{groupSymbol}({symbol}-vismet)" -# the test- prefix makes the task SETA-optimized. 
-LABEL = "test-vismet-{platform}-{label}" - - -@transforms.add -def make_label(config, jobs): - """Generate a sane label for a new task constructed from a dependency - Using attributes from the dependent job and the current task kind""" - for job in jobs: - dep_job = job["primary-dependency"] - attr = dep_job.attributes.get - - if attr("locale", job.get("locale")): - template = "{kind}-{locale}-{build_platform}/{build_type}" - elif attr("l10n_chunk"): - template = "{kind}-{build_platform}-{l10n_chunk}/{build_type}" - elif config.kind.startswith("release-eme-free") or config.kind.startswith( - "release-partner-repack" - ): - suffix = job.get("extra", {}).get("repack_suffix", None) or job.get( - "extra", {} - ).get("repack_id", None) - template = "{kind}-{build_platform}" - if suffix: - template += "-{}".format(suffix.replace("/", "-")) - else: - template = "{kind}-{build_platform}/{build_type}" - job["label"] = template.format( - kind=config.kind, - build_platform=attr("build_platform"), - build_type=attr("build_type"), - locale=attr("locale", job.get("locale", "")), # Locale can be absent - l10n_chunk=attr("l10n_chunk", ""), # Can be empty - ) - - yield job - - -@transforms.add -def run_visual_metrics(config, jobs): - for job in jobs: - dep_job = job.pop("primary-dependency", None) - if dep_job is not None: - platform = dep_job.task["extra"]["treeherder-platform"] - job["dependencies"] = {dep_job.label: dep_job.label} - - # Add the artifact to be processed as a fetches artifact - job["fetches"][dep_job.label] = [ - {"artifact": "browsertime-results.tgz", "extract": True} - ] - - # vismet runs on Linux but we want to have it displayed - # alongside the job it was triggered by to make it easier for - # people to find it back. - job["label"] = LABEL.format(platform=platform, label=dep_job.label) - treeherder_info = dict(dep_job.task["extra"]["treeherder"]) - job["treeherder"]["platform"] = platform - job["treeherder"]["symbol"] = SYMBOL.format( - groupSymbol=treeherder_info["groupSymbol"], - symbol=treeherder_info["symbol"], - ) - - # Store the platform name so we can use it to calculate - # the similarity metric against other tasks - job["worker"].setdefault("env", {})["TC_PLATFORM"] = platform - - # run-on-projects needs to be set based on the dependent task - attributes = dict(dep_job.attributes) - job["run-on-projects"] = attributes["run_on_projects"] - - # The run-on-tasks-for also needs to be setup here - job["run-on-tasks-for"] = attributes.get("run_on_tasks_for", []) - - # We can't use the multi_dep transforms which remove this - # field, so we remove the dependent-tasks entry here - del job["dependent-tasks"] - - yield job