2b658cb29039048ff6ab3dac31f10519b3349bcc braney Tue Apr 28 09:06:06 2026 -0700 quickLiftBench: add --phases for per-phase timing breakdown, refs #37445 Add --phases flag that writes phases.tsv alongside results.tsv and summary.tsv. Long-form rows of every marker emitted by hgTracks (chromAliasSetup, trackDbLoad, parallel data fetch, image generation, cart write, etc.), one row per (case, variant, iteration, phase). A per-(case, variant, phase) median+p90 summary is appended. Useful for localizing where total_ms gaps come from. The first run on bench1_hgwdev surfaced the missing parallel-fetch span on the lifted variant (filed as #37470). Drop the now-unused parse_overall_total helper; total_ms is read directly from parse_phase_timings using the OVERALL_TIMING_LABEL key. Co-Authored-By: Claude Opus 4.7 (1M context) diff --git src/utils/qa/quickLiftBench/quickLiftBench.py src/utils/qa/quickLiftBench/quickLiftBench.py index d3ebc098ff9..24542039b3e 100755 --- src/utils/qa/quickLiftBench/quickLiftBench.py +++ src/utils/qa/quickLiftBench/quickLiftBench.py @@ -87,30 +87,37 @@ type=int, default=None, help="Override defaults.warmup", ) p.add_argument( "--out", default=None, help="Output directory (default: ./results//)", ) p.add_argument( "--verbose", "-v", action="store_true", help="Print per-iteration timings to stderr", ) + p.add_argument( + "--phases", + action="store_true", + help="Also write phases.tsv with per-iteration phase timings parsed " + "from markers (chromAliasSetup, " + "trackDbLoad, parallel data fetch, etc.)", + ) return p.parse_args() def load_config(path): with open(path) as f: cfg = yaml.safe_load(f) if "cases" not in cfg or not cfg["cases"]: sys.exit(f"{path}: no `cases` defined") cfg.setdefault("defaults", {}) cfg["defaults"].setdefault("iterations", 5) cfg["defaults"].setdefault("warmup", 1) cfg["defaults"].setdefault("timeout", 60) cfg["defaults"].setdefault("servers", {}) return cfg @@ -183,37 +190,30 @@ def parse_phase_timings(html): """Pull all label: NNN millis entries.""" if not html: return {} out = {} for m in TIMING_SPAN_RE.finditer(html): label = m.group(1).strip() # Strip HTML tags that sneak into labels (e.g. format strings) label = re.sub(r"<[^>]+>", "", label).strip() out[label] = int(m.group(2)) return out -def parse_overall_total(html): - """Return the 'Overall total time' value from the footer timing span, - or None if not found.""" - phases = parse_phase_timings(html) - return phases.get(OVERALL_TIMING_LABEL) - - def detect_block(html): """Return a short error reason if the response indicates bot-block / error.""" if html is None: return "no-response" lower = html.lower() if "you've been blocked" in lower or "you have been blocked" in lower: return "bot-blocked" if "error" in lower or "errabort" in lower: return "errabort" return None def run_request(session, url, timeout): """Fetch a URL once. Return (http_ms, status_code, html, error).""" start = time.monotonic() @@ -267,39 +267,45 @@ if requested_ids: cases = [c for c in cases if c["id"] in requested_ids] missing = requested_ids - {c["id"] for c in cases} if missing: sys.exit(f"unknown case id(s): {sorted(missing)}") if not cases: sys.exit("no cases selected") timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") out_dir = args.out or os.path.join(os.path.dirname(os.path.abspath(__file__)), "results", timestamp) os.makedirs(out_dir, exist_ok=True) results_path = os.path.join(out_dir, "results.tsv") summary_path = os.path.join(out_dir, "summary.tsv") + phases_path = os.path.join(out_dir, "phases.tsv") if args.phases else None print(f"writing results to {out_dir}", file=sys.stderr) results_fields = [ "case", "variant", "server", "user", "session", "iteration", "http_ms", "load_ms_sum", "draw_ms_sum", "n_tracks", "total_ms", "status_code", "error", ] + phases_fields = [ + "case", "variant", "server", "user", "session", "iteration", + "phase", "ms", + ] all_rows = [] + all_phase_rows = [] with open(results_path, "w", newline="") as rf: writer = csv.DictWriter(rf, fieldnames=results_fields, delimiter="\t") writer.writeheader() for case in cases: cid = case["id"] description = case.get("description", "") print(f"\n=== {cid}: {description}", file=sys.stderr) try: server_key, server_url = resolve_server(case, defaults, args.server_override) except ValueError as e: print(f" {e}", file=sys.stderr) continue @@ -323,72 +329,91 @@ url = build_url(server_url, user, session_name) if args.verbose: print(f" URL: {url}", file=sys.stderr) for _ in range(warmup): run_request(session, url, timeout) for it in range(1, iterations + 1): http_ms, code, html, err = run_request(session, url, timeout) block = detect_block(html) if not err else None if block and not err: err = block load_sum = draw_sum = total_ms = None n_tracks = None + phases = {} if html and not err: rows = parse_track_timing_rows(html) n_tracks = len(rows) if rows: load_sum = sum(r[1] for r in rows) draw_sum = sum(r[2] for r in rows) - total_ms = parse_overall_total(html) + phases = parse_phase_timings(html) + total_ms = phases.get(OVERALL_TIMING_LABEL) if total_ms is None and not rows: err = "no-timing" row = { "case": cid, "variant": vname, "server": server_key, "user": user, "session": session_name, "iteration": it, "http_ms": http_ms, "load_ms_sum": load_sum if load_sum is not None else "", "draw_ms_sum": draw_sum if draw_sum is not None else "", "n_tracks": n_tracks if n_tracks is not None else "", "total_ms": total_ms if total_ms is not None else "", "status_code": code if code is not None else "", "error": err or "", } writer.writerow(row) rf.flush() all_rows.append(row) + + if args.phases: + for label, ms in phases.items(): + all_phase_rows.append({ + "case": cid, + "variant": vname, + "server": server_key, + "user": user, + "session": session_name, + "iteration": it, + "phase": label, + "ms": ms, + }) if args.verbose: print( f" {vname:8s} it={it} " f"http={fmt_ms(http_ms)} " f"load_sum={fmt_ms(load_sum)} draw_sum={fmt_ms(draw_sum)} " f"tracks={fmt_ms(n_tracks)} " f"total={fmt_ms(total_ms)} {err or ''}", file=sys.stderr, ) write_summary(cases, all_rows, summary_path) print(f"\nresults: {results_path}", file=sys.stderr) print(f"summary: {summary_path}", file=sys.stderr) + if phases_path: + write_phases(all_phase_rows, phases_fields, phases_path) + print(f"phases: {phases_path}", file=sys.stderr) + def _to_int(v): if v == "" or v is None: return None return int(v) def write_summary(cases, rows, path): """ Per (case, variant): compute median + p90 of (http_ms, load_ms_sum, draw_ms_sum, total_ms). Then for each compare-pair, emit median ratios. """ by_case_var = {} for r in rows: by_case_var.setdefault(r["case"], {}).setdefault(r["variant"], []).append(r) @@ -475,17 +500,50 @@ ) if summary_lines: print("\nPairwise summary (right/left ratio of overall total render time):", file=sys.stderr) for line in summary_lines: print(line, file=sys.stderr) def ratio(num, denom): if num is None or denom is None or denom == 0: return "" return f"{num / denom:.2f}" +def write_phases(rows, fields, path): + """Write per-iteration phase timings, then a per-(case, variant, phase) + median + n summary appended below.""" + with open(path, "w", newline="") as f: + w = csv.DictWriter(f, fieldnames=fields, delimiter="\t") + w.writeheader() + for r in rows: + w.writerow(r) + + if not rows: + return + + # Per-(case, variant, phase) median for quick eyeballing. + groups = {} + for r in rows: + key = (r["case"], r["variant"], r["phase"]) + groups.setdefault(key, []).append(r["ms"]) + + f.write("\n# Per-(case, variant, phase) median ms across iterations\n") + summary_fields = ["case", "variant", "phase", "n", "median_ms", "p90_ms"] + sw = csv.DictWriter(f, fieldnames=summary_fields, delimiter="\t") + sw.writeheader() + for (cid, vname, phase), values in sorted(groups.items()): + sw.writerow({ + "case": cid, + "variant": vname, + "phase": phase, + "n": len(values), + "median_ms": median_or_none(values), + "p90_ms": p90(values), + }) + + if __name__ == "__main__": main()