/* Any copyright is dedicated to the Public Domain. http://creativecommons.org/publicdomain/zero/1.0/ */ "use strict"; const ENGINES = { intent: { engineId: "intent", taskName: "text-classification", modelId: "Mozilla/mobilebert-uncased-finetuned-LoRA-intent-classifier", modelRevision: "main", modelHubUrlTemplate: "{model}/{revision}", dtype: "q8", device: "wasm", request: { args: [["restaurants in seattle, wa"]], }, }, suggest: { engineId: "suggest", taskName: "token-classification", modelId: "Mozilla/distilbert-uncased-NER-LoRA", modelRevision: "main", dtype: "q8", device: "wasm", request: { args: [["restaurants in seattle, wa"]], }, }, engine3: { engineId: "engine3", taskName: "feature-extraction", modelId: "Xenova/all-MiniLM-L6-v2", modelRevision: "main", dtype: "q8", device: "wasm", request: { args: [["Yet another example sentence", "Checking sentence handling"]], options: { pooling: "mean", normalize: true, }, }, }, engine4: { engineId: "engine4", taskName: "feature-extraction", modelId: "Xenova/all-MiniLM-L6-v2", modelRevision: "main", dtype: "q8", device: "wasm", request: { args: [["Final example sentence", "Ensuring unique inputs"]], options: { pooling: "mean", normalize: true, }, }, }, }; const BASE_METRICS = [ PIPELINE_READY_LATENCY, INITIALIZATION_LATENCY, MODEL_RUN_LATENCY, ]; // Generate prefixed metrics for each engine const METRICS = []; for (let engineKey of Object.keys(ENGINES)) { for (let metric of BASE_METRICS) { METRICS.push(`${engineKey}-${metric}`); } } METRICS.push(TOTAL_MEMORY_USAGE); const journal = {}; for (let metric of METRICS) { journal[metric] = []; } const perfMetadata = { owner: "GenAI Team", name: "browser_ml_engine_multi_perf.js", description: "Testing model execution concurrently", options: { default: { perfherder: true, perfherder_metrics: [ { name: "latency", unit: "ms", shouldAlert: false, }, { name: "memory", unit: "MiB", shouldAlert: false, }, ], verbose: true, manifest: "perftest.toml", manifest_flavor: "browser-chrome", try_platform: ["linux", "mac", "win"], }, }, }; for (let metric of METRICS) { perfMetadata.options.default.perfherder_metrics.push({ name: metric, unit: metric.includes("latency") ? "ms" : "MiB", shouldAlert: false, }); } requestLongerTimeout(10); async function runEngineWithMetrics( engineInstance, engineConfig, iterations = 1 ) { const journal = {}; const engine = engineInstance.engine; for (let i = 0; i < iterations; i++) { const res = await engine.run(engineConfig.request); let metrics = fetchMetrics(res.metrics); // Collect metrics, prefixing each metric name with engineId for (const [metricName, metricVal] of Object.entries(metrics)) { const prefixedMetricName = `${engineConfig.engineId}-${metricName}`; if (!journal[prefixedMetricName]) { journal[prefixedMetricName] = []; } journal[prefixedMetricName].push(metricVal); } } return journal; } /** * Runs inference on an initialized engine instance using the specified request configuration * and collects metrics, prefixed with the engineId. * * @param {object} engineInstance - The engine instance on which to run inference. * @param {EngineConfig} engineConfig - Configuration object with request details for the engine. * @param {number} iterations - Number of times to run the inference for metrics collection. * @returns {Promise