/* Any copyright is dedicated to the Public Domain.
   http://creativecommons.org/publicdomain/zero/1.0/ */

"use strict";

const rootDataUrl =
  "chrome://mochitests/content/browser/toolkit/components/ml/tests/browser/data/articles";

async function fetchArticle(url) {
  const response = await fetch(url);
  return await response.text();
}

let testData = [];

const distilBartModel = {
  taskName: "summarization",
  modelId: "Mozilla/distilbart-cnn-12-6",
  dtype: "q8",
  // To keep history, we reuse xenova in the perf name
  perfModelId: "Xenova/distilbart-cnn-12-6",
};

const qwenModel = {
  taskName: "text-generation",
  modelId: "Mozilla/Qwen2.5-0.5B-Instruct",
  dtype: "q8",
  // To keep history, we reuse onnx-community in the perf name
  perfModelId: "onnx-community/Qwen2.5-0.5B-Instruct",
};

const articles = [{ data: `${rootDataUrl}/big.txt`, type: "big" }];

// Build one test configuration per (model, article) pair. The loop order
// matters: testData[0] is the distilbart entry, testData[1] the Qwen entry.
let numEngines = 0;

for (const model of [distilBartModel, qwenModel]) {
  for (const article of articles) {
    // Replace the slash in the model id with a dash so the perf name is a
    // single token, then append the article type.
    const perfName = `${model.perfModelId.replace(/\//g, "-")}_${article.type}`;
    const engineId = `engine-${numEngines}`;
    const options = { ...model, article: article.data, engineId, perfName };
    numEngines += 1;
    options.trackPeakMemory = false;
    testData.push(options);
  }
}

const perfMetadata = {
  owner: "GenAI Team",
  name: "browser_ml_summarizer_perf.js",
  description: "Template test for latency for Summarizer model",
  options: {
    default: {
      perfherder: true,
      perfherder_metrics: [
        {
          name: "latency",
          unit: "ms",
          shouldAlert: true,
        },
        {
          name: "memory",
          unit: "MiB",
          shouldAlert: true,
        },
        {
          name: "tokenSpeed",
          unit: "tokens/s",
          shouldAlert: true,
          lowerIsBetter: false,
        },
        {
          name: "charactersSpeed",
          unit: "chars/s",
          shouldAlert: true,
          lowerIsBetter: false,
        },
      ],
      verbose: true,
      manifest: "perftest.toml",
      manifest_flavor: "browser-chrome",
      try_platform: ["linux", "mac", "win"],
    },
  },
};

requestLongerTimeout(60);

// To run locally:
// pip install huggingface-hub
// huggingface-cli download {model_id} --local-dir MOZ_ML_LOCAL_DIR/onnx-models/{model_id}/{revision}
// Update your test in
// Then run: ./mach lint -l perfdocs --fix .
// This will auto-generate docs

async function run_summarizer_with_perf({
  taskName,
  modelId,
  article,
  dtype,
  engineId,
  perfName,
  trackPeakMemory,
  browserPrefs = null,
}) {
  let chatInput = await fetchArticle(article);
  const minNewTokens = 195;
  const maxNewTokens = 200;
  let requestOptions = {
    max_new_tokens: maxNewTokens,
    min_new_tokens: minNewTokens,
  };

  const options = new PipelineOptions({
    engineId,
    taskName,
    modelHubUrlTemplate: "{model}/{revision}",
    modelId,
    modelRevision: "main",
    dtype,
    useExternalDataFormat: true,
    timeoutMS: -1,
  });

  if (taskName === "text-generation") {
    // Text-generation models take a chat-style prompt rather than raw text,
    // so wrap the article in a system + user message pair.
    chatInput = [
      {
        role: "system",
        content:
          "Your role is to summarize the provided content as succinctly as possible while retaining the most important information",
      },
      {
        role: "user",
        content: chatInput,
      },
    ];
    requestOptions = {
      max_new_tokens: maxNewTokens,
      min_new_tokens: minNewTokens,
      return_full_text: true,
      return_tensors: false,
      do_sample: false,
    };
  }

  const request = {
    args: [chatInput],
    options: requestOptions,
  };

  info(`is request null | ${request === null || request === undefined}`);

  await perfTest({
    name: `sum-${perfName}`,
    options,
    request,
    trackPeakMemory,
    browserPrefs,
  });
}

/*
 * distilbart model (testData[0])
 */
add_task(async function test_ml_distilbart_tiny_article() {
  await run_summarizer_with_perf(testData[0]);
});

add_task(async function test_ml_distilbart_tiny_article_mem() {
  await run_summarizer_with_perf({ ...testData[0], trackPeakMemory: true });
});

add_task(async function test_ml_distilbart_tiny_article_mem_no_ion() {
  await run_summarizer_with_perf({
    ...testData[0],
    trackPeakMemory: true,
    browserPrefs: [["javascript.options.wasm_optimizingjit", false]],
  });
});

/*
 * Qwen model (testData[1])
 */
add_task(async function test_ml_qwen_big_article() {
  await run_summarizer_with_perf(testData[1]);
});

add_task(async function test_ml_qwen_big_article_with_mem() {
  await run_summarizer_with_perf({ ...testData[1], trackPeakMemory: true });
});