// META: title=Language Model Prompt Multimodal
// META: script=/resources/testdriver.js
// META: script=/resources/testdriver-vendor.js
// META: script=../resources/util.js
// META: timeout=long

'use strict';

const kImagePrompt = 'describe this';
const kAudioPrompt = 'transcribe this';

const kValidImagePath = '/images/computer.jpg';
const kValidImageKeywords =
    ['image', 'computer', 'keyboard', 'desk', 'PC', 'monitor', 'screen'];
const kValidImageRegex = matchKeywordsRegex(kValidImageKeywords);

const kValidAudioPath = '/media/speech.wav';
const kValidAudioKeywords =
    ['audio', 'speech', 'sentence', 'single', 'segment'];
const kValidAudioRegex = matchKeywordsRegex(kValidAudioKeywords);

const kValidSVGImagePath = '/images/pattern.svg';
const kValidSVGImageKeywords =
    ['image', 'color', 'red', 'green', 'blue', 'black'];
const kValidSVGImageRegex = matchKeywordsRegex(kValidSVGImageKeywords);

const kValidVideoPath = '/media/test.webm';
const kValidVideoKeywords = [
  'image', 'color', 'bip', 'black', 'white', 'yellow', 'green', 'blue', 'red',
  'video', 'screen'
];
const kValidVideoRegex = matchKeywordsRegex(kValidVideoKeywords);

const kValidCanvasImageKeywords = ['image', 'black', 'square', 'blank'];
const kValidCanvasImageRegex = matchKeywordsRegex(kValidCanvasImageKeywords);

const kImageOptions = {expectedInputs: [{type: 'image'}]};
const kAudioOptions = {expectedInputs: [{type: 'audio'}]};

function messageWithContent(prompt, type, value) {
  return [{
    role: 'user',
    content: [{type: 'text', value: prompt}, {type: type, value: value}]
  }];
}

// Helper function to create a regex from some keywords.
function matchKeywordsRegex(keywords) {
  const keywordsPattern = keywords.join('|');
  return new RegExp(`(${keywordsPattern})`, 'i');
}

/*****************************************
 * General tests
 *****************************************/

promise_test(async t => {
  await ensureLanguageModel(kImageOptions);
  const newImage = new Image();
  newImage.src = kValidImagePath;
  const session = await createLanguageModel(kImageOptions);
  // TODO(crbug.com/409615288): Expect a TypeError according to the spec.
  return promise_rejects_dom(
      t, 'SyntaxError',
      session.prompt(messageWithContent(kImagePrompt, 'text', newImage)));
}, 'Prompt with type:"text" and image content should reject');

promise_test(async t => {
  await ensureLanguageModel(kImageOptions);
  const newImage = new Image();
  newImage.src = kValidImagePath;
  const session = await createLanguageModel(kImageOptions);
  return promise_rejects_dom(t, 'NotSupportedError', session.prompt([
    {role: 'assistant', content: [{type: 'image', value: newImage}]}
  ]));
}, 'Prompt with assistant role should reject with multimodal input');

/*****************************************
 * Image tests
 *****************************************/

promise_test(async (t) => {
  await ensureLanguageModel();
  const newImage = new Image();
  newImage.src = kValidImagePath;
  const session = await createLanguageModel();
  return promise_rejects_dom(
      t, 'NotSupportedError',
      session.prompt(messageWithContent(kImagePrompt, 'image', newImage)));
}, 'Prompt image without `image` expectedInput');

promise_test(async () => {
  const blob = await (await fetch(kValidImagePath)).blob();
  const options = {
    expectedInputs: [{type: 'image'}],
    initialPrompts: messageWithContent(kImagePrompt, 'image', blob)
  };
  await ensureLanguageModel(options);
  const session = await LanguageModel.create(options);
  const tokenLength = await session.measureInputUsage(options.initialPrompts);
  assert_greater_than(tokenLength, 0);
  assert_true(isValueInRange(session.inputUsage, tokenLength));
  assert_regexp_match(
      await session.prompt([{role: 'system', content: ''}]), kValidImageRegex);
}, 'Test Image initialPrompt');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const blob = await (await fetch(kValidImagePath)).blob();
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kImagePrompt, 'image', blob));
  assert_regexp_match(result, kValidImageRegex);
}, 'Prompt with Blob image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const blob = await (await fetch(kValidImagePath)).blob();
  const bitmap = await createImageBitmap(blob);
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kImagePrompt, 'image', bitmap));
  assert_regexp_match(result, kValidImageRegex);
}, 'Prompt with ImageBitmap image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const blob = await (await fetch(kValidImagePath)).blob();
  const bitmap = await createImageBitmap(blob);
  const frame = new VideoFrame(bitmap, {timestamp: 1});
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kImagePrompt, 'image', frame));
  frame.close();  // Avoid JS garbage collection warning.
  assert_regexp_match(result, kValidImageRegex);
}, 'Prompt with VideoFrame image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const canvas = new OffscreenCanvas(512, 512);
  // Requires a context to convert to a bitmap.
  var context = canvas.getContext('2d');
  context.fillRect(10, 10, 200, 200);
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kImagePrompt, 'image', canvas));
  assert_regexp_match(result, kValidCanvasImageRegex);
}, 'Prompt with OffscreenCanvas image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const session = await createLanguageModel(kImageOptions);
  const result = await session.prompt(
      messageWithContent(kImagePrompt, 'image', new ImageData(256, 256)));
  assert_regexp_match(result, kValidImageRegex);
}, 'Prompt with ImageData image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const newImage = new Image();
  newImage.src = kValidImagePath;
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kImagePrompt, 'image', newImage));
  assert_regexp_match(result, kValidImageRegex);
}, 'Prompt with HTMLImageElement image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  var canvas = document.createElement('canvas');
  canvas.width = 1224;
  canvas.height = 768;
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kImagePrompt, 'image', canvas));
  assert_regexp_match(result, kValidCanvasImageRegex);
}, 'Prompt with HTMLCanvasElement image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const imageData = await fetch(kValidImagePath);
  const session = await createLanguageModel(kImageOptions);
  const result = await session.prompt(
      messageWithContent(kImagePrompt, 'image', await imageData.arrayBuffer()));
  assert_regexp_match(result, kValidImageRegex);
}, 'Prompt with ArrayBuffer image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const imageData = await fetch(kValidImagePath);
  const session = await createLanguageModel(kImageOptions);
  const result = await session.prompt(messageWithContent(
      kImagePrompt, 'image', new DataView(await imageData.arrayBuffer())));
  assert_regexp_match(result, kValidImageRegex);
}, 'Prompt with ArrayBufferView image content');

promise_test(async (t) => {
  await ensureLanguageModel(kImageOptions);
  const imageData = await fetch(kValidImagePath);
  const session = await createLanguageModel(kImageOptions);
  const buffer = await imageData.arrayBuffer();
  // Add 256 bytes of padding in front of the image data.
  const bufferView = new Uint8Array(buffer);
  const newBufferArray = new ArrayBuffer(256 + buffer.byteLength);
  const imageView = new Uint8Array(newBufferArray, 256, buffer.byteLength);
  imageView.set(bufferView);

  const result = await session.prompt(
      messageWithContent(kImagePrompt, 'image', imageView));
  assert_regexp_match(result, kValidImageRegex);

  // Offset causes 56 bytes of blank data, resulting in a decoding error.
  await promise_rejects_dom(
      t, 'InvalidStateError',
      session.prompt(messageWithContent(
          kImagePrompt, 'image',
          new Uint8Array(newBufferArray, 200, buffer.byteLength))));
}, 'Prompt with ArrayBufferView image content with an offset.');


promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const newImage = new Image();
  newImage.src = kValidSVGImagePath;
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kImagePrompt, 'image', newImage));
  assert_regexp_match(result, kValidSVGImageRegex);
}, 'Prompt with HTMLImageElement image content (with SVG)');


promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const svg = document.createElementNS('http://www.w3.org/2000/svg', 'svg');
  svg.setAttribute('width', '100');
  svg.setAttribute('height', '100');
  const svgImage =
      document.createElementNS('http://www.w3.org/2000/svg', 'image');
  svgImage.setAttribute('href', kValidImagePath);
  svgImage.setAttribute('decoding', 'sync');
  svg.appendChild(svgImage);
  document.body.appendChild(svg);

  // Must wait for the SVG and image to load first.
  // TODO(crbug.com/417260923): Make prompt Api await the image to be loaded.
  const {promise, resolve} = Promise.withResolvers();
  svgImage.addEventListener('load', resolve);
  await promise;
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kImagePrompt, 'image', svgImage));
  assert_regexp_match(result, kValidImageRegex);
}, 'Prompt with SVGImageElement image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  var video = document.createElement('video');
  video.src = kValidVideoPath;
  video.width = 1224;
  video.height = 768;
  // Make sure the video plays without requiring a gesture.
  video.muted = true;
  video.playsInline = true;
  video.autoplay = true;
  // Video must have frames fetched. See crbug.com/417249941#comment3
  await video.play();
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kImagePrompt, 'image', video));
  assert_regexp_match(result, kValidVideoRegex);
}, 'Prompt with HTMLVideoElement image content');

/*****************************************
 * Audio tests
 *****************************************/

promise_test(async (t) => {
  await ensureLanguageModel();
  const blob = await (await fetch(kValidAudioPath)).blob();
  const session = await createLanguageModel();
  return promise_rejects_dom(
      t, 'NotSupportedError',
      session.prompt(messageWithContent(kImagePrompt, 'audio', blob)));
}, 'Prompt audio without `audio` expectedInput');

promise_test(async () => {
  const blob = await (await fetch(kValidAudioPath)).blob();
  const options = {
    expectedInputs: [{type: 'audio'}],
    initialPrompts: messageWithContent(kAudioPrompt, 'audio', blob)
  };
  await ensureLanguageModel(options);
  const session = await LanguageModel.create(options);
  const tokenLength = await session.measureInputUsage(options.initialPrompts);
  assert_greater_than(tokenLength, 0);
  assert_true(isValueInRange(session.inputUsage, tokenLength));
  assert_regexp_match(
      await session.prompt([{role: 'system', content: ''}]), kValidAudioRegex);
}, 'Test Audio initialPrompt');

promise_test(async () => {
  await ensureLanguageModel(kAudioOptions);
  const blob = await (await fetch(kValidAudioPath)).blob();
  const session = await createLanguageModel(kAudioOptions);
  const result =
      await session.prompt(messageWithContent(kAudioPrompt, 'audio', blob));
  assert_regexp_match(result, kValidAudioRegex);
}, 'Prompt with Blob audio content');

promise_test(async (t) => {
  await ensureLanguageModel(kAudioOptions);
  const blob = await (await fetch(kValidImagePath)).blob();
  const session = await createLanguageModel(kAudioOptions);
  // TODO(crbug.com/409615288): Expect a TypeError according to the spec.
  return promise_rejects_dom(
      t, 'DataError',
      session.prompt(messageWithContent(kImagePrompt, 'audio', blob)));
}, 'Prompt audio with blob containing invalid audio data.');

promise_test(async () => {
  await ensureLanguageModel(kAudioOptions);
  const audio_data = await fetch(kValidAudioPath);
  const audioCtx = new AudioContext();
  const buffer = await audioCtx.decodeAudioData(await audio_data.arrayBuffer());
  const session = await createLanguageModel(kAudioOptions);
  const result =
      await session.prompt(messageWithContent(kAudioPrompt, 'audio', buffer));
  assert_regexp_match(result, kValidAudioRegex);
}, 'Prompt with AudioBuffer');

promise_test(async () => {
  await ensureLanguageModel(kAudioOptions);
  const audio_data = await fetch(kValidAudioPath);
  const session = await createLanguageModel(kAudioOptions);
  const result = await session.prompt(messageWithContent(
      kAudioPrompt, 'audio', await audio_data.arrayBuffer()));
  assert_regexp_match(result, kValidAudioRegex);
}, 'Prompt with BufferSource - ArrayBuffer');