/**
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

/**
 * SearchBrowsingHistoryDomainBoost
 *
 * Temporary heuristic for general-category queries (games, movies, news, etc.)
 * when semantic embeddings over title/description are insufficient.
 *
 * Safe to remove once richer embeddings or better intent classification lands.
 */

/**
 * Static category table; the default `categoriesJson` argument of
 * `matchDomains`.
 *
 * Each category carries:
 *  - `id`: a label for the category (not read by the functions in this file),
 *  - `terms`: phrases that are normalized and matched as whole-token phrases
 *    against the normalized query,
 *  - `domains`: hosts whose history entries should be surfaced on a match.
 *
 * Ordering is significant: `matchDomains` walks `categories` in array order
 * and returns the domains of the FIRST category with a matching term, so a
 * specific category must be listed before any broader category whose terms
 * it contains (e.g. "tech_news" before "news").
 *
 * Hosts are matched verbatim plus a `www.` variant only (see
 * `buildDomainUrlWhere`); any other subdomain needs its own entry, as done
 * for "music.apple.com" / "podcasts.apple.com".
 */
export const CATEGORIES_JSON = {
  // Language of the terms below; not read by the functions in this file.
  language: "en",
  categories: [
    {
      id: "games",
      terms: [
        "game",
        "games",
        "video game",
        "video games",
        "pc games",
        "console games",
      ],
      domains: [
        "store.steampowered.com",
        "roblox.com",
        "ign.com",
        "gamespot.com",
        "polygon.com",
        "metacritic.com",
        "epicgames.com",
        "store.playstation.com",
        "xbox.com",
        "nintendo.com",
      ],
    },
    {
      id: "movies",
      terms: ["movie", "movies", "film", "films", "cinema"],
      domains: [
        "imdb.com",
        "rottentomatoes.com",
        "metacritic.com",
        "letterboxd.com",
        "netflix.com",
        "primevideo.com",
        "disneyplus.com",
        "hulu.com",
        "max.com",
      ],
    },
    {
      id: "tv",
      // NOTE(review): the bare tokens "show"/"shows" also match verb uses
      // such as "show me ..." because matching is purely token-based.
      terms: ["tv show", "tv shows", "show", "shows", "series", "tv series"],
      domains: [
        "imdb.com",
        "rottentomatoes.com",
        "metacritic.com",
        "tvmaze.com",
        "thetvdb.com",
        "netflix.com",
        "primevideo.com",
        "disneyplus.com",
        "hulu.com",
        "max.com",
      ],
    },
    {
      id: "books",
      terms: ["book", "books", "novel", "novels"],
      domains: [
        "goodreads.com",
        "gutenberg.org",
        "openlibrary.org",
        "barnesandnoble.com",
        "indigo.ca",
      ],
    },
    {
      id: "anime",
      terms: ["anime", "manga"],
      domains: [
        "myanimelist.net",
        "anilist.co",
        "kitsu.app",
        "crunchyroll.com",
      ],
    },
    {
      id: "music",
      terms: ["music", "song", "songs", "album", "albums", "lyrics"],
      domains: [
        "spotify.com",
        "music.apple.com",
        "soundcloud.com",
        "bandcamp.com",
        "music.youtube.com",
      ],
    },
    {
      id: "podcasts",
      terms: ["podcast", "podcasts"],
      domains: [
        "podcasts.apple.com",
        "overcast.fm",
        "pocketcasts.com",
        "castbox.fm",
      ],
    },
    {
      id: "papers_research",
      terms: [
        "paper",
        "papers",
        "research paper",
        "research papers",
        "academic paper",
        "academic papers",
        "journal",
        "journals",
        "study",
        "studies",
        "publication",
        "publications",
      ],
      domains: [
        "scholar.google.com",
        "arxiv.org",
        "semanticscholar.org",
        "pubmed.ncbi.nlm.nih.gov",
        "researchgate.net",
        "ieeexplore.ieee.org",
        "dl.acm.org",
        "springer.com",
        "nature.com",
        "science.org",
      ],
    },
    {
      // Must stay before "news": every term here contains the token "news",
      // which the generic category would otherwise claim first.
      id: "tech_news",
      terms: ["tech news", "technology news", "startup news"],
      domains: [
        "theverge.com",
        "techcrunch.com",
        "wired.com",
        "arstechnica.com",
        "engadget.com",
      ],
    },
    {
      // Must stay before "news" for the same reason as "tech_news".
      id: "finance_news",
      terms: ["finance news", "business news", "market news", "stock news"],
      domains: [
        "bloomberg.com",
        "wsj.com",
        "ft.com",
        "reuters.com",
        "cnbc.com",
      ],
    },
    {
      // Generic fallback for any query containing the token "news" that the
      // more specific news categories above did not match.
      id: "news",
      terms: [
        "news",
        "headline",
        "headlines",
        "breaking news",
        "world news",
        "latest news",
      ],
      domains: [
        "reuters.com",
        "apnews.com",
        "bbc.com",
        "cnn.com",
        "nytimes.com",
        "theguardian.com",
        "washingtonpost.com",
        "aljazeera.com",
        "npr.org",
        "wsj.com",
        "bloomberg.com",
        "ft.com",
      ],
    },
    {
      id: "recipes",
      terms: [
        "recipe",
        "recipes",
        "cooking",
        "food",
        "dinner ideas",
        "meal prep",
      ],
      domains: [
        "allrecipes.com",
        "seriouseats.com",
        "foodnetwork.com",
        "bbcgoodfood.com",
        "epicurious.com",
        // NOTE(review): NYT Cooking pages are presumably served from
        // cooking.nytimes.com - confirm this host can appear in history.
        "nytcooking.com",
      ],
    },
    {
      id: "travel",
      terms: ["travel", "hotels", "places", "destinations", "things to do"],
      domains: [
        "tripadvisor.com",
        "booking.com",
        "expedia.com",
        "airbnb.com",
        "lonelyplanet.com",
      ],
    },
  ],
};

/**
 * Canonicalizes free text for phrase matching: lowercases it and reduces it
 * to its letter/number tokens joined by single spaces.
 *
 * Every run of characters that is neither a Unicode letter nor a Unicode
 * number (whitespace, punctuation, symbols, ...) acts as one separator, and
 * separators at either end of the text are dropped.
 *
 * @param {string} s
 *   Raw text. Any falsy value is treated as the empty string.
 * @returns {string}
 *   The normalized text, or "" when no letter/number token is present.
 */
function normalizeQuery(s) {
  const lowered = (s || "").toLowerCase();
  // Splitting on separator runs and discarding the empty edge tokens is
  // equivalent to replacing each run with a space and trimming the result.
  const tokens = lowered.split(/[^\p{L}\p{N}]+/u).filter(Boolean);
  return tokens.join(" ");
}

/**
 * Decides whether `searchTerm` is a general category query and, if so, which
 * domains belong to that category.
 *
 * Both the query and every category term are passed through
 * `normalizeQuery`; a term matches when it occurs in the query as a
 * whole-token phrase. Categories are tried in array order and the first one
 * with a matching term wins.
 *
 * @param {string} searchTerm
 *   The raw user query.
 * @param {object} [categoriesJson=CATEGORIES_JSON]
 *   Category table exposing a `categories` list whose entries have `terms`
 *   and `domains`.
 * @returns {string[]|null}
 *   The winning category's own `domains` array (not a copy), or null when
 *   the query normalizes to nothing or no term matches.
 */
export function matchDomains(searchTerm, categoriesJson = CATEGORIES_JSON) {
  const normalizedQuery = normalizeQuery(searchTerm);
  if (normalizedQuery === "") {
    return null;
  }

  // Surrounding both sides with spaces turns a plain substring test into a
  // whole-token phrase test ("game" must not match inside "gameplay").
  const haystack = ` ${normalizedQuery} `;

  for (const category of categoriesJson.categories) {
    for (const term of category.terms) {
      const phrase = normalizeQuery(term);
      if (phrase === "") {
        continue;
      }
      if (haystack.includes(` ${phrase} `)) {
        return category.domains;
      }
    }
  }

  return null;
}

/**
 * Builds a SQL WHERE fragment matching `http`/`https` URLs whose host is one
 * of the given domains or that domain's `www.` variant.
 *
 * Every LIKE pattern is anchored at the scheme, so a URL that merely embeds
 * a matching URL in its path or query string (redirectors, web archives,
 * ...) is not selected. Hosts are compared verbatim: other subdomains, ports
 * and userinfo do not match.
 *
 * @param {string[]} domains
 *   Host names such as "imdb.com". Entries are trimmed and lowercased;
 *   nullish and blank entries are skipped.
 * @returns {{ where: string, params: object }}
 *   `where` is a parenthesized OR-chain of `lower(url) LIKE :dN` tests, or
 *   the constant-false expression "0" when no usable domain was given.
 *   `params` maps each `dN` placeholder to its LIKE pattern.
 */
function buildDomainUrlWhere(domains) {
  const schemes = ["http", "https"];
  const hostPrefixes = ["", "www."];
  const clauses = [];
  const params = {};

  for (const raw of domains || []) {
    // `?? ""` keeps String() from turning null/undefined into the bogus
    // hosts "null"/"undefined".
    const host = String(raw ?? "")
      .trim()
      .toLowerCase();
    if (!host) {
      continue;
    }

    // Per domain, in order:
    // - http://domain/...
    // - http://www.domain/...
    // - https://domain/...
    // - https://www.domain/...
    for (const scheme of schemes) {
      for (const prefix of hostPrefixes) {
        const name = `d${clauses.length}`;
        params[name] = `${scheme}://${prefix}${host}/%`;
        clauses.push(`lower(url) LIKE :${name}`);
      }
    }
  }

  return {
    where: clauses.length ? `(${clauses.join(" OR ")})` : "0",
    params,
  };
}

/**
 * Queries `moz_places` for history entries restricted to a set of domains,
 * optionally bounded by a time window.
 *
 * Rows with `frecency` equal to 0 are excluded. Results are ordered by
 * `last_visit_date` (newest first), then by `frecency` (highest first), and
 * capped at `historyLimit`. Each database row is converted with
 * `buildHistoryRow`, awaited one row at a time in result order.
 *
 * @param {object} params
 * @param {object} params.conn
 *   Connection exposing `executeCached(sql, bindings)`.
 * @param {string[]} params.domains
 *   Domains to restrict to; turned into the URL filter by
 *   `buildDomainUrlWhere`.
 * @param {number|null} params.startTs
 *   Inclusive lower bound compared directly against `last_visit_date`;
 *   null disables the bound.
 * @param {number|null} params.endTs
 *   Inclusive upper bound compared directly against `last_visit_date`;
 *   null disables the bound.
 * @param {number} params.historyLimit
 *   Maximum number of rows to fetch.
 * @param {Function} params.buildHistoryRow
 *   Maps one database row to the result shape; may return a promise.
 * @returns {Promise<object[]>}
 *   The converted rows, or an empty array when `conn` is missing or
 *   `domains` is not a non-empty array.
 */
export async function searchByDomains({
  conn,
  domains,
  startTs,
  endTs,
  historyLimit,
  buildHistoryRow,
}) {
  const hasDomains = Array.isArray(domains) && domains.length > 0;
  if (!conn || !hasDomains) {
    return [];
  }

  const { where: domainClause, params: domainParams } =
    buildDomainUrlWhere(domains);

  const sql = `
      SELECT id,
             title,
             url,
             NULL AS distance,
             visit_count,
             frecency,
             last_visit_date,
             preview_image_url
      FROM moz_places
      WHERE frecency <> 0
        AND (:startTs IS NULL OR last_visit_date >= :startTs)
        AND (:endTs IS NULL OR last_visit_date <= :endTs)
        AND ${domainClause}
      ORDER BY last_visit_date DESC, frecency DESC
      LIMIT :limit
    `;
  const bindings = {
    startTs,
    endTs,
    limit: historyLimit,
    ...domainParams,
  };

  const dbRows = await conn.executeCached(sql, bindings);

  // Convert sequentially so rows are built, and returned, in query order.
  const history = [];
  for (const dbRow of dbRows) {
    const converted = await buildHistoryRow(dbRow);
    history.push(converted);
  }
  return history;
}

/**
 * Merges two result lists: rows from `primary` come first in their original
 * order, then rows from `secondary` top the result up, until `limit` rows
 * have been collected.
 *
 * Rows are de-duplicated by `url`, falling back to `id` when `url` is
 * missing or empty; the first occurrence wins. A row with neither a `url`
 * nor an `id` cannot be compared with anything and is therefore always
 * kept rather than being treated as a duplicate of other keyless rows.
 *
 * @param {object[]} primary
 *   Preferred rows; a nullish value is treated as an empty list.
 * @param {object[]} secondary
 *   Fallback rows; a nullish value is treated as an empty list.
 * @param {number} limit
 *   Maximum number of rows to return. Zero or a negative value yields an
 *   empty array; null or undefined means no cap.
 * @returns {object[]}
 *   A new array holding the same row objects that were passed in.
 */
export function mergeDedupe(primary, secondary, limit) {
  const cap = limit ?? Infinity;
  // The cap is otherwise only checked after a push, which would let one row
  // through even when no rows were requested.
  if (cap <= 0) {
    return [];
  }

  const seen = new Set();
  const out = [];

  for (const list of [primary, secondary]) {
    for (const row of list || []) {
      const key = row?.url || row?.id;
      const hasKey = key !== undefined && key !== null && key !== "";

      if (hasKey && seen.has(key)) {
        continue;
      }
      if (hasKey) {
        seen.add(key);
      }

      out.push(row);
      if (out.length >= cap) {
        return out;
      }
    }
  }

  return out;
}

/**
 * Frozen namespace object bundling this module's public helpers
 * (`matchDomains`, `searchByDomains`, `mergeDedupe`) under a single export,
 * in addition to their individual named exports. `Object.freeze` prevents
 * properties from being added, removed or reassigned on the object.
 */
export const SearchBrowsingHistoryDomainBoost = Object.freeze({
  matchDomains,
  searchByDomains,
  mergeDedupe,
});