/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /** * This file has classes to combine New Tab feature events (aggregated from a sqlLite table) into an interest model. */ import { FORMAT, AggregateResultKeys, SPECIAL_FEATURE_CLICK, } from "resource://newtab/lib/InferredModel/InferredConstants.sys.mjs"; export const DAYS_TO_MS = 60 * 60 * 24 * 1000; const MAX_INT_32 = 2 ** 32; /** * Divides numerator fields by the denominator. Value is set to 0 if denominator is missing or 0. * Adds 0 value for all situations where there is a denominator but no numerator value. * * @param {{[key: string]: number}} numerator * @param {{[key: string]: number}} denominator * @returns {{[key: string]: number}} */ export function divideDict(numerator, denominator) { const result = {}; Object.keys(numerator).forEach(k => { result[k] = denominator[k] ? numerator[k] / denominator[k] : 0; }); Object.keys(denominator).forEach(k => { if (!(k in result)) { result[k] = 0.0; } }); return result; } /** * Unary encoding with randomized response for differential privacy. * The output must be decoded to back to an integer when aggregating a historgram on a server * * @param {number} x - Integer input (0 <= x < N) * @param {number} N - Number of values (see above) * @param {number} p - Probability of keeping a 1-bit as 1 (after one-hot encoding the output) * @param {number} q - Probability of flipping a 0-bit to 1 * @returns {string} - Bitstring after unary encoding and randomized response */ export function unaryEncodeDiffPrivacy(x, N, p, q) { const bitstring = []; const randomValues = new Uint32Array(N); crypto.getRandomValues(randomValues); for (let i = 0; i < N; i++) { const trueBit = i === x ? 1 : 0; const rand = randomValues[i] / MAX_INT_32; if (trueBit === 1) { bitstring.push(rand <= p ? "1" : "0"); } else { bitstring.push(rand <= q ? "1" : "0"); } } return bitstring.join(""); } /** * Adds value to all a particular key in a dictionary. If the key is missing it sets the value. * * @param {object} dict - The dictionary to modify. * @param {string} key - The key whose value should be added or set. * @param {number} value - The value to add to the key. */ export function dictAdd(dict, key, value) { if (key in dict) { dict[key] += value; } else { dict[key] = value; } } /** * Apply function to all keys in dictionary, returning new dictionary. * * @param {object} obj - The object whose values should be transformed. * @param {Function} fn - The function to apply to each value. * @returns {object} A new object with the transformed values. */ export function dictApply(obj, fn) { return Object.fromEntries( Object.entries(obj).map(([key, value]) => [key, fn(value)]) ); } /** * Class for re-scaling events based on time passed. */ export class DayTimeWeighting { /** * Instantiate class based on a series of day periods in the past. * * @param {int[]} pastDays Series of number of days, indicating days ago intervals in reverse chronological order. * Intervals are added: If the first value is 1 and the second is 5, then the first interval is 0-1 and second interval is between 1 and 6. * @param {number[]} relativeWeight Relative weight of each period. Must be same length as pastDays */ constructor(pastDays, relativeWeight) { this.pastDays = pastDays; this.relativeWeight = relativeWeight; } static fromJSON(json) { return new DayTimeWeighting(json.days, json.relative_weight); } /** * Get a series of interval pairs in the past based on the pastDays. * * @param {number} curTimeMs Base time time in MS. Usually current time. * @returns */ getDateIntervals(curTimeMs) { let curEndTime = curTimeMs; const res = this.pastDays.map(daysAgo => { const start = new Date(curEndTime - daysAgo * DAYS_TO_MS); const end = new Date(curEndTime); curEndTime = start; return { start, end }; }); return res; } /** * Get relative weight of current index. * * @param {int} weightIndex Index * @returns {number} Weight at index, or 0 if index out of range. */ getRelativeWeight(weightIndex) { if (weightIndex >= this.pastDays.length) { return 0; } return this.relativeWeight[weightIndex]; } } /** * Describes the mapping from a set of aggregated events to a single interest feature */ export class InterestFeatures { constructor( name, featureWeights, thresholds = null, diff_p = 0.5, diff_q = 0.5 ) { this.name = name; this.featureWeights = featureWeights; // Thresholds must be in ascending order this.thresholds = thresholds; this.diff_p = diff_p; this.diff_q = diff_q; } static fromJSON(name, json) { return new InterestFeatures( name, json.features, json.thresholds || null, json.diff_p, json.diff_q ); } /** * Quantize a feature value based on the thresholds specified in the class. * * @param {number} inValue Value computed by model for the feature. * @returns Quantized value. A value between 0 and number of thresholds specified (inclusive) */ applyThresholds(inValue, overrideValue = null) { if (!this.thresholds) { return inValue; } if ( Number.isFinite(overrideValue) && overrideValue >= 0 && overrideValue <= this.thresholds.length ) { return overrideValue; } for (let k = 0; k < this.thresholds.length; k++) { if (inValue < this.thresholds[k]) { return k; } } return this.thresholds.length; } /** * Applies Differential Privacy Unary Encoding method, outputting a one-hot encoded vector with randomization. * Accurate historgrams of values can be computed with reasonable accuracy. * If the class has no or 0 p/q values set for differential privacy, then response is original number non-encoded. * * @param {number} inValue Value to randomize * @returns Bitfield as a string, that is the same as the thresholds length + 1 */ applyDifferentialPrivacy(inValue) { if (!this.thresholds || !this.diff_p) { return inValue; } return unaryEncodeDiffPrivacy( inValue, this.thresholds.length + 1, this.diff_p, this.diff_q ); } } /** * Manages relative tile importance */ export class TileImportance { constructor(tileImportanceMappings) { this.mappings = {}; for (const [formatKey, formatId] of Object.entries(FORMAT)) { if (formatKey in tileImportanceMappings) { this.mappings[formatId] = tileImportanceMappings[formatKey]; } } } getRelativeCTRForTile(tileType) { return this.mappings[tileType] || 1; } static fromJSON(json) { return new TileImportance(json); } } /*** * A simple model for aggregating features */ export class FeatureModel { /** * * @param {string} modelId * @param {object} dayTimeWeighting Data for day time weighting class * @param {object} interestVectorModel Data for interest model * @param {object} tileImportance Data for tile importance * @param {boolean} rescale Whether to rescale to max value * @param {boolean} logScale Whether to apply natural log (ln(x+ 1)) before rescaling */ constructor({ modelId, dayTimeWeighting, interestVectorModel, tileImportance, modelType, rescale = false, logScale = false, normalize = false, normalizeL1 = false, privateFeatures = [], }) { this.modelId = modelId; this.tileImportance = tileImportance; this.dayTimeWeighting = dayTimeWeighting; this.interestVectorModel = interestVectorModel; this.rescale = rescale; this.logScale = logScale; this.normalize = normalize; this.normalizeL1 = normalizeL1; this.modelType = modelType; this.privateFeatures = privateFeatures; } static fromJSON(json) { const dayTimeWeighting = DayTimeWeighting.fromJSON(json.day_time_weighting); const interestVectorModel = {}; const tileImportance = TileImportance.fromJSON(json.tile_importance || {}); for (const [name, featureJson] of Object.entries(json.interest_vector)) { interestVectorModel[name] = InterestFeatures.fromJSON(name, featureJson); } return new FeatureModel({ dayTimeWeighting, tileImportance, interestVectorModel, normalize: json.normalize, normalizeL1: json.normalize_l1, rescale: json.rescale, logScale: json.log_scale, clickScale: json.clickScale, modelType: json.model_type, privateFeatures: json.private_features ?? null, }); } getInterestFeaturesSupported() { /** * Returns the number of discrete values for each feature in the model, based on the thresholds specified. * This is used for a debugging UI */ const output = {}; for (const [name, feature] of Object.entries(this.interestVectorModel)) { if (feature.thresholds) { output[name] = { numValues: feature.thresholds.length + 1 }; } } return output; } supportsCoarseInterests() { return Object.values(this.interestVectorModel).every( fm => fm.thresholds && fm.thresholds.length ); } supportsCoarsePrivateInterests() { return Object.values(this.interestVectorModel).every( fm => fm.thresholds && fm.thresholds.length && "diff_p" in fm && "diff_q" in fm ); } /** * Return date intervals for the query */ getDateIntervals(curTimeMs) { return this.dayTimeWeighting.getDateIntervals(curTimeMs); } /** * Computes an interest vector or aggregate based on the model and raw sql inout. * * @param {object} config * @param {Array.>} config.dataForIntervals Raw aggregate output from SQL query. Could be clicks or impressions * @param {{[key: string]: number}} config.indexSchema Map of keys to indices in each sub-array in dataForIntervals * @param {boolean} [config.applyThresholding=false] Whether to apply thresholds * @param {boolean} [config.applyDifferntialPrivacy=false] Whether to apply differential privacy. This will be used for sending to telemetry. * @returns */ computeInterestVector({ dataForIntervals, indexSchema, applyPostProcessing = false, applyThresholding = false, applyDifferentialPrivacy = false, }) { const processedPerTimeInterval = dataForIntervals.map( (intervalData, idx) => { const intervalRawTotal = {}; const perPeriodTotals = {}; intervalData.forEach(aggElement => { const feature = aggElement[indexSchema[AggregateResultKeys.FEATURE]]; let value = aggElement[indexSchema[AggregateResultKeys.VALUE]]; // In the future we could support format here dictAdd(intervalRawTotal, feature, value); }); const weight = this.dayTimeWeighting.getRelativeWeight(idx); // Weight for this time interval Object.values(this.interestVectorModel).forEach(interestFeature => { for (const featureUsed of Object.keys( interestFeature.featureWeights )) { if (featureUsed in intervalRawTotal) { dictAdd( perPeriodTotals, interestFeature.name, intervalRawTotal[featureUsed] * weight * interestFeature.featureWeights[featureUsed] ); } } }); return perPeriodTotals; } ); // Since we are doing linear combinations, it is fine to do the day-time weighting at this step let totalResults = {}; processedPerTimeInterval.forEach(intervalTotals => { for (const key of Object.keys(intervalTotals)) { dictAdd(totalResults, key, intervalTotals[key]); } }); let numClicks = -1; // If clicks is a feature, it's handled as special case if (SPECIAL_FEATURE_CLICK in totalResults) { numClicks = totalResults[SPECIAL_FEATURE_CLICK]; delete totalResults[SPECIAL_FEATURE_CLICK]; } if (applyPostProcessing) { totalResults = this.applyPostProcessing(totalResults); } if (this.clickScale && numClicks > 0) { totalResults = dictApply(totalResults, x => x / numClicks); } const zeroFilledResult = {}; // Set non-click or impression values in a way that preserves original key order Object.values(this.interestVectorModel).forEach(interestFeature => { zeroFilledResult[interestFeature.name] = totalResults[interestFeature.name] || 0; }); totalResults = zeroFilledResult; if (numClicks >= 0) { // Optional totalResults[SPECIAL_FEATURE_CLICK] = numClicks; } if (applyThresholding) { this.applyThresholding(totalResults, applyDifferentialPrivacy); } return totalResults; } /** * Convert float to discrete values, based on threshold parmaters for each feature in the model. * Values are modifified in place on provided dictionary. * * @param {object} valueDict of all values in model * @param {boolean} applyDifferentialPrivacy whether to apply differential privacy as well as thresholding. * @param {object} coarseValueDictionary Optional dictionary of values to use for thresholding instead of the valueDict. This is used for testing purposes to apply thresholding based on a different set of values, such as the original un-noised values when applying thresholding to a differentially private vector. */ applyThresholding( valueDict, applyDifferentialPrivacy = false, coarseValueDictionary = null ) { for (const key of Object.keys(valueDict)) { if (key in this.interestVectorModel) { valueDict[key] = this.interestVectorModel[key].applyThresholds( valueDict[key], coarseValueDictionary ? coarseValueDictionary[key] : null ); if (applyDifferentialPrivacy) { valueDict[key] = this.interestVectorModel[ key ].applyDifferentialPrivacy(valueDict[key]); } } } } applyPostProcessing(valueDict) { let res = valueDict; if (this.logScale) { res = dictApply(valueDict, x => Math.log(x + 1)); } if (this.rescale) { let divisor = Math.max(...Object.values(res)); if (divisor <= 1e-6) { divisor = 1e-6; } res = dictApply(res, x => x / divisor); } if (this.normalizeL1) { let magnitude = Object.values(res).reduce((sum, c) => sum + c, 0); if (magnitude <= 1e-6) { magnitude = 1e-6; } res = dictApply(res, x => x / magnitude); } if (this.normalize) { let magnitude = Math.sqrt( Object.values(res).reduce((sum, c) => sum + c ** 2, 0) ); if (magnitude <= 1e-6) { magnitude = 1e-6; } res = dictApply(res, x => x / magnitude); } return res; } /** * Computes interest vectors based on click-through rate (CTR) by dividing the click dictionary * by the impression dictionary. Applies differential privacy using Laplace noise, and optionally * computes coarse (without noise) and coarse-private interest vectors if supported by the model. * * In all cases model_id is returned. * * @param {object} params - Function parameters. * @param {{[key: string]: number}} params.clickDict - A dictionary of interest keys to click counts. * @param {{[key: string]: number}} params.impressionDict - A dictionary of interest keys to impression counts. * @param {string} [params.model_id="unknown"] - Identifier for the model used in generating the vectors. * @param {boolean} [params.condensePrivateValues=true] - If true, condenses coarse private interest values into an array format. * * @returns {object} result - An object containing one or more of the following: * @returns {object} result.inferredInterest - A dictionary of private inferred interest scores * @returns {object} [result.coarseInferredInterests] - A dictionary of thresholded interest scores (non-private), if supported. * @returns {object} [result.coarsePrivateInferredInterests] - A dictionary of thresholded interest scores with differential privacy, if supported. */ computeCTRInterestVectors({ clicks, impressions, model_id = "unknown", condensePrivateValues = true, timeZoneOffset, debugOverrideCoarseValueDictionary = null, }) { let inferredInterests = divideDict(clicks, impressions); const originalInterestValues = { ...inferredInterests }; const resultObject = { inferredInterests: { ...inferredInterests, model_id }, }; if (this.supportsCoarseInterests()) { // always true const coarseValues = this.applyPostProcessing({ ...originalInterestValues, }); // Time zone offset special case only in coarse / private interests if (timeZoneOffset && "timeZoneOffset" in this.interestVectorModel) { coarseValues.timeZoneOffset = timeZoneOffset; } this.applyThresholding( coarseValues, false, debugOverrideCoarseValueDictionary ); resultObject.coarseInferredInterests = { ...coarseValues, model_id }; } if (this.supportsCoarsePrivateInterests()) { let coarsePrivateValues = { ...originalInterestValues }; if (this.privateFeatures) { // filter here for private features coarsePrivateValues = Object.fromEntries( Object.entries(coarsePrivateValues).filter(([key]) => this.privateFeatures.includes(key) ) ); } coarsePrivateValues = this.applyPostProcessing(coarsePrivateValues); if ( timeZoneOffset && (!this.privateFeatures || this.privateFeatures.includes("timeZoneOffset")) ) { coarsePrivateValues.timeZoneOffset = timeZoneOffset; } this.applyThresholding( coarsePrivateValues, true, debugOverrideCoarseValueDictionary ); if (condensePrivateValues) { resultObject.coarsePrivateInferredInterests = { // Key order preserved in Gecko values: Object.values(coarsePrivateValues), model_id, }; } else { resultObject.coarsePrivateInferredInterests = { ...coarsePrivateValues, model_id, }; } } return resultObject; } /** * Computes various types of interest vectors from user interaction data across intervals. * Returns standard inferred interests (with Laplace noise), and optionally returns * coarse-grained and private-coarse versions depending on model support. * * @param {object} params - The function parameters. * @param {Array} params.dataForIntervals - An array of data points grouped by time intervals (e.g., clicks, impressions). * @param {object} params.indexSchema - Schema that defines how interest indices should be computed. * @param {string} [params.model_id="unknown"] - Identifier for the model used to produce these vectors. * @param {boolean} [params.condensePrivateValues=true] - If true, condenses coarse private interest values into an array format. * * @returns {object} result - An object containing the computed interest vectors. * @returns {object} result.inferredInterests - A dictionary of private inferred interest values, with `model_id`. * @returns {object} [result.coarseInferredInterests] - Coarse thresholded (non-private) interest vector, if supported. * @returns {object | {values: Array, model_id: string}} [result.coarsePrivateInferredInterests] - Coarse and differentially private interests. * If `condensePrivateValues` is true, returned as an object with a `values` array; otherwise, as a dictionary. */ computeInterestVectors({ dataForIntervals, indexSchema, model_id = "unknown", condensePrivateValues = true, }) { const result = {}; let inferredInterests; let coarseInferredInterests; let coarsePrivateInferredInterests; inferredInterests = this.computeInterestVector({ dataForIntervals, indexSchema, }); result.inferredInterests = { ...inferredInterests }; if (this.supportsCoarseInterests()) { coarseInferredInterests = this.computeInterestVector({ dataForIntervals, indexSchema, applyThresholding: true, }); if (coarseInferredInterests) { result.coarseInferredInterests = { ...coarseInferredInterests, model_id, }; } } if (this.supportsCoarsePrivateInterests()) { coarsePrivateInferredInterests = this.computeInterestVector({ dataForIntervals, indexSchema, applyThresholding: true, applyDifferentialPrivacy: true, }); if (coarsePrivateInferredInterests) { if (condensePrivateValues) { result.coarsePrivateInferredInterests = { // Key order preserved in Gecko values: Object.values(coarsePrivateInferredInterests), model_id, }; } else { result.coarsePrivateInferredInterests = { ...coarsePrivateInferredInterests, model_id, }; } } } return result; } }