/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";

const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");

ChromeUtils.defineModuleGetter(
  this,
  "PlacesUtils",
  "resource://gre/modules/PlacesUtils.jsm"
);

const DEFAULT_TIME_SEGMENTS = [
  { id: "hour", startTime: 3600, endTime: 0, weightPosition: 1 },
  { id: "day", startTime: 86400, endTime: 3600, weightPosition: 0.75 },
  { id: "week", startTime: 604800, endTime: 86400, weightPosition: 0.5 },
  { id: "weekPlus", startTime: 0, endTime: 604800, weightPosition: 0.25 },
  { id: "alltime", startTime: 0, endTime: 0, weightPosition: 0.25 },
];

const DEFAULT_PARAMETER_SETS = {
  "linear-frequency": {
    recencyFactor: 0.4,
    frequencyFactor: 0.5,
    combinedDomainFactor: 0.5,
    perfectFrequencyVisits: 10,
    perfectCombinedDomainScore: 2,
    multiDomainBoost: 0.1,
    itemScoreFactor: 0,
  },
};

const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000;

function merge(...args) {
  return Object.assign.apply(this, args);
}

/**
 * Provides functionality to personalize content recommendations by calculating
 * user domain affinity scores. These scores are used to calculate relevance
 * scores for items/recs/stories that have domain affinities.
 *
 * The algorithm works as follows:
 *
 * - The recommendation endpoint returns a settings object containing
 *   timeSegments and parameterSets.
 *
 * - For every time segment we calculate the corresponding domain visit counts,
 *   yielding result objects of the following structure: {"mozilla.org": 12,
 *   "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits)
 *
 * - These visit counts are transformed to domain affinity scores for all
 *   provided parameter sets: {"mozilla.org": {"paramSet1": 0.8,
 *   "paramSet2": 0.9}, "mozilla.com": {"paramSet1": 1, "paramSet2": 0.9}}
 *   (see UserDomainAffinityProvider#calculateScoresForParameterSets)
 *
 * - The parameter sets provide factors for weighting, which allows for
 *   flexible targeting. The functionality to calculate final scores can
 *   be seen in UserDomainAffinityProvider#calculateScore
 *
 * - The user domain affinity scores are summed up across all time segments
 *   (see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores)
 *
 * - An item's domain affinities are matched to the user's domain affinity
 *   scores by calculating an item relevance score
 *   (see UserDomainAffinityProvider#calculateItemRelevanceScore)
 *
 * - The item relevance scores are used to sort items (see TopStoriesFeed for
 *   more details)
 *
 * - The data structure was chosen to allow for fast cache lookups during
 *   relevance score calculation. While user domain affinities are calculated
 *   infrequently (i.e. only once a day), the item relevance score
 *   (potentially) needs to be calculated every time the feed updates.
 *   Therefore allowing cache lookups of scores[domain][parameterSet] is
 *   beneficial.
 *
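 * @example
 * // Sketch only: hypothetical usage with the default settings; in practice
 * // the settings object comes from the recommendation endpoint and is passed
 * // in by the consuming feed.
 * const provider = new UserDomainAffinityProvider();
 * // Cached affinity lookup used during relevance scoring, e.g.:
 * // provider.scores["mozilla.org"]["linear-frequency"]; // => e.g. 0.8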
 */
this.UserDomainAffinityProvider = class UserDomainAffinityProvider {
  constructor(
    timeSegments = DEFAULT_TIME_SEGMENTS,
    parameterSets = DEFAULT_PARAMETER_SETS,
    maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS,
    version,
    scores
  ) {
    this.timeSegments = timeSegments;
    this.maxHistoryQueryResults = maxHistoryQueryResults;
    this.version = version;
    if (scores) {
      this.parameterSets = parameterSets;
      this.scores = scores;
    } else {
      this.parameterSets = this.prepareParameterSets(parameterSets);
      this.scores = this.calculateAllUserDomainAffinityScores();
    }
  }

  /**
   * Adds dynamic parameters to the given parameter sets that need to be
   * computed based on time segments.
   *
   * @param ps The parameter sets
   * @return Updated parameter sets with additional fields (i.e. timeSegmentWeights)
   */
  prepareParameterSets(ps) {
    return (
      Object.keys(ps)
        // Add timeSegmentWeights fields to param sets,
        // e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...}
        .map(k => ({
          [k]: merge(ps[k], {
            timeSegmentWeights: this.calculateTimeSegmentWeights(
              ps[k].recencyFactor
            ),
          }),
        }))
        .reduce((acc, cur) => merge(acc, cur))
    );
  }

  /**
   * Calculates the time segment weights based on the provided recencyFactor.
   *
   * @param recencyFactor The recency factor indicating how to weigh recency
   * @return An object containing time segment weights: {"hour": 0.987, "day": 1}
   */
  calculateTimeSegmentWeights(recencyFactor) {
    return this.timeSegments.reduce(
      (acc, cur) =>
        merge(acc, {
          [cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor),
        }),
      {}
    );
  }

  /**
   * Calculates user domain affinity scores based on browsing history and the
   * available time segments and parameter sets.
   */
  calculateAllUserDomainAffinityScores() {
    return (
      this.timeSegments
        // Calculate parameter set specific domain scores for each time segment
        // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...]
        .map(ts => this.calculateUserDomainAffinityScores(ts))
        // Keep format, but reduce to a single object with combined scores across all time segments
        // => {"a.com": {"ps1": 2, "ps2": 2}, "b.com": {"ps1": 3, "ps2": 3}}
        .reduce((acc, cur) => this._combineScores(acc, cur))
    );
  }

  /**
   * Calculates the user domain affinity scores for the given time segment.
   *
   * @param ts The time segment
   * @return The parameter specific scores for all domains with visits in
   * this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...}
   */
  calculateUserDomainAffinityScores(ts) {
    // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2}
    let visits = this.queryVisits(ts);

    return Object.keys(visits).reduce(
      (acc, d) =>
        merge(acc, {
          [d]: this.calculateScoresForParameterSets(ts, visits[d]),
        }),
      {}
    );
  }
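  // Illustrative data flow for a single time segment (numbers below assume the
  // default "linear-frequency" parameter set and the "hour" segment, whose
  // time segment weight is 1):
  //   queryVisits(ts) => {"a.com": 4, "b.com": 1}
  //   which expands per parameter set to
  //   {"a.com": {"linear-frequency": 0.4}, "b.com": {"linear-frequency": 0.1}}
  //   before _combineScores sums these objects across all time segments.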
  /**
   * Calculates the scores for all parameter sets for the given time segment
   * and domain visit count.
   *
   * @param ts The time segment
   * @param vc The domain visit count in the given time segment
   * @return The parameter specific scores for the visit count in
   * this time segment: {"ps1": 12, "ps2": 34}
   */
  calculateScoresForParameterSets(ts, vc) {
    return Object.keys(this.parameterSets).reduce(
      (acc, ps) =>
        merge(acc, {
          [ps]: this.calculateScoreForParameterSet(
            ts,
            vc,
            this.parameterSets[ps]
          ),
        }),
      {}
    );
  }

  /**
   * Calculates the final affinity score in the given time segment for the given parameter set
   *
   * @param timeSegment The time segment
   * @param visitCount The domain visit count in the given time segment
   * @param parameterSet The parameter set to use for scoring
   * @return The final score
   */
  calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) {
    return this.calculateScore(
      visitCount * parameterSet.timeSegmentWeights[timeSegment.id],
      parameterSet.perfectFrequencyVisits,
      parameterSet.frequencyFactor
    );
  }

  /**
   * Keeps the same format, but reduces the two objects to a single object, with
   * combined scores across all time segments
   * => {"a.com": {"ps1": 2, "ps2": 2}, "b.com": {"ps1": 3, "ps2": 3}}
   */
  _combineScores(a, b) {
    // Merge both score objects so we get a combined object holding all domains.
    // This is so we can combine them without missing domains that are in a and not in b and vice versa.
    const c = merge({}, a, b);
    return Object.keys(c).reduce(
      (acc, d) => merge(acc, this._combine(a, b, c, d)),
      {}
    );
  }

  _combine(a, b, c, d) {
    return (
      Object.keys(c[d])
        // Summing up the parameter set specific scores of each domain
        .map(ps => ({
          [d]: {
            [ps]: Math.min(
              1,
              ((a[d] && a[d][ps]) || 0) + ((b[d] && b[d][ps]) || 0)
            ),
          },
        }))
        // Reducing from an array of objects with a single parameter set to a single object
        // [{"a.com": {"ps1": 11}}, {"a.com": {"ps2": 12}}] => {"a.com": {"ps1": 11, "ps2": 12}}
        .reduce((acc, cur) => ({ [d]: merge(acc[d], cur[d]) }))
    );
  }

  /**
   * Calculates a value on the curve described by the provided parameters. The curve we're using is
   * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp
   *
   * @param {number} score A value between 0 and maxScore, representing x.
   * @param {number} maxScore Highest possible score.
   * @param {number} factor The slope describing the curve to get to maxScore. A low slope value
   * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in an exp-shaped curve,
   * a slope of exactly 0.5 is linear.
   * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum
   * effect of the slope can be. This represents b in the formula above.
   * @return {number} the final score
   */
  calculateScore(score, maxScore, factor, ease = 2) {
    let a = 0;
    let x = Math.max(0, score / maxScore);

    if (x >= 1) {
      return 1;
    }

    if (factor === 0.5) {
      return x;
    }

    if (factor < 0.5) {
      // We want a log-shaped curve so we scale "a" between 0 and .99
      a = (factor / 0.5) * 0.49;
    } else if (factor > 0.5) {
      // We want an exp-shaped curve so we scale "a" between 1.01 and 10
      a = 1 + ((factor - 0.5) / 0.5) * 9;
    }

    return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1);
  }
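  // Worked example (hypothetical inputs): calculateScore(5, 10, 0.4) gives
  //   x = 5 / 10 = 0.5 and, since factor 0.4 < 0.5, a = (0.4 / 0.5) * 0.49 = 0.392,
  //   so the result is (0.392^(2 * 0.5) - 1) / (0.392^2 - 1) ≈ 0.72,
  //   i.e. a log-shaped curve that rewards the first few visits the most.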
  /**
   * Queries the visit counts in the given time segment.
   *
   * @param ts the time segment
   * @return the visit count object: {"a.com": 1, "b.com": 2}
   */
  queryVisits(ts) {
    const visitCounts = {};
    const query = PlacesUtils.history.getNewQuery();
    if (!query) {
      return visitCounts;
    }
    const wwwRegEx = /^www\./;

    query.beginTimeReference = query.TIME_RELATIVE_NOW;
    query.beginTime =
      ts.startTime && ts.startTime !== 0
        ? -(ts.startTime * 1000 * 1000)
        : -(Date.now() * 1000);
    query.endTimeReference = query.TIME_RELATIVE_NOW;
    query.endTime =
      ts.endTime && ts.endTime !== 0 ? -(ts.endTime * 1000 * 1000) : 0;

    const options = PlacesUtils.history.getNewQueryOptions();
    options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING;
    options.maxResults = this.maxHistoryQueryResults;

    const { root } = PlacesUtils.history.executeQuery(query, options);
    root.containerOpen = true;
    for (let i = 0; i < root.childCount; i++) {
      let node = root.getChild(i);
      let host = Services.io.newURI(node.uri).host.replace(wwwRegEx, "");
      if (!visitCounts[host]) {
        visitCounts[host] = 0;
      }
      visitCounts[host] += node.accessCount;
    }
    root.containerOpen = false;
    return visitCounts;
  }

  /**
   * Calculates an item's relevance score.
   *
   * @param item the item (story); it must contain domain affinities and
   * reference a known parameter set, otherwise its original item_score is
   * returned unchanged.
   * @return the calculated item relevance score, or item.item_score if the
   * item has no domain_affinities or references an unknown parameter set.
   */
  calculateItemRelevanceScore(item) {
    const params = this.parameterSets[item.parameter_set];
    if (!item.domain_affinities || !params) {
      return item.item_score;
    }

    const scores = Object.keys(item.domain_affinities).reduce(
      (acc, d) => {
        let userDomainAffinityScore = this.scores[d]
          ? this.scores[d][item.parameter_set]
          : false;
        if (userDomainAffinityScore) {
          acc.combinedDomainScore +=
            userDomainAffinityScore * item.domain_affinities[d];
          acc.matchingDomainsCount++;
        }
        return acc;
      },
      { combinedDomainScore: 0, matchingDomainsCount: 0 }
    );

    // Boost the score as configured in the provided parameter set
    const boostedCombinedDomainScore =
      scores.combinedDomainScore *
      Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount);

    // Calculate what the score would be if the item score is ignored
    const normalizedCombinedDomainScore = this.calculateScore(
      boostedCombinedDomainScore,
      params.perfectCombinedDomainScore,
      params.combinedDomainFactor
    );

    // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor
    // allows weighting the item score in relation to the normalizedCombinedDomainScore:
    // An itemScoreFactor of 1 results in the item score and ignores the combined domain score
    // An itemScoreFactor of 0.5 results in the average of item score and combined domain score
    // An itemScoreFactor of 0 results in the combined domain score and ignores the item score
    return (
      params.itemScoreFactor *
        (item.item_score - normalizedCombinedDomainScore) +
      normalizedCombinedDomainScore
    );
  }

  /**
   * Returns an object holding the settings and affinity scores of this provider instance.
   */
  getAffinities() {
    return {
      timeSegments: this.timeSegments,
      parameterSets: this.parameterSets,
      maxHistoryQueryResults: this.maxHistoryQueryResults,
      version: this.version,
      scores: this.scores,
    };
  }
};

const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"];
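// Illustrative blend performed by calculateItemRelevanceScore above
// (hypothetical numbers): with item_score 0.9 and a normalized combined domain
// score of 0.6, an itemScoreFactor of 0.5 yields 0.5 * (0.9 - 0.6) + 0.6 = 0.75,
// i.e. the average of the two, while 0 returns 0.6 and 1 returns 0.9.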