diff options
Diffstat (limited to 'browser/components/newtab/lib/UserDomainAffinityProvider.jsm')
-rw-r--r-- | browser/components/newtab/lib/UserDomainAffinityProvider.jsm | 390 |
1 files changed, 390 insertions, 0 deletions
diff --git a/browser/components/newtab/lib/UserDomainAffinityProvider.jsm b/browser/components/newtab/lib/UserDomainAffinityProvider.jsm new file mode 100644 index 0000000000..f165aad1e5 --- /dev/null +++ b/browser/components/newtab/lib/UserDomainAffinityProvider.jsm @@ -0,0 +1,390 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +"use strict"; + +const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm"); + +ChromeUtils.defineModuleGetter( + this, + "PlacesUtils", + "resource://gre/modules/PlacesUtils.jsm" +); + +const DEFAULT_TIME_SEGMENTS = [ + { id: "hour", startTime: 3600, endTime: 0, weightPosition: 1 }, + { id: "day", startTime: 86400, endTime: 3600, weightPosition: 0.75 }, + { id: "week", startTime: 604800, endTime: 86400, weightPosition: 0.5 }, + { id: "weekPlus", startTime: 0, endTime: 604800, weightPosition: 0.25 }, + { id: "alltime", startTime: 0, endTime: 0, weightPosition: 0.25 }, +]; + +const DEFAULT_PARAMETER_SETS = { + "linear-frequency": { + recencyFactor: 0.4, + frequencyFactor: 0.5, + combinedDomainFactor: 0.5, + perfectFrequencyVisits: 10, + perfectCombinedDomainScore: 2, + multiDomainBoost: 0.1, + itemScoreFactor: 0, + }, +}; + +const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000; + +function merge(...args) { + return Object.assign.apply(this, args); +} + +/** + * Provides functionality to personalize content recommendations by calculating + * user domain affinity scores. These scores are used to calculate relevance + * scores for items/recs/stories that have domain affinities. + * + * The algorithm works as follows: + * + * - The recommendation endpoint returns a settings object containing + * timeSegments and parametersets. + * + * - For every time segment we calculate the corresponding domain visit counts, + * yielding result objects of the following structure: {"mozilla.org": 12, + * "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits) + * + * - These visit counts are transformed to domain affinity scores for all + * provided parameter sets: {"mozilla.org": {"paramSet1": 0.8, + * "paramSet2": 0.9}, "mozilla.org": {"paramSet1": 1, "paramSet2": 0.9}} + * (see UserDomainAffinityProvider#calculateScoresForParameterSets) + * + * - The parameter sets provide factors for weighting which allows for + * flexible targeting. The functionality to calculate final scores can + * be seen in UserDomainAffinityProvider#calculateScores + * + * - The user domain affinity scores are summed up across all time segments + * see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores + * + * - An item's domain affinities are matched to the user's domain affinity + * scores by calculating an item relevance score + * (see UserDomainAffinityProvider#calculateItemRelevanceScore) + * + * - The item relevance scores are used to sort items (see TopStoriesFeed for + * more details) + * + * - The data structure was chosen to allow for fast cache lookups during + * relevance score calculation. While user domain affinities are calculated + * infrequently (i.e. only once a day), the item relevance score (potentially) + * needs to be calculated every time the feed updates. Therefore allowing cache + * lookups of scores[domain][parameterSet] is beneficial + */ +this.UserDomainAffinityProvider = class UserDomainAffinityProvider { + constructor( + timeSegments = DEFAULT_TIME_SEGMENTS, + parameterSets = DEFAULT_PARAMETER_SETS, + maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS, + version, + scores + ) { + this.timeSegments = timeSegments; + this.maxHistoryQueryResults = maxHistoryQueryResults; + this.version = version; + if (scores) { + this.parameterSets = parameterSets; + this.scores = scores; + } else { + this.parameterSets = this.prepareParameterSets(parameterSets); + this.scores = this.calculateAllUserDomainAffinityScores(); + } + } + + /** + * Adds dynamic parameters to the given parameter sets that need to be + * computed based on time segments. + * + * @param ps The parameter sets + * @return Updated parameter sets with additional fields (i.e. timeSegmentWeights) + */ + prepareParameterSets(ps) { + return ( + Object.keys(ps) + // Add timeSegmentWeight fields to param sets e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...} + .map(k => ({ + [k]: merge(ps[k], { + timeSegmentWeights: this.calculateTimeSegmentWeights( + ps[k].recencyFactor + ), + }), + })) + .reduce((acc, cur) => merge(acc, cur)) + ); + } + + /** + * Calculates a time segment weight based on the provided recencyFactor. + * + * @param recencyFactor The recency factor indicating how to weigh recency + * @return An object containing time segment weights: {"hour": 0.987, "day": 1} + */ + calculateTimeSegmentWeights(recencyFactor) { + return this.timeSegments.reduce( + (acc, cur) => + merge(acc, { + [cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor), + }), + {} + ); + } + + /** + * Calculates user domain affinity scores based on browsing history and the + * available times segments and parameter sets. + */ + calculateAllUserDomainAffinityScores() { + return ( + this.timeSegments + // Calculate parameter set specific domain scores for each time segment + // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...] + .map(ts => this.calculateUserDomainAffinityScores(ts)) + // Keep format, but reduce to single object, with combined scores across all time segments + // => "{a.com":{"ps1":2,"ps2":2}, "b.com":{"ps1":3,"ps2":3}}"" + .reduce((acc, cur) => this._combineScores(acc, cur)) + ); + } + + /** + * Calculates the user domain affinity scores for the given time segment. + * + * @param ts The time segment + * @return The parameter specific scores for all domains with visits in + * this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...} + */ + calculateUserDomainAffinityScores(ts) { + // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2} + let visits = this.queryVisits(ts); + + return Object.keys(visits).reduce( + (acc, d) => + merge(acc, { + [d]: this.calculateScoresForParameterSets(ts, visits[d]), + }), + {} + ); + } + + /** + * Calculates the scores for all parameter sets for the given time segment + * and domain visit count. + * + * @param ts The time segment + * @param vc The domain visit count in the given time segment + * @return The parameter specific scores for the visit count in + * this time segment: {"ps1": 12, "ps2": 34} + */ + calculateScoresForParameterSets(ts, vc) { + return Object.keys(this.parameterSets).reduce( + (acc, ps) => + merge(acc, { + [ps]: this.calculateScoreForParameterSet( + ts, + vc, + this.parameterSets[ps] + ), + }), + {} + ); + } + + /** + * Calculates the final affinity score in the given time segment for the given parameter set + * + * @param timeSegment The time segment + * @param visitCount The domain visit count in the given time segment + * @param parameterSet The parameter set to use for scoring + * @return The final score + */ + calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) { + return this.calculateScore( + visitCount * parameterSet.timeSegmentWeights[timeSegment.id], + parameterSet.perfectFrequencyVisits, + parameterSet.frequencyFactor + ); + } + + /** + * Keeps the same format, but reduces the two objects to a single object, with + * combined scores across all time segments => {a.com":{"ps1":2,"ps2":2}, + * "b.com":{"ps1":3,"ps2":3}} + */ + _combineScores(a, b) { + // Merge both score objects so we get a combined object holding all domains. + // This is so we can combine them without missing domains that are in a and not in b and vice versa. + const c = merge({}, a, b); + return Object.keys(c).reduce( + (acc, d) => merge(acc, this._combine(a, b, c, d)), + {} + ); + } + + _combine(a, b, c, d) { + return ( + Object.keys(c[d]) + // Summing up the parameter set specific scores of each domain + .map(ps => ({ + [d]: { + [ps]: Math.min( + 1, + ((a[d] && a[d][ps]) || 0) + ((b[d] && b[d][ps]) || 0) + ), + }, + })) + // Reducing from an array of objects with a single parameter set to a single object + // [{"a.com":{"ps1":11}}, {"a.com: {"ps2":12}}] => {"a.com":{"ps1":11,"ps2":12}} + .reduce((acc, cur) => ({ [d]: merge(acc[d], cur[d]) })) + ); + } + + /** + * Calculates a value on the curve described by the provided parameters. The curve we're using is + * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp + * + * @param {number} score A value between 0 and maxScore, representing x. + * @param {number} maxScore Highest possible score. + * @param {number} factor The slope describing the curve to get to maxScore. A low slope value + * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve, + * a slope of exactly 0.5 is linear. + * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum + * effect of the slope can be. This represents b in the formula above. + * @return {number} the final score + */ + calculateScore(score, maxScore, factor, ease = 2) { + let a = 0; + let x = Math.max(0, score / maxScore); + + if (x >= 1) { + return 1; + } + + if (factor === 0.5) { + return x; + } + + if (factor < 0.5) { + // We want a log-shaped curve so we scale "a" between 0 and .99 + a = (factor / 0.5) * 0.49; + } else if (factor > 0.5) { + // We want an exp-shaped curve so we scale "a" between 1.01 and 10 + a = 1 + ((factor - 0.5) / 0.5) * 9; + } + + return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1); + } + + /** + * Queries the visit counts in the given time segment. + * + * @param ts the time segment + * @return the visit count object: {"a.com": 1, "b.com": 2} + */ + queryVisits(ts) { + const visitCounts = {}; + const query = PlacesUtils.history.getNewQuery(); + if (!query) { + return visitCounts; + } + const wwwRegEx = /^www\./; + + query.beginTimeReference = query.TIME_RELATIVE_NOW; + query.beginTime = + ts.startTime && ts.startTime !== 0 + ? -(ts.startTime * 1000 * 1000) + : -(Date.now() * 1000); + + query.endTimeReference = query.TIME_RELATIVE_NOW; + query.endTime = + ts.endTime && ts.endTime !== 0 ? -(ts.endTime * 1000 * 1000) : 0; + + const options = PlacesUtils.history.getNewQueryOptions(); + options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING; + options.maxResults = this.maxHistoryQueryResults; + + const { root } = PlacesUtils.history.executeQuery(query, options); + root.containerOpen = true; + for (let i = 0; i < root.childCount; i++) { + let node = root.getChild(i); + let host = Services.io.newURI(node.uri).host.replace(wwwRegEx, ""); + if (!visitCounts[host]) { + visitCounts[host] = 0; + } + visitCounts[host] += node.accessCount; + } + root.containerOpen = false; + return visitCounts; + } + + /** + * Calculates an item's relevance score. + * + * @param item the item (story), must contain domain affinities, otherwise a + * score of 1 is returned. + * @return the calculated item's score or 1 if item has no domain_affinities + * or references an unknown parameter set. + */ + calculateItemRelevanceScore(item) { + const params = this.parameterSets[item.parameter_set]; + if (!item.domain_affinities || !params) { + return item.item_score; + } + + const scores = Object.keys(item.domain_affinities).reduce( + (acc, d) => { + let userDomainAffinityScore = this.scores[d] + ? this.scores[d][item.parameter_set] + : false; + if (userDomainAffinityScore) { + acc.combinedDomainScore += + userDomainAffinityScore * item.domain_affinities[d]; + acc.matchingDomainsCount++; + } + return acc; + }, + { combinedDomainScore: 0, matchingDomainsCount: 0 } + ); + + // Boost the score as configured in the provided parameter set + const boostedCombinedDomainScore = + scores.combinedDomainScore * + Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount); + + // Calculate what the score would be if the item score is ignored + const normalizedCombinedDomainScore = this.calculateScore( + boostedCombinedDomainScore, + params.perfectCombinedDomainScore, + params.combinedDomainFactor + ); + + // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor + // allows weighting the item score in relation to the normalizedCombinedDomainScore: + // An itemScoreFactor of 1 results in the item score and ignores the combined domain score + // An itemScoreFactor of 0.5 results in the the average of item score and combined domain score + // An itemScoreFactor of 0 results in the combined domain score and ignores the item score + return ( + params.itemScoreFactor * + (item.item_score - normalizedCombinedDomainScore) + + normalizedCombinedDomainScore + ); + } + + /** + * Returns an object holding the settings and affinity scores of this provider instance. + */ + getAffinities() { + return { + timeSegments: this.timeSegments, + parameterSets: this.parameterSets, + maxHistoryQueryResults: this.maxHistoryQueryResults, + version: this.version, + scores: this.scores, + }; + } +}; + +const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"]; |