summaryrefslogtreecommitdiffstats
path: root/browser/components/newtab/lib/UserDomainAffinityProvider.jsm
diff options
context:
space:
mode:
Diffstat (limited to 'browser/components/newtab/lib/UserDomainAffinityProvider.jsm')
-rw-r--r--browser/components/newtab/lib/UserDomainAffinityProvider.jsm390
1 files changed, 390 insertions, 0 deletions
diff --git a/browser/components/newtab/lib/UserDomainAffinityProvider.jsm b/browser/components/newtab/lib/UserDomainAffinityProvider.jsm
new file mode 100644
index 0000000000..f165aad1e5
--- /dev/null
+++ b/browser/components/newtab/lib/UserDomainAffinityProvider.jsm
@@ -0,0 +1,390 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+"use strict";
+
+const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
+
+ChromeUtils.defineModuleGetter(
+ this,
+ "PlacesUtils",
+ "resource://gre/modules/PlacesUtils.jsm"
+);
+
+const DEFAULT_TIME_SEGMENTS = [
+ { id: "hour", startTime: 3600, endTime: 0, weightPosition: 1 },
+ { id: "day", startTime: 86400, endTime: 3600, weightPosition: 0.75 },
+ { id: "week", startTime: 604800, endTime: 86400, weightPosition: 0.5 },
+ { id: "weekPlus", startTime: 0, endTime: 604800, weightPosition: 0.25 },
+ { id: "alltime", startTime: 0, endTime: 0, weightPosition: 0.25 },
+];
+
+const DEFAULT_PARAMETER_SETS = {
+ "linear-frequency": {
+ recencyFactor: 0.4,
+ frequencyFactor: 0.5,
+ combinedDomainFactor: 0.5,
+ perfectFrequencyVisits: 10,
+ perfectCombinedDomainScore: 2,
+ multiDomainBoost: 0.1,
+ itemScoreFactor: 0,
+ },
+};
+
+const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000;
+
+function merge(...args) {
+ return Object.assign.apply(this, args);
+}
+
+/**
+ * Provides functionality to personalize content recommendations by calculating
+ * user domain affinity scores. These scores are used to calculate relevance
+ * scores for items/recs/stories that have domain affinities.
+ *
+ * The algorithm works as follows:
+ *
+ * - The recommendation endpoint returns a settings object containing
+ * timeSegments and parametersets.
+ *
+ * - For every time segment we calculate the corresponding domain visit counts,
+ * yielding result objects of the following structure: {"mozilla.org": 12,
+ * "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits)
+ *
+ * - These visit counts are transformed to domain affinity scores for all
+ * provided parameter sets: {"mozilla.org": {"paramSet1": 0.8,
+ * "paramSet2": 0.9}, "mozilla.org": {"paramSet1": 1, "paramSet2": 0.9}}
+ * (see UserDomainAffinityProvider#calculateScoresForParameterSets)
+ *
+ * - The parameter sets provide factors for weighting which allows for
+ * flexible targeting. The functionality to calculate final scores can
+ * be seen in UserDomainAffinityProvider#calculateScores
+ *
+ * - The user domain affinity scores are summed up across all time segments
+ * see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores
+ *
+ * - An item's domain affinities are matched to the user's domain affinity
+ * scores by calculating an item relevance score
+ * (see UserDomainAffinityProvider#calculateItemRelevanceScore)
+ *
+ * - The item relevance scores are used to sort items (see TopStoriesFeed for
+ * more details)
+ *
+ * - The data structure was chosen to allow for fast cache lookups during
+ * relevance score calculation. While user domain affinities are calculated
+ * infrequently (i.e. only once a day), the item relevance score (potentially)
+ * needs to be calculated every time the feed updates. Therefore allowing cache
+ * lookups of scores[domain][parameterSet] is beneficial
+ */
+this.UserDomainAffinityProvider = class UserDomainAffinityProvider {
+ constructor(
+ timeSegments = DEFAULT_TIME_SEGMENTS,
+ parameterSets = DEFAULT_PARAMETER_SETS,
+ maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS,
+ version,
+ scores
+ ) {
+ this.timeSegments = timeSegments;
+ this.maxHistoryQueryResults = maxHistoryQueryResults;
+ this.version = version;
+ if (scores) {
+ this.parameterSets = parameterSets;
+ this.scores = scores;
+ } else {
+ this.parameterSets = this.prepareParameterSets(parameterSets);
+ this.scores = this.calculateAllUserDomainAffinityScores();
+ }
+ }
+
+ /**
+ * Adds dynamic parameters to the given parameter sets that need to be
+ * computed based on time segments.
+ *
+ * @param ps The parameter sets
+ * @return Updated parameter sets with additional fields (i.e. timeSegmentWeights)
+ */
+ prepareParameterSets(ps) {
+ return (
+ Object.keys(ps)
+ // Add timeSegmentWeight fields to param sets e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...}
+ .map(k => ({
+ [k]: merge(ps[k], {
+ timeSegmentWeights: this.calculateTimeSegmentWeights(
+ ps[k].recencyFactor
+ ),
+ }),
+ }))
+ .reduce((acc, cur) => merge(acc, cur))
+ );
+ }
+
+ /**
+ * Calculates a time segment weight based on the provided recencyFactor.
+ *
+ * @param recencyFactor The recency factor indicating how to weigh recency
+ * @return An object containing time segment weights: {"hour": 0.987, "day": 1}
+ */
+ calculateTimeSegmentWeights(recencyFactor) {
+ return this.timeSegments.reduce(
+ (acc, cur) =>
+ merge(acc, {
+ [cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor),
+ }),
+ {}
+ );
+ }
+
+ /**
+ * Calculates user domain affinity scores based on browsing history and the
+ * available times segments and parameter sets.
+ */
+ calculateAllUserDomainAffinityScores() {
+ return (
+ this.timeSegments
+ // Calculate parameter set specific domain scores for each time segment
+ // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...]
+ .map(ts => this.calculateUserDomainAffinityScores(ts))
+ // Keep format, but reduce to single object, with combined scores across all time segments
+ // => "{a.com":{"ps1":2,"ps2":2}, "b.com":{"ps1":3,"ps2":3}}""
+ .reduce((acc, cur) => this._combineScores(acc, cur))
+ );
+ }
+
+ /**
+ * Calculates the user domain affinity scores for the given time segment.
+ *
+ * @param ts The time segment
+ * @return The parameter specific scores for all domains with visits in
+ * this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...}
+ */
+ calculateUserDomainAffinityScores(ts) {
+ // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2}
+ let visits = this.queryVisits(ts);
+
+ return Object.keys(visits).reduce(
+ (acc, d) =>
+ merge(acc, {
+ [d]: this.calculateScoresForParameterSets(ts, visits[d]),
+ }),
+ {}
+ );
+ }
+
+ /**
+ * Calculates the scores for all parameter sets for the given time segment
+ * and domain visit count.
+ *
+ * @param ts The time segment
+ * @param vc The domain visit count in the given time segment
+ * @return The parameter specific scores for the visit count in
+ * this time segment: {"ps1": 12, "ps2": 34}
+ */
+ calculateScoresForParameterSets(ts, vc) {
+ return Object.keys(this.parameterSets).reduce(
+ (acc, ps) =>
+ merge(acc, {
+ [ps]: this.calculateScoreForParameterSet(
+ ts,
+ vc,
+ this.parameterSets[ps]
+ ),
+ }),
+ {}
+ );
+ }
+
+ /**
+ * Calculates the final affinity score in the given time segment for the given parameter set
+ *
+ * @param timeSegment The time segment
+ * @param visitCount The domain visit count in the given time segment
+ * @param parameterSet The parameter set to use for scoring
+ * @return The final score
+ */
+ calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) {
+ return this.calculateScore(
+ visitCount * parameterSet.timeSegmentWeights[timeSegment.id],
+ parameterSet.perfectFrequencyVisits,
+ parameterSet.frequencyFactor
+ );
+ }
+
+ /**
+ * Keeps the same format, but reduces the two objects to a single object, with
+ * combined scores across all time segments => {a.com":{"ps1":2,"ps2":2},
+ * "b.com":{"ps1":3,"ps2":3}}
+ */
+ _combineScores(a, b) {
+ // Merge both score objects so we get a combined object holding all domains.
+ // This is so we can combine them without missing domains that are in a and not in b and vice versa.
+ const c = merge({}, a, b);
+ return Object.keys(c).reduce(
+ (acc, d) => merge(acc, this._combine(a, b, c, d)),
+ {}
+ );
+ }
+
+ _combine(a, b, c, d) {
+ return (
+ Object.keys(c[d])
+ // Summing up the parameter set specific scores of each domain
+ .map(ps => ({
+ [d]: {
+ [ps]: Math.min(
+ 1,
+ ((a[d] && a[d][ps]) || 0) + ((b[d] && b[d][ps]) || 0)
+ ),
+ },
+ }))
+ // Reducing from an array of objects with a single parameter set to a single object
+ // [{"a.com":{"ps1":11}}, {"a.com: {"ps2":12}}] => {"a.com":{"ps1":11,"ps2":12}}
+ .reduce((acc, cur) => ({ [d]: merge(acc[d], cur[d]) }))
+ );
+ }
+
+ /**
+ * Calculates a value on the curve described by the provided parameters. The curve we're using is
+ * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp
+ *
+ * @param {number} score A value between 0 and maxScore, representing x.
+ * @param {number} maxScore Highest possible score.
+ * @param {number} factor The slope describing the curve to get to maxScore. A low slope value
+ * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve,
+ * a slope of exactly 0.5 is linear.
+ * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum
+ * effect of the slope can be. This represents b in the formula above.
+ * @return {number} the final score
+ */
+ calculateScore(score, maxScore, factor, ease = 2) {
+ let a = 0;
+ let x = Math.max(0, score / maxScore);
+
+ if (x >= 1) {
+ return 1;
+ }
+
+ if (factor === 0.5) {
+ return x;
+ }
+
+ if (factor < 0.5) {
+ // We want a log-shaped curve so we scale "a" between 0 and .99
+ a = (factor / 0.5) * 0.49;
+ } else if (factor > 0.5) {
+ // We want an exp-shaped curve so we scale "a" between 1.01 and 10
+ a = 1 + ((factor - 0.5) / 0.5) * 9;
+ }
+
+ return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1);
+ }
+
+ /**
+ * Queries the visit counts in the given time segment.
+ *
+ * @param ts the time segment
+ * @return the visit count object: {"a.com": 1, "b.com": 2}
+ */
+ queryVisits(ts) {
+ const visitCounts = {};
+ const query = PlacesUtils.history.getNewQuery();
+ if (!query) {
+ return visitCounts;
+ }
+ const wwwRegEx = /^www\./;
+
+ query.beginTimeReference = query.TIME_RELATIVE_NOW;
+ query.beginTime =
+ ts.startTime && ts.startTime !== 0
+ ? -(ts.startTime * 1000 * 1000)
+ : -(Date.now() * 1000);
+
+ query.endTimeReference = query.TIME_RELATIVE_NOW;
+ query.endTime =
+ ts.endTime && ts.endTime !== 0 ? -(ts.endTime * 1000 * 1000) : 0;
+
+ const options = PlacesUtils.history.getNewQueryOptions();
+ options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING;
+ options.maxResults = this.maxHistoryQueryResults;
+
+ const { root } = PlacesUtils.history.executeQuery(query, options);
+ root.containerOpen = true;
+ for (let i = 0; i < root.childCount; i++) {
+ let node = root.getChild(i);
+ let host = Services.io.newURI(node.uri).host.replace(wwwRegEx, "");
+ if (!visitCounts[host]) {
+ visitCounts[host] = 0;
+ }
+ visitCounts[host] += node.accessCount;
+ }
+ root.containerOpen = false;
+ return visitCounts;
+ }
+
+ /**
+ * Calculates an item's relevance score.
+ *
+ * @param item the item (story), must contain domain affinities, otherwise a
+ * score of 1 is returned.
+ * @return the calculated item's score or 1 if item has no domain_affinities
+ * or references an unknown parameter set.
+ */
+ calculateItemRelevanceScore(item) {
+ const params = this.parameterSets[item.parameter_set];
+ if (!item.domain_affinities || !params) {
+ return item.item_score;
+ }
+
+ const scores = Object.keys(item.domain_affinities).reduce(
+ (acc, d) => {
+ let userDomainAffinityScore = this.scores[d]
+ ? this.scores[d][item.parameter_set]
+ : false;
+ if (userDomainAffinityScore) {
+ acc.combinedDomainScore +=
+ userDomainAffinityScore * item.domain_affinities[d];
+ acc.matchingDomainsCount++;
+ }
+ return acc;
+ },
+ { combinedDomainScore: 0, matchingDomainsCount: 0 }
+ );
+
+ // Boost the score as configured in the provided parameter set
+ const boostedCombinedDomainScore =
+ scores.combinedDomainScore *
+ Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount);
+
+ // Calculate what the score would be if the item score is ignored
+ const normalizedCombinedDomainScore = this.calculateScore(
+ boostedCombinedDomainScore,
+ params.perfectCombinedDomainScore,
+ params.combinedDomainFactor
+ );
+
+ // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor
+ // allows weighting the item score in relation to the normalizedCombinedDomainScore:
+ // An itemScoreFactor of 1 results in the item score and ignores the combined domain score
+ // An itemScoreFactor of 0.5 results in the the average of item score and combined domain score
+ // An itemScoreFactor of 0 results in the combined domain score and ignores the item score
+ return (
+ params.itemScoreFactor *
+ (item.item_score - normalizedCombinedDomainScore) +
+ normalizedCombinedDomainScore
+ );
+ }
+
+ /**
+ * Returns an object holding the settings and affinity scores of this provider instance.
+ */
+ getAffinities() {
+ return {
+ timeSegments: this.timeSegments,
+ parameterSets: this.parameterSets,
+ maxHistoryQueryResults: this.maxHistoryQueryResults,
+ version: this.version,
+ scores: this.scores,
+ };
+ }
+};
+
+const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"];