<!DOCTYPE html>
<html>
<head>
<title>Mitigating Browser Fingerprinting in Web Specifications</title>
<meta charset='utf-8'>
<script src='https://www.w3.org/Tools/respec/respec-w3c-common' class='remove'></script>
<script class='remove'>
var respecConfig = {
// specification status (e.g. WD, LCWD, NOTE, etc.). If in doubt use ED.
//specStatus: "IG-NOTE",
specStatus: "ED",
noRecTrack: true,
//publishDate: "2016-07-06",
// the specification's short name, as in http://www.w3.org/TR/short-name/
shortName: "fingerprinting-guidance",
// if your specification has a subtitle that goes below the main
// formal title, define it here
// subtitle : "an excellent document",
// if you wish the publication date to be other than today, set this
// publishDate: "2009-08-06",
// if the specification's copyright date is a range of years, specify
// the start date here:
// copyrightStart: "2005"
// if there is a previously published draft, uncomment this and set its YYYY-MM-DD date
// and its maturity status
previousPublishDate: "2015-11-24",
previousMaturity: "IG-NOTE",
// if there a publicly available Editor's Draft, this is the link
edDraftURI: "https://w3c.github.io/fingerprinting-guidance/",
// if this is a LCWD, uncomment and set the end of its review period
// lcEnd: "2009-08-05",
// editors, add as many as you like
// only "name" is required
editors: [
{ name: "Nick Doty", url: "https://npdoty.name/" },
],
otherLinks: [{
key: "Version history",
data: [{
value: "GitHub commit history",
href: "https://github.com/w3c/fingerprinting-guidance/commits/gh-pages"
}]},
{
key: "Issues list",
data: [{
value: "GitHub issues list",
href: "https://github.com/w3c/fingerprinting-guidance/issues"
}]
}
],
// name of the WG
wg: "Privacy Interest Group",
// URI of the public WG page
wgURI: "https://www.w3.org/Privacy/",
// name (without the @w3c.org) of the public mailing to which comments are due
wgPublicList: "public-privacy",
// URI of the patent status for this WG, for Rec-track documents
// !!!! IMPORTANT !!!!
// This is important for Rec-track documents, do not copy a patent URI from a random
// document unless you know what you're doing. If in doubt ask your friendly neighbourhood
// Team Contact.
wgPatentURI: "",
charterDisclosureURI: "https://www.w3.org/2011/07/privacy-ig-charter.html",
localBiblio: {
"EVERCOOKIE": {
"authors": ["Samy Kamkar"],
"href": "https://samy.pl/evercookie/",
"title": "evercookie - virtually irrevocable persistent cookies",
"date": "September 2010"
},
"NDSS-FINGERPRINTING": {
"authors": ["Ting-Fang Yen", "Yinglian Xie", "Fang Yu", "Roger Peng Yu", "Martin Abadi"],
"href": "https://www.microsoft.com/en-us/research/publication/host-fingerprinting-and-tracking-on-the-webprivacy-and-security-implications/",
"title": "Host Fingerprinting and Tracking on the Web: Privacy and Security Implications",
"date": "February 2012",
"publisher": "In Proceedings of the Network and Distributed System Security Symposium (NDSS)"
},
"RFC6973": {
"authors": [
"A. Cooper",
"H. Tschofenig",
"B. Aboba",
"J. Peterson",
"J. Morris",
"M. Hansen",
"R. Smith"
],
"href": "https://www.rfc-editor.org/rfc/rfc6973.txt",
"title": "Privacy Considerations for Internet Protocols",
"date": "July 2013",
"status": "RFC",
"publisher": "IETF"
},
"TAG-UNSANCTIONED": {
"href": "https://w3ctag.github.io/unsanctioned-tracking/",
"title": "Unsanctioned Web Tracking",
"date": "17 July 2015",
"authors": ["Mark Nottingham"],
"publisher": "W3C Technical Architecture Group"
},
"HIDING-CROWD": {
"href": "https://hal.inria.fr/hal-01718234v2",
"title": "Hiding in the Crowd: an Analysis of the Effectiveness of Browser Fingerprinting at Large Scale",
"date": "April 2018",
"authors": ["Alejandro Gómez-Boix", "Pierre Laperdrix", "Benoit Baudry"],
"publisher": "WWW2018 - TheWebConf2018: 27th International World Wide Web Conference"
},
"WPM-MILLION": {
"href": "https://webtransparency.cs.princeton.edu/webcensus/",
"authors": ["Steven Englehardt", "Arvind Narayanan"],
"date": "May 2016",
"title": "Online tracking: A 1-million-site measurement and analysis"
},
"FLASHCOOKIES": {
"href": "https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1446862",
"date": "10 August 2009",
"authors": ["Ashkan Soltani","Shannon Canty","Quentin Mayo","Lauren Thomas","Chris Jay Hoofnagle"],
"title": "Flash Cookies and Privacy"
},
"FLASHCOOKIES-2": {
"href": "https://ptolemy.berkeley.edu/projects/truststc/education/reu/11/Posters/AyensonMWambachDpaper.pdf",
"authors": ["Mika Ayenson", "Dietrich Wambach", "Ashkan Soltani", "Nathan Good", "Chris Hoofnagle"],
"title": "Flash cookies and privacy II: Now with HTML5 and ETag respawning"
},
"TOR-DESIGN": {
"title": "The Design and Implementation of the Tor Browser",
"href": "https://spec.torproject.org/torbrowser-design",
"date": "15 June 2018",
"authors": ["Mike Perry", "Erinn Clark", "Steven Murdoch", "Georg Koppen"]
},
"TAG-MINIMIZATION": {
"href": "https://www.w3.org/2001/tag/doc/APIMinimization",
"date": "12 September 2011",
"authors": ["Daniel Appelquist"],
"publisher": "W3C Technical Architecture Group",
"title": "Data Minimization in Web APIs"
},
"LEAKING-BATTERY": {
"href": "https://eprint.iacr.org/2015/616.pdf",
"date": "2015",
"authors": ["Łukasz Olejnik", "Gunes Acar", "Claude Castelluccia", "Claudia Diaz"],
"title": "The leaking battery: A privacy analysis of the HTML5 Battery Status API"
},
"BEAUTY-BEAST": {
"authors": ["Pierre Laperdrix", "Walter Rudametkin", "Benoit Baudry"],
"title": "Beauty and the Beast: Diverting modern web browsers to build unique browser fingerprints",
"publisher": "IEEE Symposium on Security and Privacy (S&P 2016)",
"date": "May 2016",
"href": "https://hal.inria.fr/hal-01285470v2/"
},
"httpbis-client-hints": {
"href": "https://httpwg.org/http-extensions/client-hints.html",
"title": "HTTP Client Hints",
"authors": ["Ilya Grigorik"],
"date": "January 2019",
"publisher": "HTTP Working Group"
},
"security-privacy-questionnaire-tag": {
"href": "https://w3ctag.github.io/security-questionnaire/",
"authors": ["Lukasz Olejnik", "Jason Novak"],
"date": "December 2018",
"title": "Self-Review Questionnaire: Security and Privacy",
"publisher": "W3C Technical Architecture Group"
}
}
};
</script>
<style type="text/css" media="screen">
img.fingerprint {
float: left;
margin-left: -25px;
}
ul.practicedesc {
margin-top: -2em;
padding-top: .5em;
}
</style>
</head>
<body>
<section id="abstract">
Exposure of settings and characteristics of browsers can harm user privacy by allowing for browser fingerprinting. This document defines different types of fingerprinting, considers distinct levels of mitigation for the related privacy risks and provides guidance for Web specification authors on how to balance these concerns when designing new Web features.
</section>
<section id="sotd">
This document is a draft Interest Group Note to provide guidance to Web specification authors on mitigating the privacy impacts of browser fingerprinting, currently under development by the <a href="https://www.w3.org/Privacy/">Privacy Interest Group</a> (<abbr title="Privacy Interest Group">PING</abbr>). <a href="https://www.w3.org/TR/2015/NOTE-fingerprinting-guidance-20151124/">A snapshot draft of this Note was published on 24 November 2015</a>. PING is collaborating with the <a href="https://www.w3.org/2001/tag/">Technical Architecture Group</a> (<abbr title="Technical Architecture Group">TAG</abbr>) on this guidance. Constructive input of all kinds would be useful; feel free to contact the editor directly, send comments to the <a href="mailto:public-privacy@w3.org">public-privacy mailing list</a> or <a href="https://github.com/w3c/fingerprinting-guidance/issues">file issues on GitHub</a>.
</section>
<section>
<h2>Browser fingerprinting</h2>
<section>
<h2>What is fingerprinting?</h2>
<p>In short, <dfn id="dfn-browser-fingerprinting" data-export="">browser fingerprinting</dfn> is the capability of a site to identify or re-identify a visiting user, user agent or device via configuration settings or other observable characteristics.</p>
<p>A similar definition is provided by [[?RFC6973]]. A more detailed list of types of fingerprinting is included below. This document does not attempt to catalog all features currently used or usable for browser fingerprinting; however, <a href="#research"></a> provides links to browser vendor pages and academic findings.</p>
</section>
<section id="privacy_threat_models">
<h2>Privacy impacts and threat models</h2>
<p>Browser fingerprinting can be used as a security measure (e.g. as means of authenticating the user). However, fingerprinting is also a potential threat to users' privacy on the Web. This document does not attempt to provide a single unifying definition of "privacy" or "personal data", but we highlight how browser fingerprinting might impact users' privacy. For example, browser fingerprinting can be used to:</p>
<ul>
<li>identify a user</li>
<li>correlate a user’s browsing activity within and across sessions</li>
<li>track users without transparency or control</li>
</ul>
<p>The privacy implications associated with each use case are discussed below. Following from the practice of security threat model analysis, we note that there are distinct models of privacy threats for fingerprinting. Defenses against these threats differ, depending on the particular privacy implication and the threat model of the user.</p>
<section>
<h3>Identify a user</h3>
<p>There are many reasons why users might wish to remain anonymous or unidentified online, including: concerns about surveillance, personal physical safety, and concerns about discrimination against them based on what they read or write when using the Web. When a browser fingerprint is correlated with identifying information (like an email address, a recognized given and sur-name, or a government-issued identifier), an application or service provider may be able to identify an otherwise pseudonymous user. The adversary and consequences of this threat will vary by the particular user and use case, but can include nation-state intelligence agencies and threats of violence or imprisonment.</p>
</section>
<section>
<h3>Correlation of browsing activity</h3>
<p>Browser fingerprinting raises privacy concerns even when offline identities are not implicated. Some users may be surprised or concerned that an online party can correlate multiple visits (on the same or different sites) to develop a profile or history of the user. This concern may be heightened because, as described below, such correlation may occur without the user's knowledge or consent, and tools such as clearing cookies do not prevent it.</p>
<p>Browser fingerprinting also allows for tracking across <a class="externalDFN" href="https://tools.ietf.org/html/rfc6454#section-4">origins</a> [[?RFC6454]]: different sites may be able to combine information about a single user even where a cookie policy would block accessing of cookies between origins, because the fingerprint is relatively unique and the same for all origins.</p>
</section>
<section>
<h3>Tracking without transparency or user control</h3>
<p>
In contrast to other mechanisms defined by Web standards for maintaining state (e.g. cookies), browser fingerprinting allows for collection of data about user activity without clear indications that such collection is happening. Transparency can be important for end users, to understand how ongoing collection is happening, but it also enables researchers, policymakers and others to document or regulate privacy-sensitive activity. Browser fingerprinting also allows for tracking of activity without clear or effective user controls: a browser fingerprint typically cannot be cleared or re-set. (See the finding on unsanctioned tracking [[?TAG-UNSANCTIONED]].)
</p>
</section>
</section>
<section>
<h2>What can we do about it?</h2>
<p>
Advances in techniques for browser fingerprinting (see <a href="#research"></a>, below), particularly in <a>active fingerprinting</a>, suggest that a determined adversary's capability for browser fingerprinting cannot plausibly be eliminated through widely deployed technical means alone. However, mitigations in our technical specifications are possible, as described below (<a href="#mitigations"></a>), and may achieve different levels of success (<a href="#feasibility"></a>).
</p>
<p>
Mitigations recommended here are simply mitigations, not solutions. Users of the Web cannot confidently rely on sites being completely unable to correlate traffic, especially when executing client-side code. The fingerprinting surface extends across all Web features implemented by a particular user agent, and even to other layers of the stack (differences in TCP connection behavior, for example). A user might employ an onion routing system such as Tor to limit network-level linkability yet still face the risk of correlation of Web-based activity through browser fingerprinting, or vice versa. To mitigate these privacy risks as a whole, fingerprinting must be considered during the design and development of all specifications.
</p>
<p>
The TAG finding on Unsanctioned Web Tracking, including browser fingerprinting, describes the limitations of technical measures and encourages minimizing and documenting new fingerprinting surface [[?TAG-UNSANCTIONED]]. The best practices below detail common actions that authors of specifications for Web features can take to mitigate the privacy impacts of browser fingerprinting. The Self-Review Questionnaire documents mitigations of privacy impacts in Web features more generally, which may complement these practices [[?security-privacy-questionnaire-tag]].
</p>
</section>
</section>
<section id="bp-summary"></section>
<section>
<h2 id="types_of_fingerprinting">Types of fingerprinting</h2>
<section>
<h3 id="passive">Passive</h3>
<p><dfn>Passive fingerprinting</dfn> is browser fingerprinting based on characteristics observable in the contents of Web requests, without the use of any code executed on the client.</p>
<p>Passive fingerprinting would trivially include cookies (often unique identifiers sent in HTTP requests), the set of HTTP request headers and the IP address and other network-level information. The <a href="https://tools.ietf.org/html/rfc7231#section-5.5.3">User-Agent string</a> [[?RFC7231]], for example, is an HTTP request header that typically identifies the browser, renderer, version and operating system. For some populations, the User-Agent and IP address will often uniquely identify a particular user's browser [[?NDSS-FINGERPRINTING]].</p>
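<div class="example">
<p>As a minimal illustration (a hedged sketch, not taken from any particular implementation, and assuming a Node.js-style request object), a server could derive a passive fingerprint from nothing more than the data present in every HTTP request:</p>
<pre class="highlight">
// Hypothetical server-side sketch: no code runs on the client,
// so the user agent has nothing to observe or block.
const crypto = require('crypto');

function passiveFingerprint(req) {
  const signals = [
    req.headers['user-agent'],       // browser, engine, version, OS
    req.headers['accept-language'],  // locale configuration
    req.headers['accept-encoding'],
    req.socket.remoteAddress         // network-level information
  ];
  return crypto.createHash('sha256')
               .update(signals.join('|'))
               .digest('hex');
}</pre>
</div>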
</section>
<section>
<h3 id="active">Active</h3>
<p>For <dfn>active fingerprinting</dfn>, we also consider techniques where a site runs JavaScript or other code on the local client to observe additional characteristics about the browser, user, device or other context.</p>
<p>Techniques for active fingerprinting might include accessing the window size, enumerating fonts or plug-ins, evaluating performance characteristics, reading from device sensors, and rendering graphical patterns. Key to this distinction is that <a>active fingerprinting</a> takes place in a way that is potentially detectable on the client.</p>
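<div class="example">
<p>For illustration, the following simplified sketch shows one widely documented active fingerprinting technique: rendering a graphical pattern to a canvas and reading back the encoded pixels, which vary subtly with hardware, graphics drivers and font rendering. It is a sketch of the technique, not a complete fingerprinting script.</p>
<pre class="highlight">
// Active fingerprinting runs on the client and is therefore
// potentially detectable there.
function canvasFingerprint() {
  const canvas = document.createElement('canvas');
  const ctx = canvas.getContext('2d');
  ctx.textBaseline = 'top';
  ctx.font = '14px Arial';
  ctx.fillStyle = '#f60';
  ctx.fillRect(0, 0, 100, 30);
  ctx.fillStyle = '#069';
  ctx.fillText('fingerprint', 2, 2);
  // The resulting data URL differs across devices and configurations.
  return canvas.toDataURL();
}</pre>
</div>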
</section>
<section>
<h3 id="cookie_like_setting_retrieving_local_state">Cookie-like</h3>
<p>Users, user agents and devices may also be re-identified by a site that first sets and later retrieves state stored by a user agent or device. This <dfn>cookie-like fingerprinting</dfn> allows re-identification of a user or inferences about a user in the same way that HTTP cookies allow state management for the stateless HTTP protocol [[?RFC6265]].</p>
<p>Cookie-like fingerprinting can also circumvent user attempts to limit or clear cookies stored by the user agent, as demonstrated by the "evercookie" implementation [[?EVERCOOKIE]]. Where state is maintained across user agents (as in the case of common plugins with local storage), across devices (as in the case of certain browser syncing mechanisms) or across software upgrades, cookie-like fingerprinting can allow re-identification of users, user agents or devices where active and passive fingerprinting might not. The Security and Privacy Self-Review Questionnaire also considers this threat in origin state that persists across browsing sessions [[?security-privacy-questionnaire-tag]].</p>
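<div class="example">
<p>A simplified sketch of the cookie-like pattern: the same identifier is written to multiple storage mechanisms and "respawned" from whichever mechanism the user did not clear. Real "evercookies" use many more mechanisms [[?EVERCOOKIE]].</p>
<pre class="highlight">
function respawnId() {
  // Recover the identifier from any surviving mechanism...
  const fromCookie = (document.cookie.match(/uid=([^;]+)/) || [])[1];
  const uid = localStorage.getItem('uid') || fromCookie ||
              Math.random().toString(36).slice(2);
  // ...then write it back everywhere.
  localStorage.setItem('uid', uid);   // Web Storage
  document.cookie = 'uid=' + uid;     // HTTP cookie
  return uid;
}</pre>
</div>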
</section>
</section>
<section id="feasibility">
<h2>Feasibility</h2>
<section>
<h2>Fingerprinting mitigation levels of success</h2>
<p>There are different levels of success in mitigating browser fingerprinting:</p>
<dl>
<dt>Decreased fingerprinting surface</dt><dd>Removing the source of entropy or available attributes that can be used for fingerprinting.</dd>
<dt>Increased anonymity set</dt><dd>By standardization, convention or common implementation, increasing the commonality of particular configurations to decrease the likelihood of unique fingerprintability.</dd>
<dt>Detectable fingerprinting</dt><dd>Making fingerprinting observable to others, so that the user agent might block it or researchers can determine that it's happening.</dd>
<dt>Clearable local state</dt><dd>Helping users respond to fingerprinting by making state mechanisms clearable.</dd>
</dl>
<p>Research has shown feasible improvement in privacy protection in all of these areas. While lists of plugins remain a large fingerprinting surface, entropy has decreased over time with the migration from plugins to Web APIs [[?HIDING-CROWD]]. Collected data on Web users has shown mobile devices to have substantially larger anonymity sets than desktop browsers [[?HIDING-CROWD]]. Research on forms of active fingerprinting has documented its use and demonstrated changes in use of those techniques as an apparent result of increased awareness [[?WPM-MILLION]]. Respawning of cookies has continued, with an increasing variety of techniques, but awareness and technical responses to the issue have made the practice less widespread [[?FLASHCOOKIES-2]].</p>
</section>
<section>
<h2>Feasible goals for specification authors</h2>
<p>
This document works under the expectation that mitigations with different levels of success are feasible under different circumstances, for different threat models and against different types of fingerprinting. In general, active fingerprinting may be made detectable; we can minimize increases to the surface of passive fingerprinting; and cookie-like mechanisms can be made clearable.</p>
<p>
Some implementers and some users may be willing to accept reduced functionality or decreased performance in order to minimize browser fingerprinting. Documenting which features have fingerprinting risk eases the work of implementers building modes for these at-risk users; minimizing fingerprinting even in cases where common implementations will have easy active fingerprintability allows such users to reduce the functionality trade-offs necessary. Making browser fingerprinting more detectable also contributes to mitigations outside the standardization process; for example, through regulatory or policy means [[?TAG-UNSANCTIONED]].
</p>
</section>
</section>
<section>
<h2 id="identifying">Identifying fingerprinting surface and evaluating severity</h2>
<p>To mitigate browser fingerprinting in your specification:</p>
<ol>
<li>identify features that can be used for browser fingerprinting;</li>
<li>evaluate the severity of the fingerprinting surface based on <a href="#severity-list">these five factors</a>; and,</li>
<li>apply mitigations described in the best practices below (<a href="#mitigations"></a>), focused on limiting the severity of that fingerprinting surface.</li>
</ol>
<p>The <dfn id="dfn-fingerprinting-surface" data-export="">fingerprinting surface</dfn> of a user agent is the set of observable characteristics that can be used in concert to identify a user, user agent or device or correlate its activity.</p>
<p>Data sources that may be used for browser fingerprinting include:</p>
<ul>
<li>user configuration</li>
<li>device characteristics</li>
<li>environmental characteristics <em>(e.g. sensor readings)</em></li>
<li>operating system characteristics</li>
<li>user behavior</li>
<li>browser characteristics</li>
</ul>
<p>These data sources may be accessed directly for some features, but in many other cases they are inferred through some other observation. Timing channels, in particular, are commonly used to infer details of hardware (exactly how quickly different operations are completed may provide information on GPU capability, say), network information (via the latency or speed in loading a particular resource) or even user configuration (what items have been previously cached or what resources are not loaded). Consider the side effects of a feature and how those side effects would allow inferences of any of these characteristics.</p>
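<div class="example">
<p>For example, a timing channel can reveal user configuration without any direct API access. This hedged sketch infers a prior visit from how quickly a resource loads; the URL and the interpretation of the timing are purely illustrative.</p>
<pre class="highlight">
function probeLoadTime(url) {
  return new Promise(resolve => {
    const img = new Image();
    const start = performance.now();
    img.onload = img.onerror = () => {
      // A load completing within a few milliseconds suggests a
      // cache hit, i.e. evidence that the resource was fetched before.
      resolve(performance.now() - start);
    };
    img.src = url;
  });
}</pre>
</div>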
<p>The <a href="https://spec.torproject.org/torbrowser-design#fingerprinting-linkability">Tor Browser design document</a> [[?TOR-DESIGN]] has more details on these sources and their relative priorities; this document adds environmental characteristics in that sensor readings or data access may distinguish a user, user agent or device by information about the environment (location, for example).</p>
<p id="severity-list">For each identified feature, consider the severity for the privacy impacts described above (<a href="#privacy_threat_models"></a>) based on the following factors:</p>
<dl>
<dt>entropy</dt><dd>How distinguishing is this new surface? Consider both the possible variations and the likely distribution of values. Adding one bit of entropy is typically of less concern; 30-some bits of entropy would be enough to uniquely identify every individual person. Different data sources may provide different distributions of variation; for example, some characteristics may reveal a common hardware class while other characteristics may reveal user configurations that vary between individual people.</dd>
<dt>detectability</dt><dd>Will use of this feature for browser fingerprinting be observable to the user agent or likely to be discoverable by researchers? Because detectability is an important — and perhaps the most feasible — mitigation, increases to the surface for <a>passive fingerprinting</a> are of particular concern and should be avoided.</dd>
<dt>persistence</dt><dd>How long will the characteristics of this fingerprinting surface stay unchanged? Can users control or re-set these values to prevent long-lived identification? While short-lived characteristics may still enable unexpected correlation of activity (for example, between two browser profiles on the same device), persistent or permanent identifiers are particularly concerning for the lack of user control.</dd>
<dt>availability</dt><dd>Will this surface be available to the "drive-by Web" or only in certain contexts where a user has granted a particular sensor permission or directly authenticated? While browser fingerprinting is still something to mitigate in the permissioned context, the concern that a feature will end up used primarily for fingerprinting is reduced.</dd>
<dt>scope</dt><dd>Is this surface consistent across origins or only within a single origin? In general, characteristics or identifiers that are tied to a particular origin are of less concern and can be handled with the same tools as HTTP cookies.</dd>
</dl>
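<div class="example">
<p>As a back-of-the-envelope aid for the entropy factor: the identifying information carried by observing a characteristic with probability <var>p</var> is -log<sub>2</sub>(<var>p</var>) bits.</p>
<pre class="highlight">
// Self-information of a characteristic observed with probability p.
function bitsOfInformation(p) {
  return -Math.log2(p);
}

bitsOfInformation(0.5);      // 1 bit: a value shared by half of users
bitsOfInformation(1 / 1000); // ~10 bits: a value shared by 1 in 1,000
// log2 of the world population (~8 billion) is about 33, hence
// "30-some bits" sufficing to identify every individual person.</pre>
</div>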
<p>While we do not recommend specific trade-offs, these factors can be used to weigh increases to that surface (<a href="#weighing_increased_fingerprinting_surface"></a>) and suggest appropriate mitigations. Although each factor may suggest specific mitigations, in weighing whether to add <a>fingerprinting surface</a> they should be considered in concert. For example, access to a new set of characteristics about the user may be high entropy, but be of less concern because it has limited availability and is easily detectable. A cross-origin, drive-by-available, permanent, passive unique identifier is incompatible with our expectations for privacy on the Web.</p>
<p>In conducting this analysis, it may be tempting to dismiss certain fingerprinting surface in a specification because of a comparison to fingerprinting surface exposed by other parts of the Web platform or other layers of the stack. Be cautious about making such claims. First, while similar information may be available through other means, similar is not identical: information disclosures may not be exactly the same and fingerprintability is promoted by combining these distinct sources. Second, where identical entropy is present, other factors of severity or availability may differ and those factors are important for feasible mitigation. Third, the platform is neither monolithic nor static; not all other features are implemented in all cases and may change (or be removed) in the future. Fourth, circular dependencies are a danger when so many new features are under development; two specifications sometimes refer to one another in arguing that fingerprinting surface already exists. It is more useful to reviewers and implementers to consider the fingerprinting surface provided by the particular Web feature itself, with specific references where surface may be available through other features as well.</p>
</section>
<section id="mitigations">
<h2>Mitigations</h2>
<section id="weighing_increased_fingerprinting_surface">
<h3>Weighing increased fingerprinting surface</h3>
<p>Web specification authors regularly attempt to strike a balance between new functionality and fingerprinting surface. For example, feature detection functionality allows for progressive enhancement with a small addition to fingerprinting surface, while detailed enumerations of plugins, fonts or connected devices may provide a large fingerprinting surface with minimal functional support.</p>
<p>Authors and Working Groups determine the appropriate balance between these properties on a case-by-case basis, given their understanding of the functionality, its implementations and the severity of increased fingerprinting surface. However, given the distinct privacy impacts described above and in order to improve consistency across specifications, these practices provide some guidance:</p>
<div class="practice">
<p>
<span id="avoid-passive-increases" class="practicelab">Avoid unnecessary or severe increases to fingerprinting surface, especially for passive fingerprinting.</span>
</p>
<p class="practicedesc">
Consider each of the <a href="#severity-list">severity factors</a> described above, whether the functionality is necessary, and whether comparable functionality is feasible with less severe increases to the fingerprinting surface.
</p>
<p class="practicedesc">
In particular, unless a feature cannot reasonably be designed in any other way, increased passive fingerprintability should be avoided. Passive fingerprinting allows for easier and widely-available identification, without opportunities for external detection or control by users or third parties.
</p>
</div>
<div class="practice">
<p>
<span id="narrow-scope-availability" class="practicelab">Narrow the scope and availability of a feature with fingerprinting surface to what is functionally necessary.</span>
</p>
<p class="practicedesc">
What browsing contexts, resources and requests need access to a particular feature? Identifiers can often be scoped to have a different value in different origins. Some configuration may only be necessary in top-level browsing contexts.
</p>
<p class="practicedesc">
Should access to this functionality be limited to where users have granted a particular permission? While excessive permissions can create confusion and fatigue, limiting highly granular data to situations where a user has already granted permission to access sensitive data largely mitigates the risk of that feature being used primarily for browser fingerprinting in "drive-by" contexts. For example, Media Capture and Streams [[?mediacapture-streams]] limits access to attached microphone and camera device labels to when the user has granted permission to access a camera or microphone (while still allowing access to the number and configuration of attached cameras and microphones in all contexts, a noted increase in drive-by fingerprinting surface; see the sketch below).
</p>
</div>
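<div class="example">
<p>The Media Capture and Streams behavior described above is observable with <code>enumerateDevices()</code>: device labels are withheld (returned as empty strings) until the user has granted a camera or microphone permission, while the number and kinds of devices remain enumerable.</p>
<pre class="highlight">
async function listDevices() {
  const devices = await navigator.mediaDevices.enumerateDevices();
  for (const d of devices) {
    // Without a granted permission, d.label is the empty string.
    console.log(d.kind, d.label || '(label withheld)');
  }
}</pre>
</div>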
<p>Some implementations may also limit the entropy of fingerprinting surface by not exposing different capabilities for different devices or installations of a user agent. Font lists, for example, can be limited to a list commonly available on all devices that run a particular browser or operating system (as implemented in Tor Browser, Firefox and Safari).</p>
<div class="practice">
<p>
<span id="mark-fingerprinting" class="practicelab">Mark features that contribute to fingerprintability.</span>
</p>
<p class="practicedesc">
<img src="https://www.w3.org/TR/html5/images/fingerprint.png" class="fingerprint" alt="This feature may contribute to browser fingerprintability.">
Where a feature does contribute to the <a>fingerprinting surface</a>, indicate that impact, by explaining the effect (and any known implementer mitigations) and marking the relevant section with a fingerprinting icon, as this paragraph is.
</p>
</div>
<div class="example">
The following code can be used to mark a paragraph with the fingerprint icon.
<pre class="highlight">
<img src="https://www.w3.org/TR/html5/images/fingerprint.png"
class="fingerprint"
alt="This feature may contribute to browser fingerprintability."></pre>
</div>
</section>
<section>
<h3 id="a_standardized_profile">Standardization</h3>
<p>
Specifications can mitigate fingerprintability through standardization: by defining a consistent behavior, conformant implementations won't have variations that can be used for browser fingerprinting.
</p>
<p>
Randomization of certain browser characteristics has been proposed as a way to combat browser fingerprinting. While this strategy may be pursued by some implementations, we expect that in general it will be more effective to standardize values (or null them) than to set a range over which they can vary. The Tor Browser design [[?TOR-DESIGN]] provides more detailed information, but in short: it's difficult to measure how well randomization will work as a mitigation, and it can be costly to implement in terms of usability (varying functionality or design in unwanted ways), processing (generating random numbers) and development (including the cost of introducing new security vulnerabilities). Standardization provides the benefit of an increased anonymity set for conformant browsers with the same configuration: that is, an individual can look the same as a larger group of people rather than trying to look like a number of different individuals.
</p>
<div class="practice">
<p>
<span id="specify-ordering" class="practicelab">Specify orderings and non-functional differences.</span>
</p>
<p class="practicedesc">
To reduce unnecessary entropy, specify aspects of API return values and behavior that don't contribute to functional differences. For example, if the ordering of return values in a list has no semantic value, specify a particular ordering (alphabetical order by a defined algorithm, for example) so that incidental differences don't expose fingerprinting surface.
</p>
<p class="practicedesc">
Notably, accessing the list of system fonts via Flash or Java plugins returns the list not in a standard alphabetical order but in an unspecified, system-specific order. This ordering adds to the entropy available from those plugins in a way that provides no functional advantage. (See <a href="https://trac.webkit.org/wiki/Fingerprinting#ii.CollectingSystemFontsviaFlashPlugins">Collecting System Fonts via Flash Plugins</a>, and the sketch below.)
</p>
</div>
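<div class="example">
<p>A minimal sketch of the practice, assuming a hypothetical feature that exposes a list whose ordering carries no semantic value: sort by a defined algorithm before returning, so incidental system-specific ordering adds no entropy.</p>
<pre class="highlight">
// Hypothetical: normalize ordering before exposing a list.
function exposeFontList(systemFonts) {
  // The default sort compares UTF-16 code units: a deterministic
  // ordering that is identical across conformant implementations.
  return Array.from(systemFonts).sort();
}</pre>
</div>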
<p>
Standardization does <em>not</em> need to attempt to hide all differences between different browsers (e.g. Edge and Chrome); implemented functionality and behavior differences will always exist between different implementations. For that reason, removing <code>User-Agent</code> headers altogether is not a goal. However, variation in the <code>User-Agent</code> string that reveals additional information about the user or device has been shown to provide substantial fingerprinting surface [[?BEAUTY-BEAST]].
</p>
</section>
<section>
<h3>Detectability</h3>
<p>Where a client-side API provides some fingerprinting surface, authors can still mitigate the privacy concerns via detectability. If client-side fingerprinting activity is to some extent distinguishable from functional use of APIs, user agent implementations may have an opportunity to prevent ongoing fingerprinting or make it observable to users and external researchers (including academics or relevant regulators) who may be able to detect and investigate the use of fingerprinting.</p>
<div class="practice">
<p>
<span id="api-minimization" class="practicelab">Design APIs to access only the entropy necessary.</span>
</p>
<p class="practicedesc">
Following the basic principle of <a data-cite="rfc6973#section-6.1">data minimization</a> [[?RFC6973]], design your APIs such that a site can access (and does access by default) only the entropy necessary for particular functionality.
</p>
<p class="practicedesc">
Authors might design an API to allow for querying of a particular value, rather than returning an enumeration of all values. User agents and researchers can then more easily distinguish between sites that query for one or two particular values (gaining minimal entropy) and those that query for all values (more likely attempting to fingerprint the browser); or implementations can cap the number of different values. For example, Tor Browser limits the number of fonts that can be queried with a <code>browser.display.max_font_attempts</code> preference.
</p>
<p class="practicedesc">
The granularity or precision of information returned can be minimized in order to reduce entropy. For example, implementations of the Battery Status API [[?BATTERY-STATUS]] allowed for high-precision (double-precision, or 15-17 significant digits) readings of the current battery level, which provided a short-term identifier that could be used to correlate traffic across origins or clearance of local state. Rounding off values to lower precision mitigates browser fingerprinting while maintaining functional use cases (see the sketch following this list). Alternatively, providing a Boolean value or a small enumeration of values might provide functionality without revealing underlying details; for example, the Boolean <code>near</code> property in the Proximity Sensor API [[?PROXIMITY]].
</p>
<p class="practicedesc">
For more information, see:
</p>
<ul class="practicedesc">
<li><a href="https://www.w3.org/TR/dap-privacy-reqs/">Device API Privacy Requirements</a> [[?dap-privacy-reqs]], <abbr title="Device APIs Working Group">DAP</abbr> Working Group Note, June 2010.</li>
<li><a href="https://www.w3.org/2001/tag/doc/APIMinimization">Data Minimization in Web APIs</a> [[?TAG-MINIMIZATION]], W3C <abbr title="Technical Architecture Group">TAG</abbr>, September 2011.</li>
<li><a href="https://www.w3.org/TR/generic-sensor/#security-and-privacy">Generic Sensor API: Security and privacy considerations</a> [[?generic-sensor]], March 2018.</li>
<li><a href="https://eprint.iacr.org/2015/616.pdf">The leaking battery: A privacy analysis of the HTML5 Battery Status API</a> [[?LEAKING-BATTERY]], 2015.</li>
</ul>
</div>
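<div class="example">
<p>A sketch of granularity reduction as applied to the battery-level case described above; the step size here is illustrative, not what any particular implementation shipped.</p>
<pre class="highlight">
// Round a high-precision reading to a coarse step before exposing it.
function coarseBatteryLevel(level) {
  // 0.5683712634... becomes 0.55: 21 possible values instead of
  // the vast number distinguishable at double precision.
  return Math.round(level * 20) / 20;
}</pre>
</div>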
<p>
Relatedly, detectability is improved even for data sent in HTTP headers (what we would typically consider passive fingerprinting) if sites are required to request access to (or "opt in" to) the information before it's sent.
</p>
<div class="practice">
<p>
<span id="server-advertisement" class="practicelab">Require servers to advertise or opt in to access data.</span>
</p>
<p class="practicedesc">
Even for data sent in HTTP request headers, requiring servers to advertise use of particular data, publicly document a policy, or "opt in" before clients send configuration data provides the possibility of detection by user agents or researchers.
</p>
<p class="practicedesc">
For example, Client Hints [[?httpbis-client-hints]] proposes an <code>Accept-CH</code> response header for services to indicate that specific hints can be used for content negotiation, rather than all supporting clients sending all hints in all requests.
</p>
<p class="note">
This is a relatively new approach; we're still evaluating whether this provides meaningful and useful detectability.
</p>
</div>
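<div class="example">
<p>A sketch of the Client Hints exchange described in [[?httpbis-client-hints]] (the exact set of hint names has varied between drafts). A server opts in via a response header, and only then do supporting clients send the advertised hints on subsequent requests:</p>
<pre class="highlight">
HTTP/1.1 200 OK
Accept-CH: Device-Memory, DPR

GET /page HTTP/1.1
Device-Memory: 8
DPR: 2</pre>
</div>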
<p>
Implementers can facilitate detectability by providing or enabling instrumentation so that users or third parties are able to determine when fingerprinting surface is being accessed. Of particular importance for instrumentation are: access to all the different sources of fingerprinting surface; identification of the originating script; and avoiding revealing that instrumentation is taking place. Beyond the minimization practice described above, these are largely implementation-specific (rather than Web specification) features.
</p>
<p>
If your specification exposes some fingerprinting surface (whether it's active or passive), some implementers (e.g. Tor Browser) are going to be compelled to disable those features for certain privacy-conscious users.
</p>
<div class="practice">
<p>
<span id="anticipate-disabled" class="practicelab">Enable graceful degradation for privacy-conscious users or implementers.</span>
</p>
<p class="practicedesc">
Following the principle of progressive enhancement, and to avoid further divergence (which might itself expose variation in users), consider whether some functionality in your specification is still possible if fingerprinting surface features are disabled.
</p>
<p class="practicedesc">
Explicit hooks or API flags may be used so that browser extensions or certain user agents can easily disable specific features. For example, the <a data-cite="html52/semantics-scripting.html#canvas-origin-clean">origin-clean flag</a> [[?html52]] allows control over whether an image canvas can be read, a significant fingerprinting surface.
</p>
</div>
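<div class="example">
<p>A sketch of the degradation pattern, with a hypothetical <code>renderHtmlTable</code> fallback: feature-detect rather than assume, so that configurations disabling a fingerprinting surface (such as canvas readback) keep core functionality.</p>
<pre class="highlight">
function drawChart(container, data) {
  const canvas = document.createElement('canvas');
  const ctx = canvas.getContext('2d');
  if (!ctx) {
    // Canvas disabled or unavailable: degrade to markup.
    renderHtmlTable(container, data);  // hypothetical fallback
    return;
  }
  container.appendChild(canvas);
  // ... draw the chart with the 2D context ...
}</pre>
</div>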
</section>
<section>
<h3>Clearing all local state</h3>
<p>Features which enable storage of data on the client and functionality for client- or server-side querying of that data can increase the ease of cookie-like fingerprinting. Storage can vary between large amounts of data (for example, the Web Storage API) or just a binary flag (has or has not provided a certain permission; has or has not cached a single resource).</p>
<div class="practice">
<p>
<span id="no-new-cookies" class="practicelab">Avoid unnecessary new local state mechanisms.</span>
</p>
<p class="practicedesc">
If functionality does not require maintaining client-side state in a way that is subsequently queryable (or otherwise observable), avoid creating a new cookie-like feature. Can the functionality be accomplished with existing HTTP cookies or an existing JavaScript local storage API?
</p>
<p class="practicedesc">
For example, the Flash plugin's Local Shared Objects (LSOs) have often been used to duplicate and re-spawn HTTP cookies cleared by the user [[?FLASHCOOKIES]].
</p>
</div>
<p>Where features do require setting and retrieving local state, there are ways to mitigate the privacy impacts related to unexpected cookie-like behavior; in particular, you can help implementers prevent "permanent", "zombie", "super" or "evercookies".</p>
<div class="practice">
<p>
<span id="mark-cookie-like" class="practicelab">Highlight any local state mechanisms to enable simultaneous clearing.</span>
</p>
<p class="practicedesc">
Clearly note where state is being maintained and could be queried and provide guidance to implementers on enabling simultaneous deletion of local state for users. Such functionality can mitigate the threat of "evercookies" because the presence of state in one such storage mechanism can't be used to persist and re-create an identifier.
</p>
</div>
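<div class="example">
<p>To make the guidance concrete, a sketch of clearing several mechanisms together, so that no one mechanism can respawn an identifier from another. This is written from a page's own perspective for illustration; user agents implement the equivalent internally, and <code>indexedDB.databases()</code> is not yet supported everywhere.</p>
<pre class="highlight">
async function clearOriginState() {
  localStorage.clear();
  sessionStorage.clear();
  // Delete every IndexedDB database for this origin.
  for (const db of await indexedDB.databases()) {
    indexedDB.deleteDatabase(db.name);
  }
  // Delete every Cache Storage cache for this origin.
  for (const key of await caches.keys()) {
    await caches.delete(key);
  }
}</pre>
</div>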
<p>Permanent or persistent data (including any identifiers) pose a particular risk because they undermine the ability for a user to clear or re-set the state of their device or to maintain different identities.</p>
<div class="practice">
<p>
<span id="no-permanent" class="practicelab">Limit permanent or persistent state.</span>
</p>
<p class="practicedesc">
Permanent identifiers or other permanent state (for example, identifiers or keys set in hardware) should typically not be exposed. Where necessary, access to such identifiers should require user permission (though explaining the implications of such permission to users may be difficult) and be limited to a particular origin (though server-side collusion between origins will be difficult to detect).
As a result, your design should not rely on saving data on the client and querying it after a user has cleared cookies or other local state. That is, you should not expect any local state information to be permanent or to persist longer than other local state.
</p>
</div>
<p>Though not strictly browser fingerprinting, there are other privacy concerns regarding user tracking for features that provide local storage of data. Mitigations suggested in the Web Storage API specification include: safe-listing, block-listing, expiration and secure deletion <a data-cite="?HTML/webstorage.html#user-tracking">[HTML#user-tracking]</a>.</p>
</section>
<section><h3 id="do_not_track_a_cooperative_approach">Do Not Track</h3>
<p>Expressions of, and compliance with, a Do Not Track signal do not inhibit the capability of browser fingerprinting, but may mitigate some user concerns about fingerprinting, specifically around tracking as defined in those specifications [[?TRACKING-DNT]] [[?TRACKING-COMPLIANCE]] and as implemented by services that comply with those user preferences. That is, DNT can mitigate concerns with cooperative sites.</p>
<p>The use of <abbr title="Do Not Track">DNT</abbr> in this way typically does not require changes to other functional specifications.
If your specification expects a particular behavior upon receiving a particular DNT signal, indicate that with a reference to [[?TRACKING-DNT]].
If your specification introduces a new communication channel that could be used for tracking, you might wish to define how a DNT signal should be communicated.
</p>
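<div class="example">
<p>A cooperating site can read the signal client-side via <code>navigator.doNotTrack</code> (server-side, the <code>DNT</code> request header carries the same value); a minimal sketch:</p>
<pre class="highlight">
if (navigator.doNotTrack === '1') {
  // The user has expressed a Do Not Track preference; a cooperative
  // site refrains from tracking as defined in [TRACKING-COMPLIANCE].
} else {
  initAnalytics();  // hypothetical tracking-related code path
}</pre>
</div>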
</section>
</section>
<section class="appendix" id="research">
<h2>Research</h2>
<section>
<h3>Browser vendor documentation</h3>
<p>Some browser developers maintain pages on browser fingerprinting, including: potential mitigations or modifications necessary to decrease the surface of that browser engine; different vectors that can be used for fingerprinting; potential future work. These are not cheery, optimistic documents.</p>
<ul>
<li>The Chromium Projects: <a href="https://www.chromium.org/Home/chromium-security/client-identification-mechanisms/">Technical analysis of client identification mechanisms</a></li>
<li><a href="https://trac.webkit.org/wiki/Fingerprinting">WebKit Wiki: Fingerprinting</a></li>
<li><a href="https://wiki.mozilla.org/Fingerprinting">Mozilla Wiki: Fingerprinting</a></li>
<li><a href="https://spec.torproject.org/torbrowser-design#fingerprinting-linkability#fingerprinting-linkability">The Design and Implementation of the Tor Browser: Cross-Origin Fingerprinting Unlinkability</a></li>
</ul>
</section>
<section>
<h3>Academic research</h3>
<p>What are the key papers to read here, historically or to give the latest on fingerprinting techniques? What are some areas of open research that might be relevant?</p>
<ul>
<li>Eckersley, Peter. "<a href="https://panopticlick.eff.org/static/browser-uniqueness.pdf">How unique is your web browser?</a>" <i>Privacy Enhancing Technologies</i>. Springer Berlin Heidelberg, 2010.</li>
<li>Mowery, Keaton, Dillon Bogenreif, Scott Yilek, and Hovav Shacham. “<a href="https://cseweb.ucsd.edu/~kmowery/papers/js-fingerprinting.pdf">Fingerprinting Information in JavaScript Implementations</a>.” In <i>Web 2.0 Security and Privacy</i>, 2011.</li>
<li>Yen, Ting-Fang, et al. "<a href="https://www.microsoft.com/en-us/research/publication/host-fingerprinting-and-tracking-on-the-webprivacy-and-security-implications/">Host fingerprinting and tracking on the web: Privacy and security implications</a>." <em>Proceedings of NDSS</em>. 2012. [[?NDSS-FINGERPRINTING]]</li>
<li>Mowery, Keaton, and Hovav Shacham. "<a href="https://hovav.net/ucsd/dist/canvas.pdf">Pixel perfect: Fingerprinting canvas in HTML5</a>." <i>Web 2.0 Security and Privacy</i>, 2012.</li>
<li id="wsj-orbitz">Mattioli, Dana. "<a href="https://www.wsj.com/articles/SB10001424052702304458604577488822667325882">On Orbitz, Mac Users Steered to Pricier Hotels</a>". <i>Wall Street Journal</i>, August 23, 2012.</li>
<li id="FPDetective">Gunes Acar et al. "<a href="https://dl.acm.org/citation.cfm?id=2516674">FPDetective: dusting the web for fingerprinters</a>." In <i>CCS '13</i>.</li>
<li>Nikiforakis, Nick, et al. "<a href="https://seclab.cs.ucsb.edu/media/uploads/papers/sp2013_cookieless.pdf">Cookieless monster: Exploring the ecosystem of web-based device fingerprinting</a>." <i>IEEE Symposium on Security and Privacy (S&P 2013)</i>, 2013.</li>
<li>G. Acar, C. Eubank, S. Englehardt, M. Juarez, A. Narayanan, C. Diaz. "<a href="https://securehomes.esat.kuleuven.be/%7Egacar/persistent/">The Web never forgets: Persistent tracking mechanisms in the wild</a>." In <i>Proceedings of CCS 2014</i>, Nov. 2014.</li>
<li>Steven Englehardt, Arvind Narayanan. "<a href="https://webtransparency.cs.princeton.edu/webcensus/">Online tracking: A 1-million-site measurement and analysis</a>." May 2016. [[?WPM-MILLION]]</li>
<li>Pierre Laperdrix, Walter Rudametkin, Benoit Baudry. "<a href="https://hal.inria.fr/hal-01285470v2/">Beauty and the Beast: Diverting modern web browsers to build unique browser fingerprints</a>." <i>IEEE Symposium on Security and Privacy (S&P 2016)</i>, May 2016.</li>
<li>
"<a href="https://hal.inria.fr/hal-01718234v2">Hiding in the Crowd: an Analysis of the Effectiveness of Browser Fingerprinting at Large Scale</a>." <i>WWW2018 - TheWebConf 2018: 27th International World Wide Web Conference</i>, April 2018. [[?HIDING-CROWD]]
</li>
</ul>
</section>
<section>
<h3>Testing</h3>
<p>A non-exhaustive list of sites that allow the visitor to test their configuration for fingerprintability.</p>
<ul>
<li><a href="https://amiunique.org/">amiunique.org</a> (INRIA)</li>
<li><a href="https://panopticlick.eff.org/">panopticlick.eff.org</a> (EFF)</li>
<li><a href="https://browserspy.dk/">BrowserSPY.dk</a></li>
<li><a href="https://fingerprint.pet-portal.eu/">pet-portal cross-browser fingerprinting test</a></li>
<li><a href="http://lcamtuf.coredump.cx/p0f3/">p0f v3</a> (purely passive fingerprinting)</li>
</ul>
</section>
</section>
<section class="appendix">
<h2>Acknowledgements</h2>
<p>
Many thanks to Robin Berjon for ReSpec and to Tobie Langel for Github advice; to the Privacy Interest Group and the Technical Architecture Group for review; to the Tor Browser designers for references and recommendations; and to Christine Runnegar for contributions.
</p>
</section>
</body>
</html>