draft-ietf-dnssd-privacy.xml

<?xml version="1.0" encoding="UTF-8"?>

<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [

<!ENTITY rfc1033 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1033.xml'>
<!ENTITY rfc1034 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1034.xml'>
<!ENTITY rfc1035 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1035.xml'>
<!ENTITY rfc2045 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2045.xml'>
<!ENTITY rfc2119 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml'>
<!ENTITY rfc2782 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2782.xml'>
<!ENTITY rfc4055 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4055.xml'>
<!ENTITY rfc4075 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4075.xml'>
<!ENTITY rfc4279 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4279.xml'>
<!ENTITY rfc4648 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4648.xml'>
<!ENTITY rfc5246 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5246.xml'>
<!ENTITY rfc6762 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6762.xml'>
<!ENTITY rfc6763 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6763.xml'>
<!ENTITY rfc7626 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7626.xml'>
<!ENTITY rfc7844 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7844.xml'>
<!ENTITY rfc7858 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7858.xml'>
<!ENTITY rfc8094 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8094.xml'>
<!ENTITY rfc8117 PUBLIC ''
   'http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8117.xml'>
<!ENTITY I-D.ietf-tls-tls13 PUBLIC ''  
   "http://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-tls-tls13.xml">
<!ENTITY I-D.ietf-dnssd-push PUBLIC ''  
   "http://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-dnssd-push">
<!ENTITY I-D.ietf-dnssd-pairing PUBLIC ''  
   "http://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-dnssd-pairing">
<!ENTITY I-D.ietf-dnssd-privacyscaling PUBLIC ''  
   "http://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-dnssd-privacyscaling">
<!ENTITY I-D.ietf-dnssd-prireq PUBLIC ''  
   "http://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-dnssd-prireq">

<!ENTITY kw14a PUBLIC ''
   "references/reference.kw14a.xml">
<!ENTITY kw14b PUBLIC ''
   "references/reference.kw14b.xml">
]>

<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<?rfc compact="yes"?>
<?rfc toc="yes"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes"?>

<!-- Expand crefs and put them inline -->
<?rfc comments='yes' ?>
<?rfc inline='yes' ?>

<rfc category="std" 
     docName="draft-ietf-dnssd-privacy-05"
     ipr="trust200902">

<front>
    <title abbrev="DNS-SD Privacy Extensions">
      Privacy Extensions for DNS-SD
    </title>

   <author fullname="Christian Huitema" initials="C." surname="Huitema">
      <organization>Private Octopus Inc.</organization>
      <address>
        <postal>
          <street></street>
          <city>Friday Harbor</city>
          <code>98250</code>
          <region>WA</region>
          <country>U.S.A.</country>
        </postal>
        <email>huitema@huitema.net</email>
        <uri>http://privateoctopus.com/</uri>
      </address>
    </author>

   <author fullname="Daniel Kaiser" initials="D." surname="Kaiser">
     <organization>University of Konstanz</organization>
      <address>
        <postal>
          <street> </street>
          <city>Konstanz</city>
          <code>78457</code>
          <region></region>
          <country>Germany</country>
        </postal>
        <email>daniel.kaiser@uni-konstanz.de</email>
      </address>
    </author>

    <date year="2018" />

    <abstract>
        <t>
DNS-SD (DNS Service Discovery) normally discloses information about both the devices offering services and the devices requesting services.
This information includes host names, network parameters, and possibly a further description of the corresponding service instance.
Especially when mobile devices engage in DNS Service Discovery over Multicast DNS at a public hotspot,
a serious privacy problem arises.
</t>
<t>
  We propose to solve this problem by a two-stage approach.
  In the first stage, hosts discover Private Discovery Service Instances via
  DNS-SD using special formats to protect their privacy.
  These service instances correspond to Private Discovery Servers running on peers.
  In the second stage, hosts directly query these Private Discovery Servers via DNS-SD over TLS.
  A pairwise shared secret necessary to establish these connections
  is only known to hosts authorized by a pairing system.
</t>
<t>
  Revisions of this draft are currently considered in the DNSSD working group.
</t>
    </abstract>
</front>

<middle>
<section title="Introduction">
<t>
DNS-SD <xref target="RFC6763" /> over mDNS <xref target="RFC6762" /> enables configurationless 
service discovery in local networks.
It is very convenient for users, but it requires the public exposure 
of the offering and requesting identities along with information about the offered and 
requested services.
Parts of the published information can seriously breach the user's privacy.
These privacy issues and potential solutions are discussed in <xref target="KW14a" />
and <xref target="KW14b" />.
</t>
<t>
There are cases when nodes connected to a network want to provide
or consume services without exposing their identity to the other
parties connected to the same network. Consider for example a
traveler wanting to upload pictures from a phone to a laptop
when connected to the Wi-Fi network of an Internet cafe, or
two travelers who want to share files between their laptops
when waiting for their plane in an airport lounge.
</t>
<t>
We expect that these exchanges will start with a discovery 
procedure using DNS-SD <xref target="RFC6763" /> over mDNS <xref target="RFC6762" />.
One of the devices will publish the availability of a service, such as a picture library
or a file store in our examples. The user of the other device will
discover this service, and then connect to it.
</t>
<t>
When analyzing these scenarios in <xref target="I-D.ietf-dnssd-prireq" />, we find that
the DNS-SD messages leak identifying information such as the instance name,
the host name or service properties. We review the design constraints of a solution
in <xref target="design"/>, and describe the proposed solution in
<xref target="solution"/>.
</t>
<t>
  While we focus on an mDNS-based distribution of the DNS-SD resource records,
  our solution is agnostic about the distribution method and also works with other distribution methods,
  e.g. the classical hierarchical DNS.
</t>
<t>
  The solution presented here relies on 1-1 pairings between clients and servers.
  Discussions during the IETF 101 in London showed that this requirement of a full
  mesh of pairings poses some scalability issues, as explained in
  <xref target="I-D.ietf-dnssd-privacyscaling"/>. The next revision of this draft
  may propose a different mechanism.
</t>
<section title="Requirements">
<t>
  The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
  "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
  document are to be interpreted as described in <xref target="RFC2119" />.
</t>
</section>
</section>
<!-- DNSSD analysis moved to separate document
<section title="Privacy Implications of DNS-SD" anchor="analysis">
<t>
DNS-Based Service Discovery (DNS-SD) is defined in <xref target="RFC6763" />.
It allows nodes to publish the availability of an instance of a service by
inserting specific records in the DNS (<xref target="RFC1033"/>,
<xref target="RFC1034"/>, <xref target="RFC1035"/>) or by publishing
these records locally using
multicast DNS (mDNS) <xref target="RFC6762"/>.
Available services are described using three types of records:
</t>
<t>
<list style="hanging">
<t hangText="PTR Record:">Associates a service type in the domain with
an "instance" name of this service type.
</t>
<t hangText="SRV Record:">Provides the node name, port number, priority and
weight associated with the service instance, in conformance with <xref target="RFC2782" />.
</t>
<t hangText="TXT Record:">Provides a set of attribute-value pairs describing
specific properties of the service instance.
</t>
</list>
</t>
<t>
In the remaining subsections, we will review the privacy issues related to publishing
instance names, node names, service attributes and other data, as well as review 
the implications of using the discovery service as a client.
</t>

<section title="Privacy Implication of Publishing Service Instance Names" anchor="instanceLeak" >
<t>
In the first phase of discovery, the client obtains all
the PTR records associated with a service type in a given naming domain.
Each PTR record contains a Service Instance Name defined in Section 4 of <xref target="RFC6763" />:
</t>

<t>
<figure>
<artwork>
  Service Instance Name = &lt;Instance&gt; . &lt;Service&gt; . &lt;Domain&gt;
</artwork>
</figure>
</t>

<t>
The &lt;Instance&gt; portion of the Service Instance Name is meant to convey
enough information for users of discovery clients to easily select the desired service instance.
Nodes that use DNS-SD over mDNS <xref target="RFC6762" /> in a mobile environment will rely on the specificity
of the instance name to identify the desired service instance.
In our example of users wanting to upload pictures to a laptop in an Internet Cafe, the list of 
available service instances may look like:
</t>
<t>
<figure>
<artwork>
Alice's Images         . _imageStore._tcp . local
Alice's Mobile Phone   . _presence._tcp   . local
Alice's Notebook       . _presence._tcp   . local
Bob's Notebook         . _presence._tcp   . local
Carol's Notebook       . _presence._tcp   . local
</artwork>
</figure>
</t>
<t>
Alice will see the list on her phone and understand intuitively that she should
pick the first item. The discovery will "just work".
</t>
<t>
However, DNS-SD/mDNS will reveal to anybody that Alice is currently visiting the Internet Cafe.
It further discloses the fact that she uses two devices, shares an image store, 
and uses a chat application supporting the
_presence protocol on both of her devices. She might currently chat with Bob or Carol, 
as they are also using a _presence supporting chat application.
This information is not just available to devices actively browsing for and offering 
services, but to anybody passively listening to the network traffic.
</t>
</section>

<section title="Privacy Implication of Publishing Node Names">
<t>
The SRV records contain the DNS name of the node publishing the
service. Typical implementations construct this DNS name by
concatenating the "host name" of the node with the name of the 
local domain. The privacy implications of this
practice are reviewed in <xref target="RFC8117" />.
Depending on naming practices, the host name is either a strong 
identifier of the device, or at a minimum a partial identifier.
It enables tracking of both the device, and, by extension, the device's owner.
</t>
</section>

<section title="Privacy Implication of Publishing Service Attributes">
<t>
The TXT record's attribute-value pairs contain information on the characteristics of
the corresponding service instance.
This in turn reveals information
about the devices that publish services. The amount of information
varies widely with the particular service and its implementation:
</t>
<t>
<list style="symbols">
<t>
Some attributes like the paper size available in a printer, are the
same on many devices, and thus only provide limited information
to a tracker.
</t>
<t>
Attributes that have freeform values, such as the name of a directory,
may reveal much more information.
</t>
</list>
</t>
<t>
Combinations of attributes have more information power than specific attributes,
and can potentially be used for "fingerprinting" a specific device.
</t>

<t>
Information contained in TXT records does not only breach privacy by making devices
trackable, but might directly contain private information about the user.
For instance the _presence service reveals the "chat status" to everyone in the same network.
Users might not be aware of that.
</t>

<t>
  Further, TXT records often contain version information about services allowing potential attackers
  to identify devices running exploit-prone versions of a certain service.
</t>

</section>

<section title="Device Fingerprinting" anchor="serverFingerprint">
<t>
The combination of information published in DNS-SD has the potential to
provide a "fingerprint" of a specific device. Such information includes: 
</t>
<t>
<list style="symbols">
<t>
The list of services published by the device, which can be retrieved because the
SRV records will point to the same host name.
</t>
<t>
The specific attributes describing these services.
</t>
<t>
The port numbers used by the services.
</t>
<t>
The values of the priority and weight attributes in the SRV records.
</t>
</list>
</t>
<t>
This combination of services and attributes will often be sufficient to identify
the version of the software running on a device. If a device publishes
many services with rich sets of attributes, the combination may be
sufficient to identify the specific device.
</t>

<t>
A sometimes heard argument is that devices providing services can be identified
by observing the local traffic, and that trying to hide the presence of the service
is futile.
This argument, however, does not carry much weight because
<list style="numbers">
<t>
proving privacy at the discovery layer is of the essence for enabling automatically configured privacy-preserving
network applications. Application layer protocols are not forced to leverage the offered privacy,
but if device tracking is not prevented at the deeper layers, including the service discovery layer,
obfuscating a certain service's protocol at the application layer is futile.
</t>
<t>
Further, even if the application layer does not protect privacy,
it is hard to record and analyse the unicast traffic (which most applications will generate)
compared to just listening to the multicast messages sent by DNS-SD/mDNS.
</t>
</list>
The same argument can be extended to say that the pattern of services
offered by a device allows for fingerprinting the device. This may or may not
be true, since we can expect that services will be designed or updated to
avoid leaking fingerprints. In any case, the design of the discovery
service should avoid making a bad situation worse, and should as much as
possible avoid providing new fingerprinting information.
</t>
</section>

<section title="Privacy Implication of Discovering Services" anchor="clientPrivacy" >
<t>
The consumers of services engage in discovery, and in doing so
reveal some information such as the list of services they
are interested in and the domains in which they are looking for the
services. When the clients select specific instances of services,
they reveal their preference for these instances. This can be benign if
the service type is very common, but it could be more problematic
for sensitive services, such as for example some private messaging services.
</t>
<t>
One way to protect clients would be to somehow encrypt the requested service types.
Of course, just as we noted in <xref target="serverFingerprint"/>, traffic
analysis can often reveal the service. 
</t>
</section>
</section>
end of analysis -->

<section title="Design of the Private DNS-SD Discovery Service" anchor="design" >
<t>
In this section, we present the design of a two-stage solution that enables private
use of DNS-SD, without affecting existing users.
The solution is largely based on the architecture proposed in <xref target="KW14b" /> and <xref target="K17" />, which separates the 
general private discovery problem into three components.
The first component is an offline pairing mechanism,
which is performed only once per pair of users.
It establishes a shared secret over an authenticated channel, allowing devices to
authenticate using this secret without user interaction at any later point in time.
We use the pairing system proposed in <xref target="I-D.ietf-dnssd-pairing" />.
</t>

<t>
  The further two components are online (in contrast to pairing, they are performed anew each time a host joins a network)
  and compose the two service discovery stages, namely
</t>
<t>
<list style="symbols">
<t>
Discovery of the Private Discovery Service -- the first stage -- in which
hosts discover the Private Discovery Service (PDS), a special service offered by every 
host supporting our extension.
After the discovery, hosts connect to the PDS offered by paired peers.
</t>
<t>
Actual Service Discovery -- the second stage -- is performed
through the Private Discovery Service, which only accepts encrypted messages associated 
with an authenticated session; thus not compromising privacy.
</t>
</list>
</t>
<t>
  In other words, the hosts first discover paired peers and then directly engage in 
privacy preserving service discovery.
</t>

<t>
  The stages are independent with respect to means used for transmitting the necessary data.
  While in our extension the messages for the first stage are transmitted using IP multicast, 
  the messages for
  the second stage are transmitted via unicast.
  One could also imagine using a Distributed Hash Table for the first stage, being completely 
  independent of multicast.
</t>


<section title="Device Pairing" anchor="pairingDesign" >


<t>
  Any private discovery solution needs to differentiate
  between authorized devices, which are allowed to get information about discoverable entities,
  and other devices, which should not be aware of the availability of private entities.
  The commonly used solution to this problem is establishing a  "device pairing".
</t>

<t>
  Device pairing has to be performed only once per pair of users. This is important 
  for user-friendliness, as it is the only step that demands user-interaction.
  After this single pairing, privacy preserving service discovery works fully automatically.
  In this document, we utilize <xref target="I-D.ietf-dnssd-pairing" /> as the pairing mechanism.
</t>

<t>
  The pairing yields a mutually authenticated shared secret, and optionally
  mutually authenticated public keys or certificates added to a local web of trust.
  Public key technology has many advantages, but shared secrets are typically easier to
  handle on small devices.
</t>

</section>

<section title="Discovery of the Private Discovery Service" anchor="stage1Design">
<t>
The first stage of service discovery is to check whether 
instances of compatible Private Discovery Services are available in the local scope.
The goal of that stage is to identify devices that share a pairing with the querier, and
are available locally.
The service instances can be browsed using regular DNS-SD procedures,
and then filtered so that only instances offered by paired devices are retained.
</t>

<section title="Obfuscated Instance Names" anchor="specHashProof" >
<t>
The instance names for the Private Discovery Service are obfuscated, so that authorized
peers can associate the instance with its publisher, but unauthorized peers can only
observe what looks like a random name. To achieve this, the names are
composed as the concatenation of
a nonce and a proof, which is composed by hashing the nonce with a pairing key:
</t>
<t>
<figure>
<artwork>
   PrivateInstanceName = &lt;nonce&gt;|&lt;proof&gt;
   proof = hash(&lt;nonce&gt;|&lt;key&gt;)
</artwork>
</figure>
</t>
<t>
The publisher will publish as many instances as it has established pairings. 
</t>
<t>
The discovering party that looks for instances of the service will receive lists of advertisements from
nodes present on the network. For each advertisement, it will parse the instance name, and then,
for each available pairing key,
compare the proof to the hash of the nonce concatenated with this pairing key.
If there is no match, it discards the instance name. If there is a match, it has discovered
a peer.
</t>
</section>

<section title="Using a Predictable Nonce" anchor="predictNonce">
<t>
Assume that there are N nodes on the local scope, and that each node has on average M pairings. 
Each node will publish on average M records, and the node engaging in discovery may have
to process on average N*M instance names. The discovering node will have to compute on average
M potential hashes for each nonce. The number of hash computations would scale as O(N*M*M),
which means that it could cause a significant drain of resources in large networks.
</t>
<t>
In order to minimize the amount of computing resources, we suggest that the nonce be derived
from the current time, for example set to a representation of the current time rounded 
to some period.
With this convention, receivers can predict the nonces that will appear in the published instances.
</t>
<t>
The publishers will have to create new records at the end of each rounding period. 
If the rounding period is set too short, they will have to repeat that very often,
which is inefficient. On the other hand, if the rounding period is too long, the 
system may be exposed to replay attacks. We initially proposed a value of about
5 minutes, which would work well for the mDNS variant of DNS-SD. However, this may
cause an excessive number of updates for the DNS server based version of DNS-SD.
We propose to set a value of about 30 minutes,
which seems to be a reasonable compromise.
</t>

<t>
  Receivers can pre-calculate all the M relevant proofs once per time interval and then
  establish a mapping from the corresponding instance names to the pairing data in form of a hash table.
  These M relevant proofs are the proofs resulting from hashing a host's M pairing keys alongside the current nonce.
  Each time they receive an instance name, they can test in O(1) time if the received service information is relevant or not.
</t>

<t>
Unix defines a 32 bit time stamp as the number of seconds elapsed since January 1st, 1970 
not counting leap seconds. The most significant 20 bits of this 32 bit number represent
the number of 2048 seconds intervals since the epoch. 2048 seconds correspond to 34 minutes
and 8 seconds, which is close enough to our design goal of 30 minutes. We will thus
use this 20 bit number as nonce, which for simplicity will be padded with zeroes to 24 bits
and encoded in 3 octets.
</t>

<t>
  For coping with time skew, receivers pre-calculate proofs for the respective next time interval
  and store hash tables for the last, the current, and the next time interval.
  When receiving a service instance name, receivers first check whether the nonce corresponds
  to the current, the last or the next time interval, and if so, check whether the instance name is
  in the corresponding hash table.
  For (approximately) meeting our design goal of 30 min validity, the last time interval may only be
  considered if the current one is less than half way over and the next time interval may only be considered if
  the current time interval is more than half way over.
</t>

<t>
Publishers will need to compute O(M) hashes at most once per time stamp interval.
If records can be created "on the fly", publishers will only need to perform that computation
upon receipt of the first query during a given interval, and cache the computed results
for the remainder of the interval. There are however scenarios in which
records have to be produced in advance, for example when records are published within
a scope defined by a domain name and managed by a "classic" DNS server. In such scenarios,
publishers will need to perform the computations and publication exactly once per time stamp 
interval.
</t>
</section>

<section title="Using a Short Proof" anchor="shortProof">
<t>
Devices will have to publish as many instance names as they have peers.
The instance names will have to be represented via a text string,
which means that the binary concatenation of nonce and proof will
have to be encoded using a binary-to-text conversion such as
BASE64  (<xref target="RFC2045" /> section 6.8) or BASE32
(<xref target="RFC4648" /> section 6).
</t>
<t>
Using long proofs, such as the full output of SHA256 <xref target="RFC4055"/>,
would generate fairly long instance names: 48 characters using BASE64, or 56
using BASE32. These long names would inflate the network traffic required 
when discovering the privacy service. They would also limit the number of
DNS-SD PTR records that could be packed in a single 1500 octet sized packet,
to 23 or fewer with BASE64, or 20 or fewer with BASE32.
</t>
<t>
Shorter proofs lead to shorter messages, which is more efficient as long as 
we do not encounter too many collisions. A collision will happen if the proof
computed by the publisher using one key matches a proof computed by a
receiver using another key. If
a receiver mistakenly believes that a proof fits one of its peers, it will 
attempt to connect to the service as explained in section <xref target="solution:discoverPds" />
but in the absence of the proper pairwise shared key, the connection will fail.
This will not create an actual error, but the probability of such events should be 
kept low.
</t>
<t>
The following table provides the probability that a discovery agent maintaining 100
pairings will observe a collision after receiving 10000 advertisement records. It
also provides the number of characters required for the encoding of the corresponding
instance name in BASE64 or BASE32, assuming 24 bit nonces.
</t>

<texttable anchor="variousProofLengths">
<ttcol align='center'>Proof</ttcol>
<ttcol align='center'>Collisions</ttcol>
<ttcol align='center'>BASE64</ttcol>
<ttcol align='center'>BASE32</ttcol>
<c>24</c><c>5.96046%</c><c>8</c><c>16</c>
<c>32</c><c>0.02328%</c><c>11</c><c>16</c>
<c>40</c><c>0.00009%</c><c>12</c><c>16</c>
<c>48</c><c>3.6E-09</c><c>12</c><c>16</c>
<c>56</c><c>1.4E-11</c><c>15</c><c>16</c>
</texttable>

<t>
The table shows that for a proof, 24 bits would be too short. 32 bits might
be long enough, but the BASE64 encoding requires padding if the input is not an
even multiple of 24 bits, and BASE32 requires padding if the input is not a 
multiple of 40 bits. Given that, the desirable proof lengths are thus 48 bits
if using BASE64, or 56 bits if using BASE32. The resulting instance name will be
either 12 characters long with BASE64, allowing 54 advertisements in a 1500 byte mDNS message,
or 16 characters long with BASE32, allowing 47 advertisements per message.
</t>
<t>
In the specification section, we will assume BASE64, and 48 bit proofs composed of the
first 6 bytes of a SHA256 hash.
</t>
</section> 

<section title="Direct Queries" >
<t>
The preceding sections assume that the discovery is performed using the classic
DNS-SD process, in which a query for all available "instance names" of a service
provides a list of PTR records. The discoverer will then select the instance names
that correspond to its peers, and request the SRV and TXT records corresponding
to the service instance, and then obtain the relevant A or AAAA records. This is
generally required in DNS-SD because the instance names are not known in advance,
but for the Private Discovery Service the instance names can be predicted, and
a more efficient Direct Query method can be used.
</t>
<t>
At a given time, the node engaged in discovery can predict the nonce that its peer
will use, since that nonce is composed by rounding the current time. The node can also
compute the proofs that its peers might use, since it knows the nonce and the keys.
The node can thus build a list of instance names, and directly query the SRV records
corresponding to these names. If peers are present, they will answer directly. 
</t>
<t>
This "direct query" process will result in fewer network messages than the regular
DNS-SD query process in some circumstances, depending on the number of peers per
node and the number of nodes publishing the Private Discovery Service in the 
desired scope.
</t>
<t>
When using mDNS, it is possible to pack multiple queries in a single multicast message.
Using name compression and 12 characters per instance name, it is possible to pack
70 queries in a 1500 octet mDNS multicast message. It is also possible to request
unicast replies to the queries, resulting in significant efficiency gains in wireless
networks.
</t>

</section>

</section> <!-- End of stage1Design -->


<section title="Private Discovery Service" >
<t>
The discovery of the Private Discovery Service allows
discovering a list of available paired devices, and verifying that either party knows the corresponding 
shared secret. At that point, the querier can engage in a series of
directed discoveries.
</t>
<t>
We have considered defining an ad-hoc protocol for the private discovery service, but
found that just using TLS would be much simpler. The directed Private Discovery Service
is just a regular DNS-SD service, accessed over TLS, using the encapsulation of DNS over
TLS defined in <xref target="RFC7858" />.
The main difference with plain DNS over TLS is the need for an authentication based on pre-shared keys.
</t>
<t>
We assume that the pairing process has provided each pair of authorized client and server
with a shared secret. We can use that shared secret to provide mutual authentication of
clients and servers using "Pre-Shared Key" authentication, as defined in <xref target="RFC4279" />
and incorporated in the latest version of TLS <xref target="I-D.ietf-tls-tls13" />.
</t>
<t>
One difficulty is the reliance on a key identifier in the protocol. 
For example, in TLS 1.3 the PSK extension is defined as:
</t>
<t>
<figure>
<artwork>
   opaque psk_identity&lt;0..2^16-1&gt;;

   struct {
       select (Role) {
           case client:
               psk_identity identities&lt;2..2^16-1&gt;;

           case server:
               uint16 selected_identity;
       }
   } PreSharedKeyExtension
</artwork>
</figure>
</t>
<t>
According to the protocol, the PSK identity is passed in clear text at the beginning of
the key exchange. This is logical, since server and clients need to identify the secret
that will be used to protect the connection. But if we used a static identifier for the
key, adversaries could use that identifier to track server and clients. The solution
is to use a time-varying identifier, constructed exactly like the obfuscated instance names described in
<xref target="stage1Design" />, by concatenating a nonce and a proof computed by hashing the nonce with
the shared secret.
</t>


<section title="A Note on Private DNS Services" >
<t>
Our solution uses a variant of the DNS over TLS  protocol 
<xref target="RFC7858" /> defined by the DNS Private Exchange working group
(DPRIVE). DPRIVE further published a UDP variant,
DNS over DTLS <xref target="RFC8094" />, which
would also be a candidate.
</t>
<t>
DPRIVE and Private Discovery, however, solve two somewhat different
problems. While DPRIVE is concerned with the confidentiality of DNS transactions
addressing the problems outlined in <xref target="RFC7626" />,
DPRIVE does not address the confidentiality or privacy issues with
publication of services, and is not a direct solution to DNS-SD privacy:
</t>
<t>
<list style="symbols" >
<t>
Discovery queries are scoped by the domain name within which services
are published. As nodes move and visit arbitrary networks, there
is no guarantee that the domain services for these networks
will be accessible using DNS over TLS or DNS over DTLS.
</t>
<t>
Information placed in the DNS is considered public. Even if
the server does support DNS over TLS, third parties will 
still be able to discover the content of PTR, SRV and TXT
records.
</t>
<t>
Neither DNS over TLS nor DNS over DTLS applies to mDNS.
</t>
</list>
</t>
<t>
In contrast, we propose using mutual authentication of the client and server
as part of the TLS solution, to ensure that only authorized parties learn
the presence of a service.
</t>
</section>


 </section> <!-- End of Private Discovery Service -->


<section title="Randomized Host Names" >
<t>
Instead of publishing their actual host names in the SRV records, nodes 
could publish randomized host names. That is the solution argued for
in <xref target="RFC8117" />.
</t>
<t>
Randomized host names will prevent some of the tracking.
Host names are typically not visible by the users, and
randomizing host names will probably not cause many
usability issues.
</t>
</section>


<section title="Timing of Obfuscation and Randomization" anchor="timing" >
<t>
It is important that the obfuscation of instance names is performed at the right time,
and that the obfuscated names change in synchrony with other identifiers,
such as MAC Addresses, IP Addresses or host names.
If the randomized host name changed
but the instance name remained constant, an adversary would have no difficulty
linking the old and new host names. Similarly, if IP or MAC addresses changed but 
host names remained constant, the adversary could link the new addresses to the
old ones using the published name.
</t>
<t>
The problem is handled in <xref target="RFC8117" />, 
which recommends picking a new random host name at the time of connecting to 
a new network. New instance names for the Private Discovery Services should be
composed at the same time.
</t>

</section>

</section> <!-- end of Design section -->

<section title="Private Discovery Service Specification" anchor="solution" >
<t>
The proposed solution uses the following components:
</t>

<t>
<list style="symbols">
<t>
Host name randomization to prevent tracking.
</t>
<t>
Device pairing yielding pairwise shared secrets.
</t>
<t>
A Private Discovery Server (PDS) running on each host.
</t>
<t>
Discovery of the PDS instances using DNS-SD.
</t>
</list>
</t>

<t>
These components are detailed in the following subsections.
</t>

<section title="Host Name Randomization" >
<t>
Nodes publishing services with DNS-SD and concerned about their privacy MUST
use a randomized host name. The randomized name MUST be changed when
network connectivity changes, to avoid the correlation issues described in
<xref target="timing" />. The randomized host name MUST be used in
the SRV records describing the service instance, and the corresponding 
A or AAAA records MUST be made available through DNS or mDNS, within the
same scope as the PTR, SRV and TXT records used by DNS-SD.
</t>
<t>
If the link-layer address of the network connection is properly obfuscated 
(e.g. using MAC Address Randomization),
the Randomized Host Name MAY be computed using the algorithm described
in section 3.7 of <xref target="RFC7844" />. 
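If this is not possible, the randomized host name SHOULD be constructed by simply
picking a 48 bit random number meeting the 
Randomness Requirements for Security expressed in <xref target="RFC4075" />,
and then using the hexadecimal representation of this number as the
obfuscated host name.
<!-- NOTE(review): "Randomness Requirements for Security" is the title of
     RFC 4086; RFC 4075 is the SNTP configuration option for DHCPv6.
     Confirm the intended reference and update the entity declaration,
     this xref and the Normative References list together. -->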
</t>
</section>

<section title="Device Pairing" anchor="solution:pairing">
<t>
  Nodes that want to leverage the Private Discovery Service for private service discovery among peers
  MUST share a secret with each of these peers. Each shared secret MUST be a 256 bit randomly chosen number.
  We RECOMMEND using the pairing mechanism proposed in
  <xref target="I-D.ietf-dnssd-pairing" /> to establish these secrets.
</t>

</section>

<section title="Private Discovery Server" anchor="solution:pns">
<t>
  A Private Discovery Server (PDS) is a minimal DNS server running on each host.
  Its task is to offer resource records corresponding to private services only to
  authorized peers. These peers MUST share a secret with the host 
  (see <xref target="solution:pairing" />). To ensure privacy of the requests, the service is 
  only available over TLS <xref target="RFC5246" />, and the shared secrets
  are used to mutually authenticate peers and servers.
</t>
<t>
  The Private Discovery Server SHOULD support DNS push notifications <xref target="I-D.ietf-dnssd-push" />,
  e.g. to facilitate an up-to-date contact list in a chat application without polling.
</t>

<section title="Establishing TLS Connections" anchor="solution:tls" >
<t>
  The PDS MUST only answer queries via DNS over TLS <xref target="RFC7858"/> and MUST use
  a PSK authenticated TLS handshake <xref target="RFC4279"/>. The client and server
  SHOULD negotiate a forward secure cipher suite such as DHE-PSK or ECDHE-PSK when 
  available. The shared secret exchanged during pairing MUST be used as PSK. To guarantee
  interoperability, implementations of the Private Discovery Server MUST support
  TLS_PSK_WITH_AES_256_GCM_SHA384.
</t>

<t>
When using the PSK based authentication, the "psk_identity" parameter identifying
the pre-shared key MUST be identical to the "Instance Identifier" defined in
<xref target="solution:publishPds" />, i.e., a 24 bit nonce and a 48 bit proof encoded
in BASE64 as a 12 character string. The server will use the pairing key associated with 
this instance identifier.
</t>

</section>

</section> <!-- end of Private Discovery Server -->

<section title="Publishing Private Discovery Service Instances" anchor="solution:publishPds" >
<t>
Nodes that provide the Private Discovery Service SHOULD advertise their
availability by publishing instances of the service through DNS-SD.
</t>
<t>
The DNS-SD service type for the Private Discovery Service is "_pds._tcp".
</t>
<t>
Each published instance describes one server and one pairing.
In the case where a node manages more than one pairing, it should
publish as many instances as necessary to advertise the PDS to all paired peers.
</t>
<t>
Each instance name is composed as follows:
</t>
<t>
<figure>
<artwork>
   pick a 24 bit nonce, set to the 20 most significant bits of the
   32 bit Unix GMT time padded with 4 zeroes.

      For example, on August 22, 2017 at 20h 4 min and 56 seconds
      international time, the Unix 32 bit time had the
      hexadecimal value 0x599C8E68. The corresponding nonce
      would be set to the 24 bits: 0x599C80.

   compute a 48 bit proof:
      proof = first 48 bits of HASH(&lt;nonce&gt;|&lt;pairing key&gt;)

   set the 72 bit binary identifier as the concatenation
   of nonce and proof

   set instance_name = BASE64(binary identifier)
</artwork>
</figure>
</t>
<t>
In this formula, HASH SHOULD be the function SHA256 
defined in <xref target="RFC4055"/>, and BASE64 is defined 
in section 6.8 of <xref target="RFC2045" />. The concatenation
of a 24 bit nonce and a 48 bit proof results in a 72 bit string.
The BASE64 conversion is 12 characters long per
<xref target="RFC6763"/>.
</t>

</section>

<section title="Discovering Private Discovery Service Instances"  anchor="solution:discoverPds" >
<t>
Nodes that wish to discover Private Discovery Service Instances SHOULD issue a DNS-SD 
discovery request for the service type "_pds._tcp". They MAY, as an alternative, use the Direct
Discovery procedure defined in <xref target="solution:discoverDirectPds" />. 
When using the Direct Discovery procedure over mDNS, nodes SHOULD always set the QU-bit 
(unicast response requested, see <xref target="RFC6762"/> Section 5.4)
because responses related to a "_pds._tcp" instance are only relevant for the querying node itself.
</t>
<t>
When nodes send a DNS-SD discovery request, they will receive in response a series of PTR records, 
each providing the name of one of the instances present in the scope.
</t>

<t>
  For each time interval, the querier SHOULD pre-calculate a hash table mapping instance names to pairings
  according to the following conceptual algorithm:
</t>
<figure>
<artwork>
  nonce = 20 bit rounded time stamp of the \
    respective next time interval padded to \
    24 bits with four zeroes
  for each available pairing
    retrieve the key Xj of pairing number j
    compute F = first 48 bits of hash(nonce, Xj)
    construct the binary instance_name as described \
      in the previous section
    instance_names[nonce][instance_name] = Xj;
</artwork>
</figure>
<t>
  The querier SHOULD store the hash tables for the previous, the current, and the next time interval.
</t>
<t>
The querier SHOULD examine each instance to see whether it corresponds to one
of its available pairings, according to the following conceptual algorithm:
</t>
<t>
<figure>
<artwork>
   for each received instance_name:
      convert the instance name to binary using BASE64
      if the conversion fails, 
         discard the instance.
      if the binary instance length is not 72 bits,
         discard the instance.

      nonce = first 24 bits of binary.

      Check that the 4 least significant bits of the nonce
      have the value 0, and that the 20 most significant
      bits of the nonce match the first 20 bits of
      the current time, or the previous interval (20 bit number
      minus 1) if the current interval is less than half over,
      or the next interval (20 bit number plus 1) if the
      current interval is more than half over. If the
      nonce does not match an acceptable value, discard
      the instance.

      if ((Xj = instance_names[nonce][instance_name]) != null)
        mark the pairing number j as available
</artwork>
</figure>
</t>
<t>
The check of the current time is meant to mitigate replay attacks, while not
mandating a time synchronization precision better than 15 minutes.
</t>
<t>
Once a pairing has been marked available, the querier SHOULD 
try connecting to the corresponding instance, using the selected key.
The connection is likely to succeed, but it MAY fail for a variety
of reasons. One of these reasons is the probabilistic nature of the
proof, which entails a small chance of "false positive" match. This
will occur if the hash of the nonce with two different keys produces
the same result. In that case, the TLS connection will fail with
an authentication error or a decryption error.
</t>

</section>

<section title="Direct Discovery of Private Discovery Service Instances"  anchor="solution:discoverDirectPds" >
<t>
Nodes that wish to discover Private Discovery Service Instances MAY use the
following Direct Discovery procedure instead of the regular DNS-SD Discovery 
explained in <xref target="solution:discoverPds" />. 
</t>
<t>
To perform Direct Discovery,
nodes should compose a list of Private Discovery Service Instance Names.
There will be one name for each pairing available to the node. 
The instance name for each pairing will
be composed of a nonce and a proof, using the algorithm specified in
<xref target="solution:publishPds" />.
</t>
<t>
The querier will issue SRV record queries for each of these names. The queries will 
only succeed if the corresponding instance is present, in which case a pairing is
discovered. After that, the querier SHOULD try connecting to the corresponding instance, 
as explained in <xref target="solution:discoverPds" />.
</t>
</section>

<section title="Using the Private Discovery Service" >
<t>
Once instances of the Private Discovery Service have been discovered, 
peers can establish TLS connections and send DNS requests over
these connections, as specified in DNS-SD.
</t>
</section>

</section> <!-- end of Private Discovery Service Specification -->

<section title="Security Considerations">
<t> 
This document specifies a method for protecting the privacy of 
nodes that offer and query for services. This is especially useful when operating
in a public space.
Hiding the identity of the publishing nodes prevents
some forms of "targeting" of high value nodes. However,
adversaries can attempt various attacks to break the anonymity
of the service, or to deny it. These attacks and their
mitigations are described in the following sections.
</t>

<section title="Attacks Against the Pairing System" >
<t>
There are a variety of attacks against pairing systems, which may
result in compromised pairing secrets. If an adversary manages to
acquire a compromised key, the adversary will be able to perform 
private service discovery according to <xref target="solution:discoverPds" />.
This will allow tracking of the service. The adversary will also
be able to discover which private services are available for the compromised pairing.
</t>
<t>
Attacks on pairing systems are detailed in <xref target="I-D.ietf-dnssd-pairing" />.
</t>
</section>

<section title="Denial of Discovery of the Private Discovery Service" >
<t>
The algorithm described in <xref target="solution:discoverPds" /> scales as
O(M*N), where M is the number of pairings per node and N is the number of nodes in
the local scope. Adversaries can attack this service by publishing "fake"
instances, effectively increasing the number N in that scaling equation.
</t>
<t>
Similar attacks can be mounted against DNS-SD: creating fake instances
will generally increase the noise in the system and make discovery less
usable. Private Discovery Service discovery SHOULD use the same
mitigations as DNS-SD.
</t>
<t>
The attack could be amplified if the clients needed to compute proofs for
all the nonces presented in Private Discovery Service Instance names. This
is mitigated by the specification of nonces as rounded time stamps in
<xref target="solution:discoverPds" />.
If we assume that timestamps must not be too old, there
will be a finite number of valid rounded timestamps at any time. Even
if there are many instances present, they would all pick their nonces
from this small number of rounded timestamps, and a smart client
will make sure that proofs are only computed once per valid
time stamp.
</t>
</section>

<section title="Replay Attacks Against Discovery of the Private Discovery Service" >
<t>
Adversaries can record the service instance names published by
Private Discovery Service instances, and replay them later in different
contexts. Peers engaging in discovery can be misled into believing
that a paired server is present. They will attempt to connect to the
absent peer, and in doing so will disclose their presence in a 
monitored scope.
</t>
<t>
The binary instance identifiers defined in <xref target="solution:publishPds"/>
start with 24 bits encoding the most significant bits of the "UNIX" time. In order to
protect against replay attacks, clients SHOULD verify that this time
is reasonably recent, as specified in <xref target="solution:discoverPds" />.
</t>
</section>


<section title="Denial of Private Discovery Service" >
<t>
The Private Discovery Service is only available through a 
mutually authenticated TLS connection, which provides state-of-the-art protection mechanisms.
However, adversaries can mount a denial of service 
attack against the service. In the absence of shared secrets,
the connections will fail, but the servers will expend some
CPU cycles defending against them.
</t>
<t>
To mitigate such attacks, nodes SHOULD restrict the 
range of network addresses from which they accept connections,
matching the expected scope of the service. 
</t>
<t>
This mitigation will not prevent denial of service attacks performed by locally connected 
adversaries; but protecting against local denial of service attacks is generally very difficult.
For example, local attackers can also attack mDNS and DNS-SD by generating a large number of
multicast requests.
</t>

</section>

<section title="Replay Attacks against the Private Discovery Service" >
<t>
Adversaries may record the PSK identifiers used in successful
connections to a private discovery service. They could attempt
to replay them later against nodes advertising the private 
service at other times or at other locations. If the PSK identifier
is still valid, the server will accept the TLS connection, and in doing 
so will reveal being the same server observed at a previous time or
location.
</t>
<t>
The PSK identifiers defined in <xref target="solution:tls"/>
start with 24 bits encoding the most significant bits of the "UNIX" time. In order to
mitigate replay attacks, servers SHOULD verify that this time
is reasonably recent, and fail the connection if it is too old,
or if it occurs too far in the future. 
</t>
<t>
The processing of
timestamps is however affected by the accuracy of computer clocks. 
If the check is too strict, reasonable connections could fail. To
further mitigate replay attacks, servers MAY record the list of 
valid PSK identifiers received in the recent past, and fail connections
if one of these identifiers is replayed.
</t>
</section>


<section title="Replay attacks and clock synchronization" >
<t>
The mitigation of replay attacks relies on verification of the time
encoded in the nonce. This verification assumes that the hosts engaged
in discovery have a reasonably accurate sense of the current time.
</t>
</section>

<section title="Fingerprinting the number of published instances" >
<t>
Adversaries could monitor the number of instances published by a
particular device, which in the absence of mitigations will reflect
the number of pairings established by that device. This number
will probably vary between 1 and maybe 100, providing the adversary
with maybe 6 or 7 bits of input in a fingerprinting algorithm.
</t>
<t>
Devices MAY protect against this fingerprinting by publishing
a number of
"fake" instances in addition to the real ones. The fake instance
identifiers will contain the same nonce as the genuine instance
identifiers, and random bits instead of the proof. Peers should
be able to quickly discard these fake instances, as the proof
will not match any of the values that they expect. One plausible
padding strategy is to ensure that the total number of published
instances, either fake or genuine, matches one of a few values
such as 16, 32, 64, or higher powers of 2.
</t>
</section>

</section>

<section title="IANA Considerations" anchor="iana">
<t> 
This draft does not require any IANA action.
</t> 
</section>

<section title="Acknowledgments">
    <t>
This draft results from initial discussions with Dave Thaler, and encouragement from 
the DNS-SD working group members. We would like to thank Stephane Bortzmeyer and 
Ted Lemon for their
detailed reviews of the working draft.
    </t>
</section>

</middle>

<back>
<references title="Normative References">
       &rfc2045;
       &rfc2119;
       &rfc4055;
       &rfc4075;
       &rfc6763;
       &rfc4279;
       &rfc5246;
</references>
<references title="Informative References">
       &rfc4648;
       &rfc6762;
       &rfc7626;
       &rfc7844;
       &rfc7858;
       &rfc8117;
       &rfc8094;
       &I-D.ietf-tls-tls13;
       &I-D.ietf-dnssd-push;
       &I-D.ietf-dnssd-pairing;
       &I-D.ietf-dnssd-privacyscaling;
       &I-D.ietf-dnssd-prireq;

<reference anchor="KW14a" target="http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=7011331">
  <front>
    <title>Adding Privacy to Multicast DNS Service Discovery</title>
    <author initials="D." surname="Kaiser" fullname="Daniel Kaiser">
      <organization/>
    </author>
    <author initials="M." surname="Waldvogel" fullname="Marcel Waldvogel">
      <organization/>
    </author>
    <date year="2014"/>
  </front>
  <seriesInfo name="DOI" value="10.1109/TrustCom.2014.107"/>
</reference>

<reference anchor="KW14b" target="http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=7056899">
  <front>
    <title>Efficient Privacy Preserving Multicast DNS Service Discovery</title>
    <author initials="D." surname="Kaiser" fullname="Daniel Kaiser">
      <organization/>
    </author>
    <author initials="M." surname="Waldvogel" fullname="Marcel Waldvogel">
      <organization/>
    </author>
    <date year="2014"/>
  </front>
  <seriesInfo name="DOI" value="10.1109/HPCC.2014.141"/>
</reference>

<reference anchor="K17" target="http://nbn-resolving.de/urn:nbn:de:bsz:352-0-422757">
  <front>
    <title>Efficient Privacy-Preserving Configurationless Service Discovery Supporting Multi-Link Networks</title>
    <author initials="D." surname="Kaiser" fullname="Daniel Kaiser">
      <organization/>
    </author>
    <date year="2017"/>
  </front>
</reference>

</references>  

</back>
</rfc>