From afaf0d9282c7150127cd74529dbfba31b991bd04 Mon Sep 17 00:00:00 2001 From: Quinn Damerell Date: Fri, 13 Dec 2024 21:47:51 -0800 Subject: [PATCH] Updating some logic to handle absoulte URLs better. --- homeway/homeway/WebStream/headerimpl.py | 56 ++++++++- .../homeway/WebStream/webstreamhttphelper.py | 11 +- homeway/homeway/httprequest.py | 1 + homeway/homeway/mdns.py | 107 ++++++++++++------ 4 files changed, 135 insertions(+), 40 deletions(-) diff --git a/homeway/homeway/WebStream/headerimpl.py b/homeway/homeway/WebStream/headerimpl.py index 8296e16..3ff295a 100644 --- a/homeway/homeway/WebStream/headerimpl.py +++ b/homeway/homeway/WebStream/headerimpl.py @@ -4,6 +4,7 @@ from ..streammsgbuilder import StreamMsgBuilder from ..httprequest import HttpRequest +from ..Proto.PathTypes import PathTypes from ..Proto.HttpInitialContext import HttpInitialContext # Indicates the base protocol, not if it's secure or not. @@ -21,7 +22,8 @@ class HeaderHelper: @staticmethod def GatherRequestHeaders(logger:logging.Logger, httpInitialContextOptional:HttpInitialContext, protocol) : - hostAddress = HttpRequest.GetDirectServiceAddress() + # Get the correct host address for this request type. + hostAddress = HeaderHelper._HostHostAddress(logger, httpInitialContextOptional) # Get the count of headers in the message. sendHeaders = {} @@ -117,6 +119,56 @@ def GatherRequestHeaders(logger:logging.Logger, httpInitialContextOptional:HttpI return sendHeaders + # Determine the host address. + # If this is an absolute URL, we need to use the host from the URL. + @staticmethod + def _HostHostAddress(logger:logging.Logger, httpInitialContextOptional:HttpInitialContext) -> str: + + # Start with the default host address for this device. + # If we can't get the path type, we use it. 
+ hostAddress = HttpRequest.GetDirectServiceAddress() + if httpInitialContextOptional is None: + return hostAddress + + pathType = httpInitialContextOptional.PathType() + if pathType != PathTypes.Absolute: + return hostAddress + + # If we have an absolute path, we need to parse the host out of it, + # because we don't want to use this device's host name as the host. + try: + # Get the URL + absoluteUrl = StreamMsgBuilder.BytesToString(httpInitialContextOptional.Path()) + + # Find the protocol + protocolEnd = absoluteUrl.find("://") + if protocolEnd == -1: + raise Exception("GatherRequestHeaders failed to find protocol in host address.") + + # Move past the :// + protocolEnd += 3 + + # Find the end, if not found, assume the end of the string. + # protocolEnd already points past the ://, so the search must start there. + hostEnd = absoluteUrl.find("/", protocolEnd) + if hostEnd == -1: + hostEnd = len(absoluteUrl) + + host = absoluteUrl[protocolEnd:hostEnd] + + # According to the spec, if the port is 80 or 443, it should be omitted. + # Otherwise, if there is a port, then it should be included. + # Compare the whole port suffix, so ports like 8080 or 4430 are kept. + for defaultPort in (":80", ":443"): + if host.endswith(defaultPort): + host = host[:-len(defaultPort)] + break + return host + except Exception as e: + Sentry.Exception("GatherRequestHeaders failed to parse absolute path.", e) + return hostAddress + + # Called only for websockets to get headers. @staticmethod def GatherWebsocketRequestHeaders(logger:logging.Logger, httpInitialContext) -> dict: @@ -173,7 +225,7 @@ def GetWebSocketSubProtocols(logger:logging.Logger, httpInitialContext) -> list: # # This function must return the location value string again, either corrected or not.
@staticmethod - def CorrectLocationResponseHeaderIfNeeded(logger:logging.Logger, requestUri:str, locationValue:str, sendHeaders): + def CorrectLocationResponseHeaderIfNeeded(logger:logging.Logger, requestUri:str, locationValue:str, sendHeaders, httpInitialContext:HttpInitialContext): # The sendHeaders is an dict that was generated by GatherRequestHeaders and were used to send the request. # Make sure the location is http(s) or ws(s), since that's all we deal with right now. diff --git a/homeway/homeway/WebStream/webstreamhttphelper.py b/homeway/homeway/WebStream/webstreamhttphelper.py index 44102c1..d105efa 100644 --- a/homeway/homeway/WebStream/webstreamhttphelper.py +++ b/homeway/homeway/WebStream/webstreamhttphelper.py @@ -267,7 +267,8 @@ def executeHttpRequest(self): # The boundary stream is used for webcam streams, and it's an ideal place to package and send each frame boundaryStr = None # Pull out the content type value, so we can use it to figure out if we want to compress this data or not - contentTypeLower =None + contentTypeLower = None + ogLocationHeaderValue = None headers = hwHttpResult.Headers for name, value in headers.items(): nameLower = name.lower() @@ -292,7 +293,13 @@ def executeHttpRequest(self): elif nameLower == "location": # We have noticed that some proxy servers aren't setup correctly to forward the x-forwarded-for and such headers. # So when the web server responds back with a 301 or 302, the location header might not have the correct hostname, instead an ip like 127.0.0.1. - hwHttpResult.Headers[name] = HeaderHelper.CorrectLocationResponseHeaderIfNeeded(self.Logger, uri, value, sendHeaders) + ogLocationHeaderValue = value + hwHttpResult.Headers[name] = HeaderHelper.CorrectLocationResponseHeaderIfNeeded(self.Logger, uri, value, sendHeaders, httpInitialContext) + + if ogLocationHeaderValue is not None: + # Also set the og location, so the server has it if it needs it. + # This has to be set out of the loop. 
+ hwHttpResult.Headers["x-og-location"] = ogLocationHeaderValue # We also look at the content-type to determine if we should add compression to this request or not. # general rule of thumb is that compression is quite cheap but really helps with text, so we should compress when we diff --git a/homeway/homeway/httprequest.py b/homeway/homeway/httprequest.py index 1c2f5f4..ac6b2a5 100644 --- a/homeway/homeway/httprequest.py +++ b/homeway/homeway/httprequest.py @@ -371,6 +371,7 @@ def MakeHttpCall(logger, pathOrUrl, pathOrUrlType, method, headers, data=None, a # requests lib and everything will work. However, on some systems mDNS isn't support and the call will fail. On top of that, mDNS # is super flakey, and it will randomly stop working often. For both of those reasons, we will check if we find a local address, and try # to resolve it manually. Our logic has a cache and local disk backup, so if mDNS is being flakey, our logic will recover it. + # TODO - This could break servers that need the hostname to use the right service - but the fallback should cover it. localResolvedUrl = MDns.Get().TryToResolveIfLocalHostnameFound(url) if localResolvedUrl is not None: # The function will only return back the full URL if a local hostname was found and it was able to resolve to an IP. diff --git a/homeway/homeway/mdns.py b/homeway/homeway/mdns.py index f087235..55fe9b7 100644 --- a/homeway/homeway/mdns.py +++ b/homeway/homeway/mdns.py @@ -2,6 +2,7 @@ import time import os import json +from typing import Optional import dns.resolver @@ -49,14 +50,20 @@ def __init__(self, logger, pluginDataFolderPath): # Now that we support only PY3, this should never fail. try: - # Setup the client + # Setup the clients + # This is a normal DNS resolver so we can test the host file / local DNS self.dnsResolver = dns.resolver.Resolver() + + # This is the mDNS resolver, which will broadcast to the local network. 
+ self.mdnsResolver = dns.resolver.Resolver() # Use the mdns multicast address - self.dnsResolver.nameservers = ["224.0.0.251"] + self.mdnsResolver.nameservers = ["224.0.0.251"] # Use the mdns port. - self.dnsResolver.port = 5353 + self.mdnsResolver.port = 5353 + except Exception as e: self.dnsResolver = None + self.mdnsResolver = None self.Logger.warn("Failed to create DNS class, local dns resolve is disabled. "+str(e)) @@ -144,8 +151,29 @@ def TryToGetLocalIp(self, domain): self.LogDebug("We didn't use a cached entry and the resolved failed, and no existing cache entry was found.") return None + # Returns a string with the local IP if the IP can be found, otherwise, it returns None. - def _TryToResolve(self, domain): + def _TryToResolve(self, domain:str) -> Optional[str]: + + # Before we try to resolve with the mdns, first do a quick normal DNS lookup. + # This will handle cases where the user has set up the host file or local DNS to resolve the domain. + # We do this first, because we know it will be quick to succeed or fail. + try: + # We can use a short timeout, because a local DNS should be really fast. + # Even 50ms is a long time. + answers = self.dnsResolver.resolve(domain, lifetime=0.050, raise_on_no_answer=False) + + # If we find a valid answer, then we are done! + ip = self._HandleDnsAnswer(domain, answers) + if ip is not None: + self.LogDebug(f"Domain {domain} resolved with the standard DNS resolver.") + return ip + + except dns.resolver.LifetimeTimeout: + pass + except Exception as e: + self.Logger.error("Failed to resolve DNS for domain "+str(domain)+" e:"+str(e)) + # If we fail, move on to the mdns resolve. # We have seen that occasionally a first resolve won't work, but future resolves will. # For this reason, we do shorter lifetime resolves, but try a few times. @@ -174,38 +202,12 @@ def _TryToResolve(self, domain): # Since we do caching, we allow the lifetime of the lookup to be longer, so we have a better chance of getting it.
# Don't allow this to throw, so we don't get nosy exceptions on lookup failures. - answers = self.dnsResolver.resolve(domain, lifetime=1.0, raise_on_no_answer=False, source=localAdapterIp) - - # Look get the list of IPs returned from the query. Sometimes, there's a multiples. For example, we have seen if docker is installed - # there are sometimes 172.x addresses. - ipList = [] - if answers is not None: - for data in answers: - # Validate. - if data is None or data.address is None or len(data.address) == 0: - self.Logger.warn("Dns result had data, but there was no IP address") - continue - - self.LogDebug("Resolver found ip "+data.address+" for local hostname "+domain) - ipList.append(data.address) - - # If there are no ips, continue trying. - if len(ipList) == 0: - continue + answers = self.mdnsResolver.resolve(domain, lifetime=1.0, raise_on_no_answer=False, source=localAdapterIp) - # Find which is the primary. - primaryIp = self.GetSameLanIp(ipList) - - # Always update the cache - with self.Lock: - self.Cache[domain.lower()] = self.CreateCacheEntryDict(primaryIp) - - # Save the cache file. - # TODO - We could async this, but since this will usually be called in the background as a cache refresh anyways, there's no need. - self._SaveCacheFile() - - # Return the result. - return primaryIp + # Try to find a valid IP from the results. + ip = self._HandleDnsAnswer(domain, answers) + if ip is not None: + return ip except dns.resolver.LifetimeTimeout: # This happens if no one responds, which is expected if the domain has no one listening. @@ -216,9 +218,42 @@ def _TryToResolve(self, domain): # If we failed to find anything or it threw, don't return so we try again. + # Returns a successful IP address if one is found, otherwise, it returns None. + def _HandleDnsAnswer(self, domain:str, answers: dns.resolver.Answer) -> Optional[str]: + # Look get the list of IPs returned from the query. Sometimes, there's a multiples. 
For example, we have seen if docker is installed + # there are sometimes 172.x addresses. + ipList = [] + if answers is not None: + for data in answers: + # Validate. + if data is None or data.address is None or len(data.address) == 0: + self.Logger.warn("Dns result had data, but there was no IP address") + continue + self.LogDebug("Resolver found ip "+data.address+" for local hostname "+domain) + ipList.append(data.address) + + # If there are no ips, return None so the caller can keep trying. + if len(ipList) == 0: + return None + + # Find which is the primary. + primaryIp = self.GetSameLanIp(ipList) + + # Always update the cache + with self.Lock: + self.Cache[domain.lower()] = self.CreateCacheEntryDict(primaryIp) + + # Save the cache file. + # TODO - We could async this, but since this will usually be called in the background as a cache refresh anyways, there's no need. + self._SaveCacheFile() + + # Return the result. + return primaryIp + + # Given a list of at least 1 IP, this will always return a string that's an IP. It should be the IP we think # is the correct IP address for the same local LAN we are on. - def GetSameLanIp(self, ipList) -> str: # If there is just one, return it. if len(ipList) == 1: self.LogDebug("Only one ip returned in the query, returning it")