Skip to content

Commit

Permalink
Add optional media proxy (disabled by default)
Browse files Browse the repository at this point in the history
  • Loading branch information
luk3yx committed Oct 5, 2024
1 parent 9535a18 commit 5c8f1fd
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 12 deletions.
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,35 @@ and `PART` commands should work as expected.
Note that events sent before the client connects to Matrix are ignored. Your
system must have an accurate clock for this to work properly.

## Downloading media

Matrix has recently started to require authentication for media endpoints. By
default, miniirc_matrix now translates media files into MXC URLs. It does,
however, have a built-in HTTP proxy (disabled by default, see below).

### Proxying requests (experimental)

**Warning: I don't know how secure this is, it uses Python's `http.server`**

If you want to convert media to a normal URL, for example for use with relay
bots or code that expects normal links, you can provide a `media_proxy_port`
argument to miniirc_matrix.Matrix.

```py
miniirc_matrix.Matrix('example.com', token='my_token',
media_proxy_port=8080)
```

This will start an HTTP server on `http://127.0.0.1:8080` to listen for requests.
The server only listens on localhost.

To expose this to the public, you must use a reverse proxy, and should set up
caching and some kind of rate limiting to prevent abuse. You can set the
`media_proxy_url` keyword argument to the public proxy URL.

An HMAC is created based on the API token and URL to prevent using the proxy to
fetch arbitrary attachment URLs.

## Installation

You can install `miniirc_matrix` with `pip install miniirc_matrix`.
119 changes: 108 additions & 11 deletions miniirc_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@

from __future__ import annotations
from collections.abc import Callable
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import Any, Optional, TypeVar, overload
from urllib.parse import quote as _url_quote, urlparse as _urlparse
import functools, html.parser, itertools, json, math, re, time, uuid
import miniirc, requests, traceback # type: ignore
import functools, hmac, html.parser, itertools, json, math, re, time, uuid
import miniirc, requests, threading, traceback # type: ignore


ver = (0, 0, 11)
ver = (0, 0, 12)
__version__ = '.'.join(map(str, ver))


Expand Down Expand Up @@ -52,7 +53,6 @@ def _register(f: Callable[[Matrix, str, _Event], None]
# Hex colours
r'|\x04([0-9a-fA-F]{6})?(?:,([0-9a-fA-F]{6}))?'
)

_full_formatting_re = re.compile(
_invisible_formatting_re.pattern +

Expand All @@ -63,6 +63,9 @@ def _register(f: Callable[[Matrix, str, _Event], None]
)
_html_tags = {'\x02': 'strong', '\x1d': 'em', '\x1f': 'u', '\x1e': 'del',
'\x11': 'code'}
# Matches mxc:// URIs. Group 1 captures "<first component>/<second component>"
# (the part that is signed and passed to the download endpoint); group 2
# captures anything after a further "/", if present. Each component may only
# contain ASCII letters, digits, "_", "-" and "."; note that ":" is not
# accepted, so a first component that includes a port does not match.
_media_url_re = re.compile(
r'^mxc://([A-Za-z0-9_\-\.]+/[A-Za-z0-9_\-\.]+)(?:/(.*))?$'
)


class _TagManager:
Expand Down Expand Up @@ -311,6 +314,48 @@ def _matrix_html_to_irc(content: _Event) -> tuple[str, bool]:
return content.body[str], False


class _MediaProxyHandler(BaseHTTPRequestHandler):
    """HTTP handler that relays a signed media request to the homeserver.

    The request path (including its ``?key=`` parameter) is handed to
    ``irc._download_media``, which validates it and opens the upstream
    download. The response body is then streamed back to the client.
    """

    # The client whose _download_media() and debug() methods are used. This
    # is expected to be set as a class attribute on a subclass.
    irc: Matrix

    def do_GET(self) -> None:
        """Serve one media file, or an HTTP error if it cannot be fetched.

        Sends 400 if the URL or key is rejected, 502 if the upstream request
        fails before a response is received, and otherwise mirrors any
        non-200 upstream status code.
        """
        try:
            # self.path starts with "/", so this yields "mxc://a/b?key=..."
            resp = self.irc._download_media('mxc:/' + self.path)
        except ValueError as exc:
            self.send_error(400, explain=str(exc))
            return
        except requests.RequestException:
            # No headers have been sent yet, so an error page is still valid
            self.send_error(502)
            return

        with resp:
            if resp.status_code != 200:
                self.send_error(resp.status_code)
                return

            self.send_response(200)
            self.send_header('X-Content-Type-Options', 'nosniff')
            self.send_header('Content-Security-Policy',
                             "default-src 'none'")

            # iter_content() decodes gzip/deflate bodies, so the upstream
            # Content-Length only describes the bytes written below when no
            # content encoding was applied.
            encoding = resp.headers.get('Content-Encoding', 'identity')
            if ('Content-Length' in resp.headers and
                    encoding.strip().lower() == 'identity'):
                self.send_header('Content-Length',
                                 resp.headers['Content-Length'])

            # Only allow probably safe content types. Parameters such as
            # "; charset=utf-8" are ignored when deciding, but forwarded.
            content_type = resp.headers.get('Content-Type', '')
            media_type = content_type.partition(';')[0].strip().lower()
            if (media_type.startswith(('image/', 'audio/', 'video/')) or
                    media_type == 'text/plain'):
                self.send_header('Content-Type', content_type)
            else:
                self.send_header('Content-Type',
                                 'application/octet-stream')

            self.end_headers()

            # Copy content
            for chunk in resp.iter_content(8192):
                self.wfile.write(chunk)

    def log_message(self, format: str, *args) -> None:
        """Send request log lines to the client's debug output."""
        # irc.debug is a method, so testing its truthiness would always log.
        # Calling it leaves the decision to print up to the client.
        self.irc.debug(
            f'[media proxy] {self.address_string()} - {format % args}'
        )


class _InvalidEventError(Exception):
pass

Expand Down Expand Up @@ -389,9 +434,14 @@ class Matrix(miniirc.IRC):
connected: Optional[bool]
msglen = 4096

def __init__(self, ip: str, port: int = 0, nick: str = '', *args,
auto_connect: bool = True,
token: Optional[str] = None, **kwargs):
def __init__(
self, ip: str, port: int = 0, nick: str = '', *args,
auto_connect: bool = True,
token: Optional[str] = None,
media_proxy_port: Optional[int] = None,
media_proxy_url: Optional[str] = None,
**kwargs
) -> None:
# Cache _get_room_url
# This is done here so that each class instance gets its own cache and
# the cache doesn't store class instances.
Expand All @@ -411,6 +461,12 @@ def __init__(self, ip: str, port: int = 0, nick: str = '', *args,
if token:
self.token = token

# Media proxy state; the server object itself starts out unset
self._media_proxy: Optional[ThreadingHTTPServer] = None
self._media_proxy_port = media_proxy_port
# When a port is given without an explicit public URL, fall back to the
# address of the local listener so that media links point at the proxy
if media_proxy_port and not media_proxy_url:
    media_proxy_url = f'http://127.0.0.1:{media_proxy_port}'
# Stored without a trailing slash (None/empty values are kept as-is)
self._media_proxy_url = media_proxy_url and media_proxy_url.rstrip('/')

# Stop miniirc from trying to access the (non-existent) socket
kwargs['ping_interval'] = kwargs['ping_timeout'] = None
super().__init__(ip, port, nick, *args, auto_connect=False, **kwargs)
Expand Down Expand Up @@ -455,7 +511,7 @@ def _update_baseurl(self) -> None:
raise ValueError(f'Status code {res.status_code} returned')

self._baseurl = f'{baseurl}/_matrix/client/{api_version}'
self._media_baseurl = f'{baseurl}/_matrix/media/{api_version}'
self._media_baseurl = f'{baseurl}/_matrix/client/v1/media'

def __get(self, endpoint: str, timeout: int = 5, /,
**params: Optional[str | int]) -> Any:
Expand Down Expand Up @@ -484,6 +540,26 @@ def _get_room_url_no_cache(self, room_id: str) -> str:

return f'rooms/{_url_quote(room_id)}'

def __make_url_digest(self, path: str) -> str:
    """Return the hex-encoded HMAC-SHA256 of ``path``.

    The HMAC key is a fixed prefix followed by the ASCII-encoded API token.
    Raises UnicodeEncodeError if the token or path is not ASCII.
    """
    secret = b'miniirc_matrix hmac v1 ' + self.token.encode('ascii')
    mac = hmac.new(secret, path.encode('ascii'), 'sha256')
    return mac.hexdigest()

def _download_media(self, url: str) -> requests.Response:
    """Open a streaming download for a signed ``mxc://`` URL.

    ``url`` must look like ``mxc://<a>/<b>?key=<digest>`` where the digest
    is the value produced by ``__make_url_digest`` for ``<a>/<b>``.

    Returns the (streaming) response from the media download endpoint; the
    caller is responsible for closing it.

    Raises ValueError if the URL is malformed or the key does not match.
    """
    url_base, _, key = url.partition('?key=')
    match = _media_url_re.match(url_base)
    if not match:
        raise ValueError('Invalid media URL')

    path = match.group(1)
    # The pattern permits "." inside components, which also lets "." and
    # ".." through as whole components. Reject those so that the request
    # cannot be redirected to a different endpoint via dot segments.
    if any(part in ('.', '..') for part in path.split('/')):
        raise ValueError('Invalid media URL')

    # Compare as bytes: compare_digest() raises TypeError for non-ASCII str
    # arguments, which callers expecting ValueError would not handle.
    expected = self.__make_url_digest(path).encode('ascii')
    if not hmac.compare_digest(expected, key.encode('utf-8')):
        raise ValueError('Invalid key parameter')

    return self.__session.get(f'{self._media_baseurl}/download/{path}',
                              timeout=15, stream=True)

@functools.cached_property
def current_nick(self) -> str:
return self.__get('account/whoami')['user_id']
Expand All @@ -492,6 +568,7 @@ def connect(self) -> None:
if self.connected is not None:
return
with self._send_lock:
self.connected = False
self._update_baseurl()
self.active_caps = self.ircv3_caps & {
'account-tag', 'echo-message', 'message-tags',
Expand All @@ -500,8 +577,26 @@ def connect(self) -> None:
self.debug('Starting main loop (Matrix)')
self._start_main_loop()

if self._media_proxy_port:
self.debug('Starting media proxy')

class _handler(_MediaProxyHandler):
irc = self

self._media_proxy = ThreadingHTTPServer(
('127.0.0.1', self._media_proxy_port),
_handler,
)
th = threading.Thread(target=self._media_proxy.serve_forever)
th.daemon = True
th.start()

def disconnect(self) -> None:
    """Mark the client as disconnected and stop the media proxy, if any."""
    with self._send_lock:
        self.connected = False
        proxy, self._media_proxy = self._media_proxy, None
        if proxy is not None:
            # shutdown() only stops the serve_forever() loop; server_close()
            # is what releases the listening socket so the port can be
            # bound again later.
            proxy.shutdown()
            proxy.server_close()

def _main(self) -> None:
try:
Expand Down Expand Up @@ -691,8 +786,10 @@ def _message_event(self, room_id: str, event: _Event) -> None:
msg: str
if 'url' in content:
msg = content.url[str]
if msg.startswith('mxc://'):
msg = f'{self._media_baseurl}/download/{msg[6:]}'
if self._media_proxy_url and (match := _media_url_re.match(msg)):
path = match.group(1)
key = self.__make_url_digest(path)
msg = f'{self._media_proxy_url}/{path}?key={key}'
else:
msg, html_parsed_ok = _matrix_html_to_irc(content)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='miniirc_matrix',
version='0.0.11',
version='0.0.12',
py_modules=['miniirc_matrix'],
author='luk3yx',
description='A Matrix wrapper for miniirc.',
Expand Down

0 comments on commit 5c8f1fd

Please sign in to comment.