From 6b65cf5dfff0fbda893b15dbaa0dd680aaceddda Mon Sep 17 00:00:00 2001
From: Brad Murray
Date: Tue, 30 Jan 2024 09:50:01 -0500
Subject: [PATCH] Include more headers with logins (#79)

* Include more headers with logins

* Fix lint
---
 linkedin_matrix/commands/auth.py              |  57 +++++-
 linkedin_matrix/db/__init__.py                |   4 +-
 linkedin_matrix/db/http_header.py             |  54 ++++++
 linkedin_matrix/db/upgrade/__init__.py        |   2 +
 .../db/upgrade/v10_http_header_table.py       |  18 ++
 linkedin_matrix/user.py                       |  15 +-
 linkedin_matrix/web/provisioning_api.py       |   2 +-
 linkedin_messaging/linkedin.py                | 183 +++++++++---------
 8 files changed, 239 insertions(+), 96 deletions(-)
 create mode 100644 linkedin_matrix/db/http_header.py
 create mode 100644 linkedin_matrix/db/upgrade/v10_http_header_table.py

diff --git a/linkedin_matrix/commands/auth.py b/linkedin_matrix/commands/auth.py
index 2c4b70d..a3c79f1 100644
--- a/linkedin_matrix/commands/auth.py
+++ b/linkedin_matrix/commands/auth.py
@@ -1,4 +1,5 @@
 import logging
+import re
 
 from mautrix.bridge.commands import HelpSection, command_handler
 
@@ -83,7 +84,61 @@ async def login(evt: CommandEvent):
         return
 
     try:
-        await evt.sender.on_logged_in(cookies)
+        await evt.sender.on_logged_in(cookies, None)
+        await evt.reply("Successfully logged in")
+    except Exception as e:
+        logging.exception("Failed to log in")
+        await evt.reply(f"Failed to log in: {e}")
+        return
+
+
+@command_handler(
+    needs_auth=False,
+    management_only=False,
+    help_section=SECTION_AUTH,
+    help_text="""
+        Log in to LinkedIn using a "Copy as cURL" export from an existing LinkedIn browser session.
+    """,
+    help_args="<_curl command_>",
+)
+async def login_curl(evt: CommandEvent):
+    """Log in with the cookies and headers parsed from a pasted cURL command.
+    Each ``-H 'name: value'`` becomes a stored request header, except ``cookie``
+    (split into cookies) and ``accept``; logging in again is allowed."""
+
+    if not evt.args:
+        await evt.reply("**Usage:** `$cmdprefix+sp login_curl <curl command>`")
+        return
+
+    # NOTE(review): the message contains live session cookies; consider evt.redact().
+
+    curl_command = " ".join(evt.args)
+
+    cookies: dict[str, str] = {}
+    headers: dict[str, str] = {}
+
+    curl_command_regex = r"-H '(?P<name>[^:]+): (?P<value>[^\']+)'"
+    header_matches = re.findall(curl_command_regex, curl_command)
+    for name, value in header_matches:
+        name = name.lower()  # header names are case-insensitive
+        if name == "cookie":
+            for c in value.split("; "):
+                # Skip malformed items that lack "=" instead of raising.
+                n, sep, v = c.partition("=")
+                if sep:
+                    cookies[n] = v
+        elif name == "accept":
+            # Every request will have a different value for this
+            pass
+        else:
+            headers[name] = value
+
+    if not cookies.get("li_at") or not cookies.get("JSESSIONID"):
+        await evt.reply("Missing li_at or JSESSIONID cookie")
+        return
+
+    try:
+        await evt.sender.on_logged_in(cookies, headers)
         await evt.reply("Successfully logged in")
     except Exception as e:
         logging.exception("Failed to log in")
diff --git a/linkedin_matrix/db/__init__.py b/linkedin_matrix/db/__init__.py
index c3a7c9a..375c695 100644
--- a/linkedin_matrix/db/__init__.py
+++ b/linkedin_matrix/db/__init__.py
@@ -1,6 +1,7 @@
 from mautrix.util.async_db import Database
 
 from .cookie import Cookie
+from .http_header import HttpHeader
 from .message import Message
 from .model_base import Model
 from .portal import Portal
@@ -12,7 +13,7 @@
 
 
 def init(db: Database):
-    for table in (Cookie, Message, Portal, Puppet, Reaction, User, UserPortal):
+    for table in (HttpHeader, Cookie, Message, Portal, Puppet, Reaction, User, UserPortal):
         table.db = db  # type: ignore
 
 
@@ -20,6 +21,7 @@ def init(db: Database):
     "init",
     "upgrade_table",
     # Models
+    "HttpHeader",
     "Cookie",
     "Message",
     "Model",
diff --git a/linkedin_matrix/db/http_header.py b/linkedin_matrix/db/http_header.py
new file mode 100644
index
0000000..dead523
--- /dev/null
+++ b/linkedin_matrix/db/http_header.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+from asyncpg import Record
+from attr import dataclass
+
+from mautrix.types import UserID
+
+from .model_base import Model
+
+
+@dataclass
+class HttpHeader(Model):
+    """One HTTP request header (name/value) stored for a Matrix user."""
+
+    mxid: UserID
+    name: str
+    value: str
+
+    _table_name = "http_header"
+    _field_list = ["mxid", "name", "value"]
+
+    @classmethod
+    def _from_row(cls, row: Record | None) -> HttpHeader | None:
+        if row is None:
+            return None
+        return cls(**row)
+
+    @classmethod
+    async def get_for_mxid(cls, mxid: UserID) -> list[HttpHeader]:
+        """Return every header stored for ``mxid``."""
+        query = cls.select_constructor("mxid=$1")
+        rows = await cls.db.fetch(query, mxid)
+        return [cls(**row) for row in rows]
+
+    @classmethod
+    async def delete_all_for_mxid(cls, mxid: UserID) -> None:
+        await cls.db.execute("DELETE FROM http_header WHERE mxid=$1", mxid)
+
+    @classmethod
+    async def bulk_upsert(cls, mxid: UserID, http_headers: dict[str, str]) -> None:
+        """Upsert each ``name: value`` pair for ``mxid``, one statement per header."""
+        for name, value in http_headers.items():
+            await cls(mxid, name, value).upsert()
+
+    async def upsert(self) -> None:
+        """Insert this header, or update its value if (mxid, name) already exists."""
+        query = """
+            INSERT INTO http_header (mxid, name, value)
+            VALUES ($1, $2, $3)
+            ON CONFLICT (mxid, name)
+            DO UPDATE
+                SET value=excluded.value
+        """
+        await self.db.execute(query, self.mxid, self.name, self.value)
diff --git a/linkedin_matrix/db/upgrade/__init__.py b/linkedin_matrix/db/upgrade/__init__.py
index ea4cb9a..2cee5df 100644
--- a/linkedin_matrix/db/upgrade/__init__.py
+++ b/linkedin_matrix/db/upgrade/__init__.py
@@ -12,6 +12,7 @@
     v07_puppet_contact_info_set,
     v08_splat_pickle_data,
     v09_cookie_table,
+    v10_http_header_table,
 )
 
 __all__ = (
@@ -24,4 +25,5 @@
     "v07_puppet_contact_info_set",
     "v08_splat_pickle_data",
     "v09_cookie_table",
+    "v10_http_header_table",
 )
diff --git a/linkedin_matrix/db/upgrade/v10_http_header_table.py b/linkedin_matrix/db/upgrade/v10_http_header_table.py
new file mode 100644
index
--- /dev/null +++ b/linkedin_matrix/db/upgrade/v10_http_header_table.py @@ -0,0 +1,18 @@ +from mautrix.util.async_db import Connection + +from . import upgrade_table + + +@upgrade_table.register(description="Add a header table for storing all of the headers") +async def upgrade_v10(conn: Connection): + await conn.execute( + """ + CREATE TABLE http_header ( + mxid TEXT, + name TEXT, + value TEXT, + + PRIMARY KEY (mxid, name) + ) + """ + ) diff --git a/linkedin_matrix/user.py b/linkedin_matrix/user.py index a8a33c7..c14fc26 100644 --- a/linkedin_matrix/user.py +++ b/linkedin_matrix/user.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterable, Awaitable, cast +from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterable, Awaitable, Optional, cast from asyncio.futures import Future from datetime import datetime import asyncio @@ -26,7 +26,7 @@ from . import portal as po, puppet as pu from .config import Config -from .db import Cookie, User as DBUser +from .db import Cookie, HttpHeader, User as DBUser if TYPE_CHECKING: from .__main__ import LinkedInBridge @@ -195,7 +195,10 @@ async def load_session(self, is_startup: bool = False) -> bool: await self.push_bridge_state(BridgeStateEvent.BAD_CREDENTIALS, error="logged-out") return False - self.client = LinkedInMessaging.from_cookies({c.name: c.value for c in cookies}) + self.client = LinkedInMessaging.from_cookies_and_headers( + {c.name: c.value for c in cookies}, + {h.name: h.value for h in await HttpHeader.get_for_mxid(self.mxid)}, + ) backoff = 1.0 while True: @@ -255,10 +258,12 @@ async def is_logged_in(self) -> bool: self.user_profile_cache = None return self._is_logged_in or False - async def on_logged_in(self, cookies: dict[str, str]): + async def on_logged_in(self, cookies: dict[str, str], headers: Optional[dict[str, str]]): cookies = {k: v.strip('"') for k, v in cookies.items()} await Cookie.bulk_upsert(self.mxid, cookies) - self.client = 
LinkedInMessaging.from_cookies(cookies) + if headers: + await HttpHeader.bulk_upsert(self.mxid, headers) + self.client = LinkedInMessaging.from_cookies_and_headers(cookies, headers) self.listener_event_handlers_created = False self.user_profile_cache = await self.client.get_user_profile() if (mp := self.user_profile_cache.mini_profile) and mp.entity_urn: diff --git a/linkedin_matrix/web/provisioning_api.py b/linkedin_matrix/web/provisioning_api.py index 7f2a643..5a8e300 100644 --- a/linkedin_matrix/web/provisioning_api.py +++ b/linkedin_matrix/web/provisioning_api.py @@ -106,7 +106,7 @@ async def login(self, request: web.Request) -> web.Response: return web.HTTPBadRequest(body='{"error": "Missing keys"}', headers=self._headers) try: - await user.on_logged_in(data) + await user.on_logged_in(data, None) track(user, "$login_success") except Exception as e: track(user, "$login_failed", {"error": str(e)}) diff --git a/linkedin_messaging/linkedin.py b/linkedin_messaging/linkedin.py index c82aa40..e200c75 100644 --- a/linkedin_messaging/linkedin.py +++ b/linkedin_messaging/linkedin.py @@ -37,13 +37,11 @@ f"{LINKEDIN_BASE_URL}/realtime/realtimeFrontendClientConnectivityTracking" ) - SEED_URL = f"{LINKEDIN_BASE_URL}/login" """ URL to seed all of the auth requests """ - T = TypeVar("T", bound=DataClassJsonMixin) @@ -75,51 +73,50 @@ class ChallengeException(Exception): pass -class LinkedInMessaging: - _request_headers = { - "user-agent": " ".join( - [ - "Mozilla/5.0 (X11; Linux x86_64)", - "AppleWebKit/537.36 (KHTML, like Gecko)", - "Chrome/120.0.0.0 Safari/537.36", - ] - ), - "accept-language": "en-US,en;q=0.9", - "x-li-lang": "en_US", - "x-restli-protocol-version": "2.0.0", - "x-li-track": json.dumps( - { - "clientVersion": "1.13.8751", - "mpVersion": "1.13.8751", - "osName": "web", - "timezoneOffset": -7, - "timezone": "America/Denver", - "deviceFormFactor": "DESKTOP", - "mpName": "voyager-web", - "displayDensity": 1, - "displayWidth": 2560, - "displayHeight": 1440, - } - 
), - "Authority": "www.linkedin.com", - "referer": "https://www.linkedin.com/feed/", - "sec-ch-ua": '"Not_A Brand";v="8", "Chromium";v="120"', - "sec-ch-ua-mobile": "?0", - "sec-ch-ua-platform": '"Linux"', - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-li-page-instance": "urn:li:page:feed_index_index;bcfe9fd6-239a-49e9-af15-44b7e5895eaa", - "x-li-recipe-accept": "application/vnd.linkedin.normalized+json+2.1", - "x-li-recipe-map": json.dumps( - { - "inAppAlertsTopic": "com.linkedin.voyager.dash.deco.identity.notifications.InAppAlert-51", # noqa: E501 - "professionalEventsTopic": "com.linkedin.voyager.dash.deco.events.ProfessionalEventDetailPage-53", # noqa: E501 - "topCardLiveVideoTopic": "com.linkedin.voyager.dash.deco.video.TopCardLiveVideo-9", - } - ), - } +fallback_headers = { + "user-agent": " ".join( + [ + "Mozilla/5.0 (X11; Linux x86_64)", + "AppleWebKit/537.36 (KHTML, like Gecko)", + "Chrome/120.0.0.0 Safari/537.36", + ] + ), + "accept-language": "en-US,en;q=0.9", + "x-li-lang": "en_US", + "x-restli-protocol-version": "2.0.0", + "x-li-track": json.dumps( + { + "clientVersion": "1.13.8751", + "mpVersion": "1.13.8751", + "osName": "web", + "timezoneOffset": -7, + "timezone": "America/Denver", + "deviceFormFactor": "DESKTOP", + "mpName": "voyager-web", + "displayDensity": 1, + "displayWidth": 2560, + "displayHeight": 1440, + } + ), + "Authority": "www.linkedin.com", + "referer": "https://www.linkedin.com/feed/", + "sec-ch-ua": '"Not_A Brand";v="8", "Chromium";v="120"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"Linux"', + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + "x-li-page-instance": "urn:li:page:feed_index_index;bcfe9fd6-239a-49e9-af15-44b7e5895eaa", + "x-li-recipe-accept": "application/vnd.linkedin.normalized+json+2.1", + "x-li-recipe-map": json.dumps({ + "inAppAlertsTopic": "com.linkedin.voyager.dash.deco.identity.notifications.InAppAlert-51", + 
"professionalEventsTopic": "com.linkedin.voyager.dash.deco.events.ProfessionalEventDetailPage-53", # noqa: E501 + "topCardLiveVideoTopic": "com.linkedin.voyager.dash.deco.video.TopCardLiveVideo-9", + }), +} + +class LinkedInMessaging: session: aiohttp.ClientSession two_factor_payload: dict[str, Any] event_listeners: defaultdict[ @@ -131,6 +128,7 @@ class LinkedInMessaging: ] ], ] + headers: dict[str, str] _realtime_session_id: uuid.UUID _realtime_connection_id: Optional[uuid.UUID] = None @@ -139,11 +137,21 @@ def __init__(self): self.session = aiohttp.ClientSession() self.event_listeners = defaultdict(list) + def update_headers_from_cookies(self): + self.headers["csrf-token"] = self.cookies["JSESSIONID"].strip('"') + @staticmethod - def from_cookies(cookies: dict[str, str]) -> "LinkedInMessaging": + def from_cookies_and_headers(cookies: dict[str, str], + headers: Optional[dict[str, str]]) -> "LinkedInMessaging": linkedin = LinkedInMessaging() linkedin.session.cookie_jar.update_cookies(cookies) - linkedin._request_headers["csrf-token"] = cookies["JSESSIONID"].strip('"') + + if headers: + linkedin.headers = headers + else: + linkedin.headers = fallback_headers + linkedin.update_headers_from_cookies() + return linkedin def cookies(self) -> dict[str, str]: @@ -154,13 +162,21 @@ async def close(self): async def _get(self, relative_url: str, **kwargs: Any) -> aiohttp.ClientResponse: headers = kwargs.pop("headers", {}) - headers.update(self._request_headers) - return await self.session.get(API_BASE_URL + relative_url, headers=headers, **kwargs) + headers.update(self.headers) + return await self.session.get( + API_BASE_URL + relative_url, + headers=headers, + **kwargs + ) async def _post(self, relative_url: str, **kwargs: Any) -> aiohttp.ClientResponse: headers = kwargs.pop("headers", {}) - headers.update(self._request_headers) - return await self.session.post(API_BASE_URL + relative_url, headers=headers, **kwargs) + headers.update(self.headers) + return await 
self.session.post( + API_BASE_URL + relative_url, + headers=headers, + **kwargs + ) # region Authentication @@ -178,14 +194,6 @@ async def logged_in(self) -> bool: logging.exception(f"Failed getting the user profile: {e}") return False - async def login_manual(self, cookies: dict[str, str], new_session: bool = True): - if new_session: - if self.session: - await self.session.close() - self.session = aiohttp.ClientSession() - self.session.cookie_jar.update_cookies(cookies) - self._request_headers["csrf-token"] = cookies["JSESSIONID"].strip('"') - async def login(self, email: str, password: str, new_session: bool = True): if new_session: if self.session: @@ -212,9 +220,7 @@ async def login(self, email: str, password: str, new_session: bool = True): # Check to see if the user was successfully logged in with just email and # password. if self.has_auth_cookies: - for c in self.session.cookie_jar: - if c.key == "JSESSIONID": - self._request_headers["csrf-token"] = c.value.strip('"') + self.update_headers_from_cookies() return # 2FA is required. Throw an exception. @@ -251,15 +257,13 @@ async def enter_2fa(self, two_factor_code: str): VERIFY_URL, data={**self.two_factor_payload, "pin": two_factor_code} ): if self.has_auth_cookies: - for c in self.session.cookie_jar: - if c.key == "JSESSIONID": - self._request_headers["csrf-token"] = c.value.strip('"') + self.update_headers_from_cookies() return # TODO (#1) can we scrape anything from the page? 
raise Exception("Failed to log in.") async def logout(self) -> bool: - csrf_token = self._request_headers.get("csrf-token") + csrf_token = self.headers.get("csrf-token") if not csrf_token: return True response = await self.session.get( @@ -549,8 +553,7 @@ async def _listen_to_event_stream(self): headers = { "accept": "text/event-stream", - "x-li-realtime-session": str(self._realtime_session_id), - **self._request_headers, + **self.headers, } async with self.session.get( @@ -571,6 +574,8 @@ async def _listen_to_event_stream(self): continue data = json.loads(line.decode("utf-8")[6:]) + logging.debug("Got data from event stream") + # Special handling for ALL_EVENTS handler. if all_events_handlers := self.event_listeners.get("ALL_EVENTS"): for handler in all_events_handlers: @@ -594,25 +599,27 @@ async def _listen_to_event_stream(self): logging.info("Event stream closed") async def _send_heartbeat(self, user_urn: URN): - logging.info("Starting heartbeat task") - while True: - await asyncio.sleep(60) - logging.info("Sending heartbeat") - - await self._post( - CONNECTIVITY_TRACKING_URL, - params={"action": "sendHeartbeat"}, - json={ - "isFirstHeartbeat": False, - "isLastHeartbeat": False, - "realtimeSessionId": str(self._realtime_session_id), - "mpName": "voyager-web", - "mpVersion": "1.13.8094", - "clientId": "voyager-web", - "actorUrn": str(user_urn), - "contextUrns": [str(user_urn)], - }, - ) + return + + # logging.info("Starting heartbeat task") + # while True: + # await asyncio.sleep(60) + # logging.info("Sending heartbeat") + + # await self._post( + # CONNECTIVITY_TRACKING_URL, + # params={"action": "sendHeartbeat"}, + # json={ + # "isFirstHeartbeat": False, + # "isLastHeartbeat": False, + # "realtimeSessionId": str(self._realtime_session_id), + # "mpName": "voyager-web", + # "mpVersion": "1.13.8094", + # "clientId": "voyager-web", + # "actorUrn": str(user_urn), + # "contextUrns": [str(user_urn)], + # }, + # ) async def start_listener(self, user_urn: URN): 
self._realtime_session_id = uuid.uuid4()