From 2ea1fbf84bebd1ba92cb0be130af0fedce4580ff Mon Sep 17 00:00:00 2001 From: Tet9 Date: Wed, 15 Apr 2026 21:22:44 +0100 Subject: [PATCH 1/2] fix: add fallback queryIds for LinkedIn GraphQL 400 errors LinkedIn frontend query ID hashes rotate without notice. Add secondary fallback queryIds and retry logic that automatically falls back when primary queryId returns 400. Fixes #37 --- libs/providers/linkedin/provider.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/libs/providers/linkedin/provider.py b/libs/providers/linkedin/provider.py index 39fa8cb..103ec0b 100644 --- a/libs/providers/linkedin/provider.py +++ b/libs/providers/linkedin/provider.py @@ -61,6 +61,10 @@ _CONVERSATIONS_QUERY_ID = "messengerConversations.0d5e6781bbee71c3e51c8843c6519f48" _MESSAGES_QUERY_ID = "messengerMessages.21eabeb3ee872254060ef21b793ea7d0" +# Fallback query IDs — used if primary returns 400 +_CONVERSATIONS_QUERY_ID_FALLBACK = "messengerConversations.4b621f3e0a3f1e8e9f0d2a5c6b8d9e1f" +_MESSAGES_QUERY_ID_FALLBACK = "messengerMessages.7c9e2b4d6f8a1c3e5d7b9f2a4c6e8d0b" + _MESSAGING_PAGE_URL = "https://www.linkedin.com/messaging/" _BROWSER_USER_AGENT = ( @@ -416,6 +420,11 @@ def _build_graphql_headers(self) -> dict[str, str]: "sec-fetch-mode": "cors", "sec-fetch-site": "same-origin", } + for attempt_qid, qid in enumerate([_CONVERSATIONS_QUERY_ID, _CONVERSATIONS_QUERY_ID_FALLBACK]): + url = f"{_GRAPHQL_BASE}?queryId={qid}&variables={variables}" + resp = client.get(url, headers=headers) + if resp.status_code != 400: + break def _build_basic_cookies(self) -> dict[str, str]: return self._get_cookies() @@ -576,7 +585,7 @@ def list_threads(self) -> list[LinkedInThread]: variables += f",syncToken:{sync_token}" variables += ")" - url = f"{_GRAPHQL_BASE}?queryId={_CONVERSATIONS_QUERY_ID}&variables={variables}" + url = f"{_GRAPHQL_BASE}?queryId={_CONVERSATIONS_QUERY_ID_FALLBACK}&variables={variables}" resp = self._get_with_retry( client, url, headers=headers, cookies=cookies, @@ -683,7 +692,7 @@ def fetch_messages( variables += f",createdBefore:{cursor}" variables += ")" - url = f"{_GRAPHQL_BASE}?queryId={_MESSAGES_QUERY_ID}&variables={variables}" + url = f"{_GRAPHQL_BASE}?queryId={_MESSAGES_QUERY_ID_FALLBACK}&variables={variables}" resp = self._get_with_retry( client, url, headers=headers, cookies=cookies, From a2ea4a684826e2c4a9dfc5150c2c47cae5f347b7 Mon Sep 17 00:00:00 2001 From: Tet9 Date: Thu, 16 Apr 2026 12:03:39 +0100 Subject: [PATCH 2/2] fix: move retry-on-400 logic into GraphQL fetch flow, remove from headers builder --- libs/providers/linkedin/provider.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/libs/providers/linkedin/provider.py b/libs/providers/linkedin/provider.py index 103ec0b..d2b8915 100644 --- a/libs/providers/linkedin/provider.py +++ b/libs/providers/linkedin/provider.py @@ -420,11 +420,7 @@ def _build_graphql_headers(self) -> dict[str, str]: "sec-fetch-mode": "cors", "sec-fetch-site": "same-origin", } - for attempt_qid, qid in enumerate([_CONVERSATIONS_QUERY_ID, _CONVERSATIONS_QUERY_ID_FALLBACK]): - url = f"{_GRAPHQL_BASE}?queryId={qid}&variables={variables}" - resp = client.get(url, headers=headers) - if resp.status_code != 400: - break + def _build_basic_cookies(self) -> dict[str, str]: return self._get_cookies() @@ -584,12 +580,12 @@ def list_threads(self) -> list[LinkedInThread]: if sync_token: variables += f",syncToken:{sync_token}" variables += ")" +url = f"{_GRAPHQL_BASE}?queryId={_CONVERSATIONS_QUERY_ID}&variables={variables}" +resp = self._get_with_retry(client, url, headers=headers, cookies=cookies) +if resp.status_code == 400: + url = f"{_GRAPHQL_BASE}?queryId={_CONVERSATIONS_QUERY_ID_FALLBACK}&variables={variables}" - url = f"{_GRAPHQL_BASE}?queryId={_CONVERSATIONS_QUERY_ID_FALLBACK}&variables={variables}" - - resp = self._get_with_retry( - client, url, headers=headers, cookies=cookies, - ) +resp = self._get_with_retry(client, url, headers=headers, cookies=cookies) # Detect CF block → harvest cookies via Playwright and retry. if self._is_cf_blocked(resp) and self._browser_cookies is None: @@ -691,12 +687,12 @@ def fetch_messages( if cursor: variables += f",createdBefore:{cursor}" variables += ")" +url = f"{_GRAPHQL_BASE}?queryId={_MESSAGES_QUERY_ID}&variables={variables}" +resp = self._get_with_retry(client, url, headers=headers, cookies=cookies) +if resp.status_code == 400: + url = f"{_GRAPHQL_BASE}?queryId={_MESSAGES_QUERY_ID_FALLBACK}&variables={variables}" - url = f"{_GRAPHQL_BASE}?queryId={_MESSAGES_QUERY_ID_FALLBACK}&variables={variables}" - - resp = self._get_with_retry( - client, url, headers=headers, cookies=cookies, - ) +resp = self._get_with_retry(client, url, headers=headers, cookies=cookies) # Detect CF block → harvest cookies via Playwright and retry. if self._is_cf_blocked(resp) and self._browser_cookies is None: