From e4121b8ecc8a537372744ac8c5ce843996652416 Mon Sep 17 00:00:00 2001 From: Taylor Steinberg Date: Wed, 8 Apr 2026 23:24:39 -0400 Subject: [PATCH 1/2] fix(connect): rewrite Session to use rebuild_method override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hand-rolled redirect loop in Session.post with a minimal rebuild_method override. Inheriting from requests.SessionRedirectMixin restores rebuild_auth (strips Authorization on cross-origin hops), cookie propagation, response.history, TooManyRedirects, proxy rebuild, and streaming semantics — all of which the previous implementation silently dropped. Supersedes the defensive patch in #464. --- src/posit/connect/sessions.py | 125 ++++++++++----------------- tests/posit/connect/test_sessions.py | 76 +++++++++------- 2 files changed, 88 insertions(+), 113 deletions(-) diff --git a/src/posit/connect/sessions.py b/src/posit/connect/sessions.py index 5ce73e5e..dc439543 100644 --- a/src/posit/connect/sessions.py +++ b/src/posit/connect/sessions.py @@ -1,53 +1,41 @@ -from urllib.parse import urljoin - import requests class Session(requests.Session): - """Custom session that implements CURLOPT_POSTREDIR. - - This class mimics the functionality of CURLOPT_POSTREDIR from libcurl by - providing a custom implementation of the POST method. It allows the caller - to control whether the original POST data is preserved on redirects or if the - request should be converted to a GET when a redirect occurs. This is achieved - by disabling automatic redirect handling and manually following the redirect - chain with the desired behavior. - - Notes - ----- - The custom `post` method in this class: - - - Disables automatic redirect handling by setting ``allow_redirects=False``. - - Manually follows redirects up to a specified ``max_redirects``. - - Determines the HTTP method for subsequent requests based on the response - status code and the ``preserve_post`` flag: - - - For HTTP status codes 307 and 308, the method and request body are - always preserved as POST. - - For other redirects (e.g., 301, 302, 303), the behavior is determined - by ``preserve_post``: - - If ``preserve_post=True``, the POST method is maintained. - - If ``preserve_post=False``, the method is converted to GET and the - request body is discarded. + """Custom session that preserves POST bodies across 301/302 redirects. + + RFC 7231 allows clients to downgrade POST to GET on 301/302, and + ``requests`` does so by default. Some Connect endpoints issue a 302 and + expect the client to re-POST the original body to the new location + (mirroring libcurl's ``CURLOPT_POSTREDIR`` behavior). Overriding + :meth:`rebuild_method` keeps every other piece of the ``requests`` + redirect machinery intact — ``rebuild_auth`` (which strips the + ``Authorization`` header on cross-origin hops), cookie propagation, + ``response.history``, ``TooManyRedirects``, proxy rebuild, and streaming + semantics — so this class is a minimal, safe override rather than a + hand-rolled redirect loop. + """ - Examples - -------- - Create a session and send a POST request while preserving POST data on redirects: + def __init__(self): + super().__init__() + self._preserve_post_on_redirect = True - >>> session = Session() - >>> response = session.post( - ... "https://example.com/api", data={"key": "value"}, preserve_post=True - ... ) - >>> print(response.status_code) + def rebuild_method(self, prepared_request, response): + """Preserve POST across 301/302 when ``preserve_post_on_redirect`` is set. - See Also - -------- - requests.Session : The base session class from the requests library. - """ + 303 always downgrades to GET per the HTTP spec. 307/308 already + preserve the method in the base implementation. + """ + if ( + self._preserve_post_on_redirect + and prepared_request.method == "POST" + and response.status_code in (301, 302) + ): + return + super().rebuild_method(prepared_request, response) def post(self, url, data=None, json=None, preserve_post=True, max_redirects=5, **kwargs): - """ - Send a POST request and handle redirects manually. + """Send a POST request. Parameters ---------- @@ -58,46 +46,21 @@ def post(self, url, data=None, json=None, preserve_post=True, max_redirects=5, * json : any, optional The JSON data to send. preserve_post : bool, optional - If True, re-send POST data on redirects (mimicking CURLOPT_POSTREDIR); - if False, converts to GET on 301/302/303 responses. + If True (default), re-send POST data on 301/302 redirects + (mimicking ``CURLOPT_POSTREDIR``). If False, fall back to the + default ``requests`` behavior (downgrade to GET on 301/302). max_redirects : int, optional - Maximum number of redirects to follow. + Maximum number of redirects to follow before raising + :class:`requests.exceptions.TooManyRedirects`. **kwargs - Additional keyword arguments passed to the request. - - Returns - ------- - requests.Response - The final response after following redirects. + Additional keyword arguments passed to :meth:`requests.Session.request`. """ - # Force manual redirect handling by disabling auto redirects. - kwargs["allow_redirects"] = False - - # Initial POST request - response = super().post(url, data=data, json=json, **kwargs) - redirect_count = 0 - - # Manually follow redirects, if any - while response.is_redirect and redirect_count < max_redirects: - redirect_url = response.headers.get("location") - if not redirect_url: - break # No redirect URL; exit loop - - redirect_url = urljoin(response.url, redirect_url) - - # For 307 and 308 the HTTP spec mandates preserving the method and body. - if response.status_code in (307, 308): - method = "POST" - else: - if preserve_post: - method = "POST" - else: - method = "GET" - data = None - json = None - - # Perform the next request in the redirect chain. - response = self.request(method, redirect_url, data=data, json=json, **kwargs) - redirect_count += 1 - - return response + previous_preserve = self._preserve_post_on_redirect + previous_max = self.max_redirects + self._preserve_post_on_redirect = preserve_post + self.max_redirects = max_redirects + try: + return super().post(url, data=data, json=json, **kwargs) + finally: + self._preserve_post_on_redirect = previous_preserve + self.max_redirects = previous_max diff --git a/tests/posit/connect/test_sessions.py b/tests/posit/connect/test_sessions.py index de5af25b..37d7a106 100644 --- a/tests/posit/connect/test_sessions.py +++ b/tests/posit/connect/test_sessions.py @@ -1,4 +1,5 @@ import pytest +import requests import responses from posit.connect.sessions import Session @@ -14,7 +15,6 @@ def test_post_no_redirect(): assert response.status_code == 200 assert len(responses.calls) == 1 - # Confirm that the request method was POST. assert responses.calls[0].request.method == "POST" @@ -31,10 +31,11 @@ def test_post_with_redirect_preserve(): assert response.status_code == 200 assert len(responses.calls) == 2 - - # Both calls should use the POST method. assert responses.calls[0].request.method == "POST" assert responses.calls[1].request.method == "POST" + # The final response should expose the redirect chain via history. + assert len(response.history) == 1 + assert response.history[0].status_code == 302 @responses.activate @@ -50,7 +51,6 @@ def test_post_with_redirect_no_preserve(): assert response.status_code == 200 assert len(responses.calls) == 2 - # The initial call is a POST, but the follow-up should be a GET since preserve_post is False assert responses.calls[0].request.method == "POST" assert responses.calls[1].request.method == "GET" @@ -61,74 +61,86 @@ def test_post_redirect_307_308(status_code): initial_url = "http://connect.example.com/api" redirect_url = "http://connect.example.com/redirect" - # For 307 and 308 redirects, the HTTP spec mandates preserving the method. responses.add( responses.POST, initial_url, status=status_code, headers={"location": "/redirect"} ) responses.add(responses.POST, redirect_url, json={"result": "redirected"}, status=200) session = Session() - # Even with preserve_post=False, a 307 or 308 redirect should use POST. + # Even with preserve_post=False, 307/308 preserve the method per RFC 7231. response = session.post(initial_url, data={"key": "value"}, preserve_post=False) assert response.status_code == 200 assert len(responses.calls) == 2 - # Confirm that the method for the redirect is still POST. assert responses.calls[1].request.method == "POST" @responses.activate -def test_post_redirect_max_redirects(): +def test_post_redirect_max_redirects_raises(): initial_url = "http://connect.example.com/api" redirect1_url = "http://connect.example.com/redirect1" redirect2_url = "http://connect.example.com/redirect2" - # Build a chain of 3 redirects. responses.add(responses.POST, initial_url, status=302, headers={"location": "/redirect1"}) responses.add(responses.POST, redirect1_url, status=302, headers={"location": "/redirect2"}) responses.add(responses.POST, redirect2_url, status=302, headers={"location": "/redirect3"}) session = Session() - # Limit to 2 redirects; thus, the third redirect response should not be followed. - response = session.post( - initial_url, data={"key": "value"}, max_redirects=2, preserve_post=True - ) - - # The calls should include: initial, first redirect, and second redirect. - assert len(responses.calls) == 3 - # The final response is the one from the second redirect. - assert response.status_code == 302 - # The Location header should point to the third URL. - assert response.headers.get("location") == "/redirect3" + # Exceeding max_redirects now raises TooManyRedirects (the behavior of + # requests.Session), instead of silently returning the last 3xx response. + with pytest.raises(requests.exceptions.TooManyRedirects): + session.post(initial_url, data={"key": "value"}, max_redirects=2, preserve_post=True) @responses.activate def test_post_redirect_no_location(): url = "http://connect.example.com/api" - # Simulate a redirect response that lacks a Location header. responses.add(responses.POST, url, status=302, headers={}) session = Session() response = session.post(url, data={"key": "value"}) - # The loop should break immediately since there is no location to follow. + # With no Location header, requests stops following redirects. assert len(responses.calls) == 1 assert response.status_code == 302 @responses.activate -def test_post_redirect_location_none_explicit(): - url = "http://connect.example.com/api" +def test_post_cross_origin_redirect_strips_authorization(): + """Session-level Authorization must not be forwarded across origins. - # Use a callback to explicitly return a None for the "location" header. - def request_callback(request): - return (302, {"location": ""}, "Redirect without location") + This was the vulnerability that motivated the rewrite. Because the class + now inherits ``SessionRedirectMixin.rebuild_auth``, cross-origin hops + automatically drop the ``Authorization`` header. + """ + initial_url = "https://connect.example.com/api" + attacker_url = "https://attacker.example/collect" - responses.add_callback(responses.POST, url, callback=request_callback) + responses.add(responses.POST, initial_url, status=302, headers={"location": attacker_url}) + responses.add(responses.POST, attacker_url, json={}, status=200) session = Session() - response = session.post(url, data={"key": "value"}) + session.headers["Authorization"] = "Key super-secret" + session.post(initial_url, data={"key": "value"}, preserve_post=True) - # The redirect loop should break since location is None. - assert len(responses.calls) == 1 - assert response.status_code == 302 + assert len(responses.calls) == 2 + # First hop carries the credential; cross-origin follow-up must not. + assert responses.calls[0].request.headers.get("Authorization") == "Key super-secret" + assert "Authorization" not in responses.calls[1].request.headers + + +@responses.activate +def test_post_same_origin_redirect_preserves_authorization(): + initial_url = "https://connect.example.com/api" + follow_url = "https://connect.example.com/next" + + responses.add(responses.POST, initial_url, status=302, headers={"location": "/next"}) + responses.add(responses.POST, follow_url, json={}, status=200) + + session = Session() + session.headers["Authorization"] = "Key super-secret" + session.post(initial_url, data={"key": "value"}, preserve_post=True) + + assert len(responses.calls) == 2 + assert responses.calls[0].request.headers.get("Authorization") == "Key super-secret" + assert responses.calls[1].request.headers.get("Authorization") == "Key super-secret" From 5d5e3778fe0bc94c398a73ded1b5355af42603cd Mon Sep 17 00:00:00 2001 From: Taylor Date: Wed, 8 Apr 2026 23:31:25 -0400 Subject: [PATCH 2/2] test: add integration tests --- integration/tests/posit/test_sessions.py | 188 +++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 integration/tests/posit/test_sessions.py diff --git a/integration/tests/posit/test_sessions.py b/integration/tests/posit/test_sessions.py new file mode 100644 index 00000000..9bb34620 --- /dev/null +++ b/integration/tests/posit/test_sessions.py @@ -0,0 +1,188 @@ +"""Integration tests for ``posit.connect.sessions.Session``. + +These tests exercise the real ``requests`` / ``urllib3`` / socket stack +against a local HTTP server, rather than monkey-patching ``HTTPAdapter.send`` +the way unit tests with ``responses`` do. That lets us verify that the +redirect semantics we rely on (POST body preservation on 301/302, +``Authorization`` stripping on cross-origin hops, ``response.history`` +population, ``TooManyRedirects`` on overflow) actually hold end-to-end. + +The server is an in-process ``http.server`` instance on ``127.0.0.1`` — no +Connect instance is required, so these run in any environment. +""" + +from __future__ import annotations + +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer +from typing import Callable, List, Tuple + +import pytest +import requests + +from posit.connect.sessions import Session + + +class _Recorder: + """Shared state between the test and the request handler thread.""" + + def __init__(self) -> None: + self.requests: List[Tuple[str, str, dict, bytes]] = [] # method, path, headers, body + self.responder: Callable[[BaseHTTPRequestHandler], None] | None = None + + +def _make_handler(recorder: _Recorder): + class Handler(BaseHTTPRequestHandler): + def log_message(self, format, *args): # noqa: A002 - silence default stderr logging + pass + + def _record(self) -> None: + length = int(self.headers.get("Content-Length") or 0) + body = self.rfile.read(length) if length else b"" + recorder.requests.append( + (self.command, self.path, dict(self.headers), body), + ) + + def do_GET(self) -> None: # noqa: N802 + self._record() + assert recorder.responder is not None + recorder.responder(self) + + def do_POST(self) -> None: # noqa: N802 + self._record() + assert recorder.responder is not None + recorder.responder(self) + + return Handler + + +@pytest.fixture +def server(): + recorder = _Recorder() + httpd = HTTPServer(("127.0.0.1", 0), _make_handler(recorder)) + thread = threading.Thread(target=httpd.serve_forever, daemon=True) + thread.start() + host, port = httpd.server_address + base = f"http://{host}:{port}" + try: + yield base, recorder + finally: + httpd.shutdown() + httpd.server_close() + thread.join(timeout=5) + + +def _ok(handler: BaseHTTPRequestHandler) -> None: + handler.send_response(200) + handler.send_header("Content-Type", "application/json") + handler.send_header("Content-Length", "2") + handler.end_headers() + handler.wfile.write(b"{}") + + +def _redirect(status: int, location: str) -> Callable[[BaseHTTPRequestHandler], None]: + def respond(handler: BaseHTTPRequestHandler) -> None: + handler.send_response(status) + handler.send_header("Location", location) + handler.send_header("Content-Length", "0") + handler.end_headers() + + return respond + + +def test_post_preserves_body_across_302(server): + """A POST that 302s to a new path re-POSTs the body (libcurl POSTREDIR).""" + base, recorder = server + hops = iter([_redirect(302, "/next"), _ok]) + recorder.responder = lambda h: next(hops)(h) + + session = Session() + response = session.post(f"{base}/start", data=b"payload-bytes", preserve_post=True) + + assert response.status_code == 200 + assert len(recorder.requests) == 2 + assert recorder.requests[0][0] == "POST" + assert recorder.requests[0][3] == b"payload-bytes" + assert recorder.requests[1][0] == "POST" + assert recorder.requests[1][3] == b"payload-bytes" + # response.history should carry the 302 hop. + assert [r.status_code for r in response.history] == [302] + + +def test_post_downgrades_to_get_when_preserve_post_false(server): + base, recorder = server + hops = iter([_redirect(302, "/next"), _ok]) + recorder.responder = lambda h: next(hops)(h) + + session = Session() + response = session.post(f"{base}/start", data=b"payload", preserve_post=False) + + assert response.status_code == 200 + assert recorder.requests[0][0] == "POST" + assert recorder.requests[1][0] == "GET" + assert recorder.requests[1][3] == b"" # body dropped on downgrade + + +@pytest.mark.parametrize("status", [307, 308]) +def test_post_preserves_method_on_307_308(server, status): + base, recorder = server + hops = iter([_redirect(status, "/next"), _ok]) + recorder.responder = lambda h: next(hops)(h) + + session = Session() + response = session.post(f"{base}/start", data=b"payload", preserve_post=False) + + assert response.status_code == 200 + assert recorder.requests[1][0] == "POST" + assert recorder.requests[1][3] == b"payload" + + +def test_cross_origin_redirect_strips_authorization(server): + """A cross-origin redirect must not forward the Authorization header. + + We use ``127.0.0.1`` as the request origin and redirect to ``localhost`` + on the same port — different hostnames, so ``rebuild_auth`` must strip + the session-level Authorization header on the follow-up hop. + """ + base, recorder = server + _, port = base.rsplit(":", 1) + cross_origin_next = f"http://localhost:{port}/collect" + + hops = iter([_redirect(302, cross_origin_next), _ok]) + recorder.responder = lambda h: next(hops)(h) + + session = Session() + session.headers["Authorization"] = "Key super-secret-token" + session.post(f"{base}/start", data=b"payload", preserve_post=True) + + assert len(recorder.requests) == 2 + assert recorder.requests[0][2].get("Authorization") == "Key super-secret-token" + # The cross-origin second hop must NOT carry the Authorization header. + assert "Authorization" not in recorder.requests[1][2] + assert "authorization" not in {k.lower() for k in recorder.requests[1][2]} + + +def test_same_origin_redirect_preserves_authorization(server): + base, recorder = server + hops = iter([_redirect(302, "/next"), _ok]) + recorder.responder = lambda h: next(hops)(h) + + session = Session() + session.headers["Authorization"] = "Key super-secret-token" + session.post(f"{base}/start", data=b"payload", preserve_post=True) + + assert len(recorder.requests) == 2 + assert recorder.requests[0][2].get("Authorization") == "Key super-secret-token" + assert recorder.requests[1][2].get("Authorization") == "Key super-secret-token" + + +def test_max_redirects_raises_too_many_redirects(server): + base, recorder = server + recorder.responder = _redirect(302, "/loop") + + session = Session() + with pytest.raises(requests.exceptions.TooManyRedirects): + session.post(f"{base}/start", data=b"payload", max_redirects=3, preserve_post=True) + + # Initial POST + 3 followed redirects before requests raises. + assert len(recorder.requests) == 4