Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ The supported method of passing ClickHouse server settings is to prefix such arg

## UNRELEASED

### Improvements
- Added `utc_tz_aware="schema"` mode which returns timezone-aware datetimes only when the server's column schema explicitly defines a timezone (e.g. `DateTime('UTC')`), and naive datetimes for bare `DateTime` columns. This matches the ClickHouse schema definition exactly. Not yet supported for Arrow-based query methods. Closes [#645](https://github.com/ClickHouse/clickhouse-connect/issues/645)

### Bug Fixes
- Recognize `UPDATE` as a command so lightweight updates work correctly via `client.query()` and SQLAlchemy.

Expand Down
12 changes: 8 additions & 4 deletions clickhouse_connect/driver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,10 @@ def create_client(*,
:param server_host_name This is the server host name that will be checked against a TLS certificate for
validity. This option can be used if using an ssh_tunnel or other indirect means to a ClickHouse server
where the `host` argument refers to the tunnel or proxy and not the actual ClickHouse server
:param utc_tz_aware When True, ClickHouse Connect will return timezone-aware UTC datetimes instead of the
legacy naive UTC datetimes.
:param utc_tz_aware Controls timezone-aware behavior for UTC DateTime columns. False (default) returns
naive UTC timestamps. True forces timezone-aware UTC datetimes. "schema" returns datetimes that
match the server's column definition, which means timezone-aware when the column defines a timezone and naive
for bare DateTime columns.
:param autogenerate_session_id If set, this will override the 'autogenerate_session_id' common setting.
:param form_encode_query_params If True, query parameters will be sent as form-encoded data in the request body
instead of as URL parameters. This is useful for queries with large parameter sets that might exceed URL length
Expand Down Expand Up @@ -205,8 +207,10 @@ async def create_async_client(*,
:param server_host_name This is the server host name that will be checked against a TLS certificate for
validity. This option can be used if using an ssh_tunnel or other indirect means to a ClickHouse server
where the `host` argument refers to the tunnel or proxy and not the actual ClickHouse server
:param utc_tz_aware When True, ClickHouse Connect will return timezone-aware UTC datetimes instead of the
legacy naive UTC datetimes.
:param utc_tz_aware Controls timezone-aware behavior for UTC DateTime columns. False (default) returns
naive UTC timestamps. True forces timezone-aware UTC datetimes. "schema" returns datetimes that
match the server's column definition, which means timezone-aware when the column defines a timezone and naive
for bare DateTime columns.
:param autogenerate_session_id If set, this will override the 'autogenerate_session_id' common setting.
:param form_encode_query_params If True, query parameters will be sent as form-encoded data in the request body
instead of as URL parameters. This is useful for queries with large parameter sets that might exceed URL length
Expand Down
14 changes: 7 additions & 7 deletions clickhouse_connect/driver/asyncclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
from concurrent.futures.thread import ThreadPoolExecutor
from datetime import tzinfo
from typing import Optional, Union, Dict, Any, Sequence, Iterable, Generator, BinaryIO
from typing import Literal, Optional, Union, Dict, Any, Sequence, Iterable, Generator, BinaryIO

from clickhouse_connect.driver.client import Client
from clickhouse_connect.driver.common import StreamContext
Expand Down Expand Up @@ -110,7 +110,7 @@ async def query(self,
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> QueryResult:
"""
Expand Down Expand Up @@ -142,7 +142,7 @@ async def query_column_block_stream(self,
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None,
) -> StreamContext:
Expand Down Expand Up @@ -175,7 +175,7 @@ async def query_row_block_stream(self,
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> StreamContext:
"""
Expand Down Expand Up @@ -207,7 +207,7 @@ async def query_rows_stream(self,
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> StreamContext:
"""
Expand Down Expand Up @@ -354,7 +354,7 @@ async def query_df(self,
use_na_values: Optional[bool] = None,
query_tz: Optional[str] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
context: QueryContext = None,
external_data: Optional[ExternalData] = None,
use_extended_dtypes: Optional[bool] = None,
Expand Down Expand Up @@ -430,7 +430,7 @@ async def query_df_stream(self,
use_na_values: Optional[bool] = None,
query_tz: Optional[str] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
context: QueryContext = None,
external_data: Optional[ExternalData] = None,
use_extended_dtypes: Optional[bool] = None,
Expand Down
67 changes: 45 additions & 22 deletions clickhouse_connect/driver/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytz

from abc import ABC, abstractmethod
from typing import Iterable, Optional, Any, Union, Sequence, Dict, Generator, BinaryIO
from typing import Iterable, Literal, Optional, Any, Union, Sequence, Dict, Generator, BinaryIO
from pytz.exceptions import UnknownTimeZoneError

from clickhouse_connect import common
Expand Down Expand Up @@ -53,6 +53,25 @@ def _strip_utc_timezone_from_arrow(table: "arrow.Table") -> "arrow.Table":
return table


def _apply_arrow_tz_policy(table: "arrow.Table", utc_tz_aware: Union[bool, Literal["schema"]]) -> "arrow.Table":
"""Apply the utc_tz_aware policy to an Arrow table before conversion.

Handles UTC stripping when utc_tz_aware is False and warns when
utc_tz_aware is "schema" since that mode is not yet implemented for
Arrow-based queries.
"""
if utc_tz_aware == "schema":
logger.warning(
'utc_tz_aware="schema" is not yet supported for Arrow-based query methods. '
"It would require a separate schema lookup since ClickHouse attaches the server "
"timezone to all DateTime columns in Arrow format. Use query/query_df for "
"schema-matching behavior or open an issue if you need Arrow support."
)
if not utc_tz_aware:
table = _strip_utc_timezone_from_arrow(table)
return table


# pylint: disable=too-many-lines
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-positional-arguments,too-many-instance-attributes
class Client(ABC):
Expand All @@ -67,7 +86,7 @@ class Client(ABC):
database = None
max_error_message = 0
apply_server_timezone = False
utc_tz_aware = False
utc_tz_aware: Union[bool, Literal["schema"]] = False
show_clickhouse_errors = True

def __init__(self,
Expand All @@ -77,15 +96,17 @@ def __init__(self,
query_retries: int,
server_host_name: Optional[str],
apply_server_timezone: Optional[Union[str, bool]],
utc_tz_aware: Optional[bool],
utc_tz_aware: Optional[Union[bool, Literal["schema"]]],
show_clickhouse_errors: Optional[bool]):
"""
Shared initialization of ClickHouse Connect client
:param database: database name
:param query_limit: default LIMIT for queries
:param uri: uri for error messages
:param utc_tz_aware: Default timezone behavior when the active timezone resolves to UTC. If True,
timezone-aware UTC datetimes are returned; otherwise legacy naive datetimes are used.
:param utc_tz_aware: Controls timezone-aware behavior for UTC DateTime columns. False (default) returns
naive UTC timestamps. True forces timezone-aware UTC datetimes. "schema" returns datetimes that
match the server's column definition, which means timezone-aware when the column defines a timezone and naive
for bare DateTime columns.
"""
self.query_limit = coerce_int(query_limit)
self.query_retries = coerce_int(query_retries)
Expand All @@ -95,7 +116,12 @@ def __init__(self,
self.show_clickhouse_errors = coerce_bool(show_clickhouse_errors)
self.server_host_name = server_host_name
self.uri = uri
self.utc_tz_aware = bool(utc_tz_aware)
if isinstance(utc_tz_aware, str):
if utc_tz_aware != "schema":
raise ProgrammingError(f'utc_tz_aware must be True, False, or "schema", got "{utc_tz_aware}"')
self.utc_tz_aware = utc_tz_aware
else:
self.utc_tz_aware = bool(utc_tz_aware)
self._init_common_settings(apply_server_timezone)

def _init_common_settings(self, apply_server_timezone: Optional[Union[str, bool]]):
Expand Down Expand Up @@ -241,7 +267,7 @@ def query(self,
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> QueryResult:
"""
Expand Down Expand Up @@ -277,7 +303,7 @@ def query_column_block_stream(self,
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> StreamContext:
"""
Expand All @@ -298,7 +324,7 @@ def query_row_block_stream(self,
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> StreamContext:
"""
Expand All @@ -319,7 +345,7 @@ def query_rows_stream(self,
context: QueryContext = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
external_data: Optional[ExternalData] = None,
transport_settings: Optional[Dict[str, str]] = None) -> StreamContext:
"""
Expand Down Expand Up @@ -428,7 +454,7 @@ def query_df(self,
use_na_values: Optional[bool] = None,
query_tz: Optional[str] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
context: QueryContext = None,
external_data: Optional[ExternalData] = None,
use_extended_dtypes: Optional[bool] = None,
Expand All @@ -455,7 +481,7 @@ def query_df_stream(self,
use_na_values: Optional[bool] = None,
query_tz: Optional[str] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
context: QueryContext = None,
external_data: Optional[ExternalData] = None,
use_extended_dtypes: Optional[bool] = None,
Expand Down Expand Up @@ -485,7 +511,7 @@ def create_query_context(self,
context: Optional[QueryContext] = None,
query_tz: Optional[Union[str, tzinfo]] = None,
column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
use_na_values: Optional[bool] = None,
streaming: bool = False,
as_pandas: bool = False,
Expand Down Expand Up @@ -515,7 +541,8 @@ def create_query_context(self,
:param column_tzs: A dictionary of column names to tzinfo objects (or strings that will be converted to
tzinfo objects). The timezone will be applied to datetime objects returned in the query
:param utc_tz_aware: Override the client default for handling UTC results. True forces timezone-aware
UTC datetimes while False returns naive UTC datetimes.
UTC datetimes, False returns naive UTC datetimes, and "schema" returns datetimes matching the
server's column definition.
:param use_na_values: Deprecated alias for use_advanced_dtypes
:param as_pandas Return the result columns as pandas.Series objects
:param streaming Marker used to correctly configure streaming queries
Expand Down Expand Up @@ -660,17 +687,15 @@ def query_df_arrow(self,
raise ProgrammingError("PyArrow-backed dtypes are only supported when using pandas 2.x.")

def converter(table: arrow.Table) -> pd.DataFrame:
if not self.utc_tz_aware:
table = _strip_utc_timezone_from_arrow(table)
table = _apply_arrow_tz_policy(table, self.utc_tz_aware)
return table.to_pandas(types_mapper=pd.ArrowDtype, safe=False)

elif dataframe_library == "polars":
check_polars()
self._add_integration_tag("polars")

def converter(table: arrow.Table) -> pl.DataFrame:
if not self.utc_tz_aware:
table = _strip_utc_timezone_from_arrow(table)
table = _apply_arrow_tz_policy(table, self.utc_tz_aware)
return pl.from_arrow(table)

else:
Expand Down Expand Up @@ -716,16 +741,14 @@ def query_df_arrow_stream(self,
raise ProgrammingError("PyArrow-backed dtypes are only supported when using pandas 2.x.")

def converter(table: "arrow.Table") -> "pd.DataFrame":
if not self.utc_tz_aware:
table = _strip_utc_timezone_from_arrow(table)
table = _apply_arrow_tz_policy(table, self.utc_tz_aware)
return table.to_pandas(types_mapper=pd.ArrowDtype, safe=False)
elif dataframe_library == "polars":
check_polars()
self._add_integration_tag("polars")

def converter(table: arrow.Table) -> pl.DataFrame:
if not self.utc_tz_aware:
table = _strip_utc_timezone_from_arrow(table)
table = _apply_arrow_tz_policy(table, self.utc_tz_aware)
return pl.from_arrow(table)
else:
raise ValueError(f"dataframe_library must be 'pandas' or 'polars', got '{dataframe_library}'")
Expand Down
4 changes: 2 additions & 2 deletions clickhouse_connect/driver/httpclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from importlib import import_module
from importlib.metadata import version as dist_version
from base64 import b64encode
from typing import Optional, Dict, Any, Sequence, Union, List, Callable, Generator, BinaryIO
from typing import Literal, Optional, Dict, Any, Sequence, Union, List, Callable, Generator, BinaryIO
from urllib.parse import urlencode

from urllib3 import Timeout
Expand Down Expand Up @@ -77,7 +77,7 @@ def __init__(self,
https_proxy: Optional[str] = None,
server_host_name: Optional[str] = None,
apply_server_timezone: Optional[Union[str, bool]] = None,
utc_tz_aware: Optional[bool] = None,
utc_tz_aware: Optional[Union[bool, Literal["schema"]]] = None,
show_clickhouse_errors: Optional[bool] = None,
autogenerate_session_id: Optional[bool] = None,
autogenerate_query_id: Optional[bool] = None,
Expand Down
Loading