Source code for benchbox.platforms.clickhouse.adapter
"""Primary adapter for ClickHouse platforms."""
from __future__ import annotations
import logging
from benchbox.platforms.base import PlatformAdapter
from benchbox.utils.dependencies import check_platform_dependencies, get_dependency_error_message
from .diagnostics import ClickHouseDiagnosticsMixin
from .metadata import ClickHouseMetadataMixin
from .setup import ClickHouseSetupMixin
from .tuning import ClickHouseTuningMixin
from .workload import ClickHouseWorkloadMixin
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
[docs]
class ClickHouseAdapter(
    ClickHouseMetadataMixin,
    ClickHouseSetupMixin,
    ClickHouseDiagnosticsMixin,
    ClickHouseWorkloadMixin,
    ClickHouseTuningMixin,
    PlatformAdapter,
):
    """Adapter that combines the ClickHouse mixins behind a single entry point.

    Known Limitations:
        TPC-DS queries with known incompatibilities:

        - Query 14: INTERSECT DISTINCT requires manual alias addition
        - Query 30: Query plan cloning not implemented for aggregation steps (Code: 48)
        - Query 66: Nested aggregation not supported - requires query rewrite
        - Query 81: Query plan cloning not implemented for aggregation steps (Code: 48)

        These queries may still fail after transformations are applied; they
        need manual query rewriting or ClickHouse engine improvements.
    """

    # Benchmark name -> query numbers that may fail despite transformations.
    KNOWN_INCOMPATIBLE_QUERIES = {
        "tpcds": [14, 30, 66, 81],
    }

    def __init__(self, **config):
        """Resolve the deployment mode, check its dependencies and configure it.

        The mode is taken from the ``deployment_mode`` config key when it is
        set, otherwise from the deprecated ``mode`` key (a warning is logged),
        otherwise it defaults to ``"local"``. The value is lower-cased and
        ``"embedded"`` is accepted as an alias for ``"local"``.

        Args:
            **config: Adapter configuration. It is forwarded unchanged to the
                parent initializer and to the mode-specific setup method.

        Raises:
            ValueError: If the resolved mode is not ``local``, ``server`` or
                ``cloud``.
            ImportError: If the package required by the selected mode is not
                available.
        """
        from importlib.util import find_spec

        super().__init__(**config)
        self._dialect = "clickhouse"

        # Priority: explicit 'deployment_mode' (e.g. from the factory's
        # clickhouse:local colon syntax) > legacy 'mode' > default.
        explicit_mode = config.get("deployment_mode")
        fallback_mode = config.get("mode")
        if explicit_mode:
            resolved = explicit_mode.lower()
        elif fallback_mode:
            resolved = fallback_mode.lower()
            # The legacy key still works, but callers are asked to migrate.
            logger.warning(
                "Config key 'mode' is deprecated. Use deployment mode syntax "
                "(clickhouse:local, clickhouse:server) or 'deployment_mode' config key."
            )
        else:
            # No mode supplied: local (chDB) needs no credentials.
            resolved = "local"

        # 'embedded' is kept as a backward-compatible alias for 'local'.
        if resolved == "embedded":
            resolved = "local"
        self.deployment_mode = resolved

        supported = ("cloud", "local", "server")
        if resolved not in supported:
            listing = ", ".join(sorted(supported))
            raise ValueError(
                f"Invalid ClickHouse deployment mode '{resolved}'. "
                f"Valid modes: {listing}"
            )

        # Each mode first verifies its client package, then runs its setup.
        if resolved == "local":
            if find_spec("chdb") is None:
                raise ImportError(
                    "ClickHouse local mode requires chDB but it is not installed.\n"
                    "To resolve this issue:\n"
                    "  1. Install chDB: uv add chdb\n"
                    "  2. Or switch to server mode: --platform clickhouse:server\n"
                    "  3. Or use a different platform (e.g., DuckDB)\n"
                    "\nFor more information about chDB, visit: https://github.com/chdb-io/chdb"
                )
            self._setup_local_mode(config)
        elif resolved == "cloud":
            if find_spec("clickhouse_connect") is None:
                raise ImportError(
                    "ClickHouse Cloud mode requires clickhouse-connect but it is not installed.\n"
                    "To resolve this issue:\n"
                    "  1. Install clickhouse-connect: uv add clickhouse-connect\n"
                    "  2. Or use local mode: --platform clickhouse:local\n"
                    "\nFor more information, visit: https://clickhouse.com/docs/en/integrations/python"
                )
            self._setup_cloud_mode(config)
        elif resolved == "server":
            driver_ok, missing_packages = check_platform_dependencies(
                "clickhouse", ["clickhouse-driver"]
            )
            if not driver_ok:
                raise ImportError(get_dependency_error_message("clickhouse", missing_packages))
            self._setup_server_mode(config)

        # Existing code reads `mode`, so mirror the resolved value there.
        self.mode = self.deployment_mode
# Names exported by `from <this module> import *`.
__all__ = ["ClickHouseAdapter"]