Source code for benchbox.utils.scale_factor
"""Scale factor formatting utilities for consistent naming across BenchBox.
Copyright 2026 Joe Harris / BenchBox Project
Licensed under the MIT License. See LICENSE file in the project root for details.
"""
[docs]
def format_scale_factor(scale_factor: float) -> str:
    """Format scale factor for filenames and identifiers.

    Rules:
    - Values >= 1: No leading zero (sf1, sf10, sf100)
    - Values < 1: Leading zero + decimal digits (sf01, sf001, sf0001)
    - Leading zero implies the value is < 1

    Examples:
    - 1.0 -> sf1 (no leading zero)
    - 0.1 -> sf01 (leading zero implies < 1)
    - 0.01 -> sf001 (leading zero implies < 1)
    - 0.001 -> sf0001 (leading zero implies < 1)
    - 10 -> sf10 (no leading zero)
    - 1.5 -> sf15 (remove decimal point, no leading zero)
    - 0 -> sf0

    Values below 1 are rendered with at most 10 decimal places. A value
    that rounds up to 1 at that precision is reported as "sf1", and a
    value that rounds down to 0 is reported as "sf0".

    Args:
        scale_factor: The scale factor to format (must be >= 0)

    Returns:
        Formatted scale factor string (e.g., "sf1", "sf01", "sf001")

    Raises:
        ValueError: If scale_factor is NaN or negative.
        OverflowError: If scale_factor is positive infinity (raised by the
            integer conversion).
    """
    # Function-scope import keeps this block self-contained.
    import math

    # NaN and negative inputs have no meaningful name; without this guard
    # they would silently map onto the names of valid scale factors.
    if math.isnan(scale_factor) or scale_factor < 0:
        raise ValueError(
            f"scale_factor must be a non-negative number, got {scale_factor!r}"
        )

    if scale_factor >= 1:
        if scale_factor == int(scale_factor):
            # Integer values >= 1: no leading zero
            return f"sf{int(scale_factor)}"
        # Non-integer values >= 1: remove decimal point
        # 1.5 -> sf15, 2.25 -> sf225
        return "sf" + f"{scale_factor}".replace(".", "")

    # Values < 1: fixed-point formatting avoids exponent notation (1e-05).
    whole, _, fraction = f"{scale_factor:.10f}".partition(".")
    if whole == "1":
        # Rounded up to 1.0 at 10 decimals; must not collapse to "sf0".
        return "sf1"

    # Leading zero + significant decimal digits; an empty fraction means
    # the value is zero (or rounds to zero), giving "sf0".
    digits = fraction.rstrip("0")
    return f"sf0{digits}"
[docs]
def format_benchmark_name(benchmark_name: str, scale_factor: float) -> str:
    """Build a benchmark identifier that embeds the scale factor.

    Args:
        benchmark_name: Benchmark name used as the prefix (e.g., "tpch", "tpcds")
        scale_factor: Scale factor rendered by format_scale_factor for the suffix

    Returns:
        Name and scale-factor token joined by an underscore
        (e.g., "tpch_sf1", "tpcds_sf01")
    """
    suffix = format_scale_factor(scale_factor)
    return f"{benchmark_name}_{suffix}"
[docs]
def format_data_directory(benchmark_name: str, scale_factor: float) -> str:
    """Build the data directory name for a benchmark at a given scale factor.

    Args:
        benchmark_name: Benchmark name used as the prefix
        scale_factor: Scale factor rendered by format_scale_factor

    Returns:
        Directory name of the form "<benchmark>_<sf token>_data"
        (e.g., "tpch_sf1_data", "tpcds_sf01_data")
    """
    scale_token = format_scale_factor(scale_factor)
    directory_name = f"{benchmark_name}_{scale_token}_data"
    return directory_name
[docs]
def format_schema_name(benchmark_name: str, scale_factor: float) -> str:
    """Build the database schema name for a benchmark at a given scale factor.

    Args:
        benchmark_name: Benchmark name used as the prefix
        scale_factor: Scale factor rendered by format_scale_factor

    Returns:
        Schema name of the form "<benchmark>_<sf token>"
        (e.g., "tpch_sf1", "tpcds_sf01")
    """
    scale_token = format_scale_factor(scale_factor)
    schema_name = f"{benchmark_name}_{scale_token}"
    return schema_name