# Partition mapping utilities

# Single source of truth for the partition layout of each table:
# table name -> (partition name prefix, first partitioned year, last partitioned year).
# Keeping the bounds here guarantees year_to_partition_name() and
# get_partitions_from_year() can never disagree about the last partition.
_PARTITION_SCHEMES: dict[str, tuple[str, int, int]] = {
    "RAWDATACOR": ("part", 2014, 2024),    # part0=2014 ... part10=2024
    "ELABDATADISP": ("d", 2013, 2030),     # d0=2013 ... d17=2030
}

# Year assumed when a date string is missing or cannot be parsed.
_FALLBACK_YEAR = 2014


def _partition_scheme(table: str) -> tuple[str, int, int]:
    """Return (prefix, first_year, last_year) for *table*, case-insensitively."""
    try:
        return _PARTITION_SCHEMES[table.upper()]
    except KeyError:
        raise ValueError(f"Unknown table: {table}") from None


def year_to_partition_name(year: int, table: str) -> str:
    """Map year to partition name.

    Partition naming scheme (different for each table):
    - RAWDATACOR: part0=2014, part1=2015, ..., part10=2024 (part{year-2014})
    - ELABDATADISP: d0=2013, d1=2014, ..., d12=2025, ..., d17=2030 (d{year-2013})

    Years outside a table's range are clamped to the nearest valid year, so
    the first/last partition is returned instead of an error.

    Args:
        year: Year to map (clamped to 2014-2024 for RAWDATACOR and to
            2013-2030 for ELABDATADISP)
        table: Table name (RAWDATACOR or ELABDATADISP, case-insensitive)

    Returns:
        Partition name (e.g., "part8" for RAWDATACOR/2022, "d13" for ELABDATADISP/2026)

    Raises:
        ValueError: If table is unknown
    """
    prefix, first_year, last_year = _partition_scheme(table)
    # Clamp rather than fail: out-of-range years fall into the edge partition.
    clamped_year = min(max(year, first_year), last_year)
    return f"{prefix}{clamped_year - first_year}"


def get_partitions_from_year(year: int, table: str) -> list[str]:
    """Get list of partition names from a specific year onwards.

    Args:
        year: Starting year. Years before the first partition start from the
            first partition; years after the last partition yield an empty list.
        table: Table name (RAWDATACOR or ELABDATADISP, case-insensitive)

    Returns:
        List of partition names from that year to the latest available year

    Raises:
        ValueError: If table is unknown

    Example:
        get_partitions_from_year(2022, "RAWDATACOR")
        → ["part8", "part9", "part10"]  # 2022→8, 2023→9, 2024→10 (stop at latest)

        get_partitions_from_year(2025, "ELABDATADISP")
        → ["d12", "d13", "d14", "d15", "d16", "d17"]  # 2025-2030
    """
    prefix, first_year, last_year = _partition_scheme(table)
    # Starting at the first partitioned year avoids walking (and then
    # de-duplicating) every earlier year that would clamp to the same partition.
    start_year = max(year, first_year)
    return [f"{prefix}{y - first_year}" for y in range(start_year, last_year + 1)]


def date_string_to_partition_name(date_str: str, table: str) -> str:
    """Extract year from date string and map to partition name.

    Missing, too-short, non-string or unparseable input falls back to the
    year 2014 (i.e. "part0" for RAWDATACOR, "d1" for ELABDATADISP).

    Args:
        date_str: Date string in format 'YYYY-MM-DD' (e.g., '2022-05-15')
        table: Table name (RAWDATACOR or ELABDATADISP, case-insensitive)

    Returns:
        Partition name (e.g., "part8" or "d9" for a 2022 date)

    Raises:
        ValueError: If table is unknown

    Example:
        date_string_to_partition_name("2022-05-15", "RAWDATACOR") → "part8"
    """
    try:
        # The length check lives inside the try so that non-string input
        # (which fails on len() or slicing) also takes the fallback path.
        if date_str and len(date_str) >= 4:
            year = int(date_str[:4])
        else:
            year = _FALLBACK_YEAR
    except (ValueError, TypeError):
        year = _FALLBACK_YEAR

    return year_to_partition_name(year, table)