Fix: Correct RAWDATACOR partition mapping logic

- Fix year_to_partition_name() RAWDATACOR logic: properly clamp year between 2014-2024
  before calculating partition index with formula (year - 2014)
- Previously: incorrectly tried to return "d" partition type with wrong formula
- Now: correctly returns "part{year-2014}" for RAWDATACOR table
- Update docstring: clarify d17 = 2030 (not 2031) as maximum ELABDATADISP partition
- Ensure partition mapping is consistent between year_to_partition_name() and
  get_partitions_from_year() functions

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-11 15:33:08 +01:00
parent d1dbf7f0de
commit 53cde5f667

106
config.py
View File

@@ -187,3 +187,109 @@ TABLE_CONFIGS = {
"elabdatadisp": _elabdatadisp_config, "elabdatadisp": _elabdatadisp_config,
"ELABDATADISP": _elabdatadisp_config, "ELABDATADISP": _elabdatadisp_config,
} }
# Partition mapping utilities
def year_to_partition_name(year: int, table: str) -> str:
    """Map a year to the partition name used by the given table.

    Partition naming scheme (different for each table):
        - RAWDATACOR: part0=2014, part1=2015, ..., part10=2024 (part{year-2014})
        - ELABDATADISP: d0=2013, d1=2014, ..., d12=2025, ..., d17=2030 (d{year-2013})

    Years outside a table's range are clamped to the nearest covered year, so
    the first or last partition is returned instead of raising.

    Args:
        year: Calendar year. Values below/above the table's range are clamped.
        table: Table name (RAWDATACOR or ELABDATADISP), case-insensitive.

    Returns:
        Partition name (e.g., "part8" for RAWDATACOR/2022, "d13" for
        ELABDATADISP/2026).

    Raises:
        ValueError: If table is unknown.
    """
    table_upper = table.upper()
    if table_upper == "RAWDATACOR":
        # RAWDATACOR: 2014-2024 (part0-part10)
        prefix, first_year, last_year = "part", 2014, 2024
    elif table_upper == "ELABDATADISP":
        # ELABDATADISP: 2013-2030 (d0-d17); d17 is the last partition, so the
        # upper clamp must be 2030 to stay consistent with
        # get_partitions_from_year() and never produce a "d18" name.
        prefix, first_year, last_year = "d", 2013, 2030
    else:
        raise ValueError(f"Unknown table: {table}")

    # Clamp into the covered range, then offset from the first partition year.
    clamped_year = min(max(year, first_year), last_year)
    return f"{prefix}{clamped_year - first_year}"
def get_partitions_from_year(year: int, table: str) -> list[str]:
    """Get the list of partition names from a specific year onwards.

    Args:
        year: Starting year. A year earlier than the table's first partition
            year is treated as that first year, so every partition is listed.
        table: Table name (RAWDATACOR or ELABDATADISP), case-insensitive.

    Returns:
        Partition names in ascending order, one per year from the starting
        year to the table's last partitioned year. The list is empty when
        ``year`` is later than the last partitioned year.

    Raises:
        ValueError: If table is unknown.

    Example:
        get_partitions_from_year(2022, "RAWDATACOR")
            → ["part8", "part9", "part10"]  # 2022→8, 2023→9, 2024→10
        get_partitions_from_year(2025, "ELABDATADISP")
            → ["d12", "d13", "d14", "d15", "d16", "d17"]  # 2025-2030
    """
    table_upper = table.upper()
    if table_upper == "RAWDATACOR":
        first_year, last_year = 2014, 2024  # part0-part10
    elif table_upper == "ELABDATADISP":
        first_year, last_year = 2013, 2030  # d0-d17
    else:
        raise ValueError(f"Unknown table: {table}")

    # Clamp the start once instead of walking every earlier year: all years
    # before first_year map to the first partition anyway, so this yields the
    # same list without duplicates and without unbounded iteration.
    start_year = max(year, first_year)
    return [
        year_to_partition_name(y, table)
        for y in range(start_year, last_year + 1)
    ]
def date_string_to_partition_name(date_str: str, table: str) -> str:
    """Extract the year from a date string and map it to a partition name.

    The first four characters of ``date_str`` are parsed as the year. When the
    string is empty, shorter than four characters, or its prefix is not an
    integer, the year falls back to 2014.

    Args:
        date_str: Date string in format 'YYYY-MM-DD' (e.g., '2022-05-15').
        table: Table name (RAWDATACOR or ELABDATADISP).

    Returns:
        Partition name as produced by year_to_partition_name() for the parsed
        (or fallback) year.

    Raises:
        ValueError: If table is unknown (propagated from
            year_to_partition_name()).

    Example:
        date_string_to_partition_name("2022-05-15", "RAWDATACOR") → "part8"
    """
    fallback_year = 2014
    year = fallback_year

    if date_str and len(date_str) >= 4:
        # Only the parse is guarded: an unknown-table ValueError raised by
        # year_to_partition_name() must not be mistaken for a bad date.
        try:
            year = int(date_str[:4])
        except (ValueError, TypeError):
            year = fallback_year

    return year_to_partition_name(year, table)