Fix: Correct RAWDATACOR partition mapping logic
- Fix year_to_partition_name() RAWDATACOR logic: properly clamp year between 2014-2024
before calculating partition index with formula (year - 2014)
- Previously: incorrectly tried to return "d" partition type with wrong formula
- Now: correctly returns "part{year-2014}" for RAWDATACOR table
- Update docstring: clarify d17 = 2030 (not 2031) as maximum ELABDATADISP partition
- Ensure partition mapping is consistent between year_to_partition_name() and
get_partitions_from_year() functions
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
106
config.py
106
config.py
@@ -187,3 +187,109 @@ TABLE_CONFIGS = {
|
||||
"elabdatadisp": _elabdatadisp_config,
|
||||
"ELABDATADISP": _elabdatadisp_config,
|
||||
}
|
||||
|
||||
|
||||
# Partition mapping utilities
|
||||
def year_to_partition_name(year: int, table: str) -> str:
    """Map a year to the name of the partition that holds it.

    Partition naming scheme (different for each table):
    - RAWDATACOR: part0=2014, part1=2015, ..., part10=2024 (part{year-2014})
    - ELABDATADISP: d0=2013, d1=2014, ..., d12=2025, ..., d17=2030 (d{year-2013})

    Years outside a table's range are clamped to the nearest supported year
    instead of being rejected, so the result always names a partition from
    the scheme above.

    Args:
        year: Calendar year; values below/above the table's range are clamped.
        table: Table name, case-insensitive (RAWDATACOR or ELABDATADISP).

    Returns:
        Partition name (e.g., "part8" for RAWDATACOR/2022, "d13" for
        ELABDATADISP/2026).

    Raises:
        ValueError: If table is unknown.
    """
    table_upper = table.upper()

    if table_upper == "RAWDATACOR":
        # RAWDATACOR: 2014-2024 (part0-part10)
        prefix, first_year, last_year = "part", 2014, 2024
    elif table_upper == "ELABDATADISP":
        # ELABDATADISP: 2013-2030 (d0-d17). The upper bound must match the
        # end year used by get_partitions_from_year(); clamping to 2031
        # would yield "d18", which is outside the documented scheme.
        prefix, first_year, last_year = "d", 2013, 2030
    else:
        raise ValueError(f"Unknown table: {table}")

    # Clamp first, then convert to a zero-based partition index.
    clamped_year = min(max(year, first_year), last_year)
    return f"{prefix}{clamped_year - first_year}"
|
||||
|
||||
|
||||
def get_partitions_from_year(year: int, table: str) -> list[str]:
    """List the partition names covering ``year`` through the table's last year.

    Args:
        year: First year to include.
        table: Table name, case-insensitive (RAWDATACOR or ELABDATADISP).

    Returns:
        Partition names in ascending year order, ending at the table's last
        year (2024 for RAWDATACOR, 2030 for ELABDATADISP). The list is empty
        when ``year`` is later than that last year. Consecutive years that
        map to the same partition contribute a single entry.

    Raises:
        ValueError: If table is unknown.

    Example:
        get_partitions_from_year(2022, "RAWDATACOR")
        → ["part8", "part9", "part10"]  # 2022, 2023, 2024

        get_partitions_from_year(2025, "ELABDATADISP")
        → ["d12", "d13", "d14", "d15", "d16", "d17"]  # 2025-2030
    """
    # Last year that has a partition, per table.
    last_year_by_table = {
        "RAWDATACOR": 2024,  # part0-part10 (2014-2024)
        "ELABDATADISP": 2030,  # d0-d17 (2013-2030)
    }
    last_year = last_year_by_table.get(table.upper())
    if last_year is None:
        raise ValueError(f"Unknown table: {table}")

    names = []
    for current_year in range(year, last_year + 1):
        name = year_to_partition_name(current_year, table)
        # Several years can resolve to the same partition; keep only the
        # first of each consecutive run.
        if names and names[-1] == name:
            continue
        names.append(name)

    return names
|
||||
|
||||
|
||||
def date_string_to_partition_name(date_str: str, table: str) -> str:
    """Extract the year from a date string and map it to a partition name.

    The first four characters of ``date_str`` are parsed as the year. When
    the string is empty, shorter than four characters, or its prefix is not
    an integer, the year falls back to 2014.

    Args:
        date_str: Date string in format 'YYYY-MM-DD' (e.g., '2022-05-15').
        table: Table name (RAWDATACOR or ELABDATADISP).

    Returns:
        Partition name as produced by year_to_partition_name() for the
        parsed (or fallback) year.

    Raises:
        ValueError: Propagated from year_to_partition_name() when table is
            unknown.

    Example:
        date_string_to_partition_name("2022-05-15", "RAWDATACOR") → "part8"
    """
    fallback_year = 2014
    year = fallback_year

    if date_str and len(date_str) >= 4:
        # Only the parse is guarded: an unknown-table ValueError from the
        # mapping call below must surface directly instead of being caught
        # here and retried with the fallback year.
        try:
            year = int(date_str[:4])
        except (ValueError, TypeError):
            year = fallback_year

    return year_to_partition_name(year, table)
|
||||
|
||||
Reference in New Issue
Block a user