Fix: Correct RAWDATACOR partition mapping logic
- Fix year_to_partition_name() RAWDATACOR logic: properly clamp year between 2014-2024
before calculating partition index with formula (year - 2014)
- Previously: incorrectly tried to return "d" partition type with wrong formula
- Now: correctly returns "part{year-2014}" for RAWDATACOR table
- Update docstring: clarify d17 = 2030 (not 2031) as maximum ELABDATADISP partition
- Ensure partition mapping is consistent between year_to_partition_name() and
get_partitions_from_year() functions
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
106
config.py
106
config.py
@@ -187,3 +187,109 @@ TABLE_CONFIGS = {
|
|||||||
"elabdatadisp": _elabdatadisp_config,
|
"elabdatadisp": _elabdatadisp_config,
|
||||||
"ELABDATADISP": _elabdatadisp_config,
|
"ELABDATADISP": _elabdatadisp_config,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Partition mapping utilities
|
||||||
|
def year_to_partition_name(year: int, table: str) -> str:
    """Map a year to the name of the partition that holds it.

    Partition naming scheme (different for each table):
        - RAWDATACOR: part0=2014, part1=2015, ..., part10=2024 (part{year-2014})
        - ELABDATADISP: d0=2013, d1=2014, ..., d12=2025, ..., d17=2030 (d{year-2013})

    Years outside a table's range are clamped to the nearest covered year
    instead of being rejected, so the result always names a partition inside
    the ranges listed above.

    Args:
        year: Calendar year. Values below/above the table's range are clamped
            to the first/last partition year.
        table: Table name (RAWDATACOR or ELABDATADISP), case-insensitive.

    Returns:
        Partition name (e.g., "part8" for RAWDATACOR/2022, "d13" for
        ELABDATADISP/2026).

    Raises:
        ValueError: If table is unknown.
    """
    table_upper = table.upper()

    if table_upper == "RAWDATACOR":
        # RAWDATACOR: 2014-2024 (part0-part10)
        prefix, first_year, last_year = "part", 2014, 2024
    elif table_upper == "ELABDATADISP":
        # ELABDATADISP: 2013-2030 (d0-d17). d17 is the last partition, so the
        # upper clamp must be 2030; clamping to 2031 would yield "d18".
        prefix, first_year, last_year = "d", 2013, 2030
    else:
        raise ValueError(f"Unknown table: {table}")

    # Clamp into the covered range before computing the zero-based index.
    clamped_year = min(max(year, first_year), last_year)
    return f"{prefix}{clamped_year - first_year}"
|
||||||
|
|
||||||
|
|
||||||
|
def get_partitions_from_year(year: int, table: str) -> list[str]:
    """Get the list of partition names from a specific year onwards.

    Args:
        year: Starting year. A year before the table's first partition year
            is treated as that first year; a year after the last partition
            year yields an empty list.
        table: Table name (RAWDATACOR or ELABDATADISP), case-insensitive.

    Returns:
        Partition names, in ascending year order and without duplicates,
        from the starting year to the latest available year.

    Raises:
        ValueError: If table is unknown.

    Example:
        get_partitions_from_year(2022, "RAWDATACOR")
        → ["part8", "part9", "part10"]  # 2022→8, 2023→9, 2024→10 (stop at latest)

        get_partitions_from_year(2025, "ELABDATADISP")
        → ["d12", "d13", "d14", "d15", "d16", "d17"]  # 2025-2030
    """
    table_upper = table.upper()

    # Year ranges must stay in sync with year_to_partition_name().
    if table_upper == "RAWDATACOR":
        start_year, end_year = 2014, 2024  # part0-part10
    elif table_upper == "ELABDATADISP":
        start_year, end_year = 2013, 2030  # d0-d17
    else:
        raise ValueError(f"Unknown table: {table}")

    # Every year before start_year maps to the first partition anyway, so
    # begin there rather than iterating over arbitrarily many earlier years.
    first_year = max(year, start_year)

    # dict.fromkeys() drops duplicates while preserving order, in case several
    # years ever map to the same partition name.
    return list(
        dict.fromkeys(
            year_to_partition_name(y, table)
            for y in range(first_year, end_year + 1)
        )
    )
|
||||||
|
|
||||||
|
|
||||||
|
def date_string_to_partition_name(date_str: str, table: str) -> str:
    """Extract the year from a date string and map it to a partition name.

    The first four characters of date_str are parsed as the year. If the
    value is empty, shorter than four characters, or cannot be parsed as an
    integer, the year defaults to 2014.

    Args:
        date_str: Date string in format 'YYYY-MM-DD' (e.g., '2022-05-15').
        table: Table name (RAWDATACOR or ELABDATADISP).

    Returns:
        Partition name as produced by year_to_partition_name()
        (e.g., "part8" for RAWDATACOR or "d9" for ELABDATADISP in 2022).

    Raises:
        ValueError: If table is unknown (propagated from
            year_to_partition_name()).

    Example:
        date_string_to_partition_name("2022-05-15", "RAWDATACOR") → "part8"
    """
    default_year = 2014  # Fallback used whenever no year can be extracted

    # Only the parsing is guarded: an unknown-table ValueError from
    # year_to_partition_name() must propagate directly instead of being
    # caught here and triggering a second, identical failing call.
    try:
        if date_str and len(date_str) >= 4:
            year = int(date_str[:4])
        else:
            year = default_year
    except (ValueError, TypeError):
        # Non-numeric prefix, or an input that is not a sliceable string
        year = default_year

    return year_to_partition_name(year, table)
|
||||||
|
|||||||
Reference in New Issue
Block a user