Skip to content

year_consistency

Year consistency checking logic extracted from YearRetriever.

This module handles year dominance calculation, parity detection, consensus checking, and anomalous track identification.

YearConsistencyChecker

YearConsistencyChecker(
    *,
    console_logger,
    top_years_count=TOP_YEARS_COUNT,
    parity_threshold=PARITY_THRESHOLD,
    dominance_min_share=DOMINANCE_MIN_SHARE,
    suspicion_threshold_years=DEFAULT_SUSPICION_THRESHOLD_YEARS
)

Handles year consistency analysis for album tracks.

Responsibilities:

- Calculate dominant year using majority rule
- Detect year parity between top candidates
- Find consensus release year across tracks
- Identify tracks with anomalous years

Initialize the year consistency checker.

Parameters:

Name Type Description Default
console_logger Logger

Logger for console output

required
top_years_count int

Number of top years to consider for parity

TOP_YEARS_COUNT
parity_threshold int

Max difference for parity detection

PARITY_THRESHOLD
dominance_min_share float

Min share of tracks for dominance (0.0-1.0)

DOMINANCE_MIN_SHARE
suspicion_threshold_years int

If dominant year is this many years older than earliest track added date, trigger API verification

DEFAULT_SUSPICION_THRESHOLD_YEARS
Source code in src/core/tracks/year_consistency.py
def __init__(
    self,
    *,
    console_logger: logging.Logger,
    top_years_count: int = TOP_YEARS_COUNT,
    parity_threshold: int = PARITY_THRESHOLD,
    dominance_min_share: float = DOMINANCE_MIN_SHARE,
    suspicion_threshold_years: int = DEFAULT_SUSPICION_THRESHOLD_YEARS,
) -> None:
    """Store the logger and tuning thresholds used by the checker.

    All arguments are keyword-only and are kept verbatim as instance
    attributes of the same name; no validation is performed here.

    Args:
        console_logger: Logger for console output
        top_years_count: Number of top years to consider for parity
        parity_threshold: Max difference for parity detection
        dominance_min_share: Min share of tracks for dominance (0.0-1.0)
        suspicion_threshold_years: If dominant year is this many years older
            than earliest track added date, trigger API verification

    """
    # Output channel.
    self.console_logger = console_logger

    # Thresholds for dominance / suspicion decisions.
    self.dominance_min_share = dominance_min_share
    self.suspicion_threshold_years = suspicion_threshold_years

    # Settings for parity detection between the top candidate years.
    self.top_years_count = top_years_count
    self.parity_threshold = parity_threshold

get_dominant_year

get_dominant_year(tracks)

Find dominant year among tracks using majority rule.

Calculates dominance based on ALL tracks in the album, not just the tracks that have a year. A year is dominant only if >50% of ALL album tracks have that year.

Note: Years "0" and empty strings are excluded from dominance calculation as they represent placeholder/default values in Music.app.

Parameters:

Name Type Description Default
tracks list[TrackDict]

List of ALL tracks in the album to analyze

required

Returns:

Type Description
str | None

Dominant year string if found; None if there is no clear majority or if the top years are at parity

Source code in src/core/tracks/year_consistency.py
def get_dominant_year(self, tracks: list[TrackDict]) -> str | None:
    """Pick the album's dominant year, or None when no year can be trusted.

    The checks run in a fixed order and the first one that yields a year
    wins: release_year inconsistency, majority dominance, collaboration
    pattern. Track totals use ``len(tracks)`` (every track passed in) as
    the denominator, not only the tracks that carry a valid year.

    Note: which year values count as valid is decided by
    ``_collect_valid_years``; years "0" and empty strings are excluded
    as they represent placeholder/default values in Music.app.

    Args:
        tracks: List of ALL tracks in the album to analyze

    Returns:
        Dominant year string if one of the checks produced it. None when
        there are no valid years, when the majority year was flagged as
        suspicious, when the top years are at parity, or when the most
        frequent year does not reach the dominance threshold.

    """
    valid_years = self._collect_valid_years(tracks)
    if not valid_years:
        return None

    counts_by_year: Counter[str] = Counter(valid_years)
    album_size = len(tracks)
    top_entry: tuple[str, int] = counts_by_year.most_common(1)[0]
    top_year, top_count = top_entry

    # 1) release_year inconsistency case
    inconsistency_year = self._check_release_year_inconsistency(tracks, valid_years, top_year)
    if inconsistency_year:
        self._log_anomalous_tracks(tracks, inconsistency_year)
        return inconsistency_year

    # 2) clear majority
    majority_year, flagged_suspicious = self._check_majority_dominance(top_entry, album_size, tracks)
    if majority_year:
        self._log_anomalous_tracks(tracks, majority_year)
        return majority_year
    if flagged_suspicious:
        # The helper already reported the suspicious year, so the
        # "no dominant year" message below would be redundant.
        return None

    # 3) collaboration albums (some empty years but otherwise consistent)
    collaboration_year = self._check_collaboration_pattern(
        counts_by_year, valid_years, top_entry, album_size, tracks
    )
    if collaboration_year:
        self._log_anomalous_tracks(tracks, collaboration_year)
        return collaboration_year

    # 4) parity between the top years ends the search silently; otherwise
    #    report that the most frequent year is genuinely below threshold.
    if not self._check_year_parity(counts_by_year):
        self.console_logger.info(
            "No dominant year (below %.0f%%): %s has %d/%d album tracks (%.1f%%) - need API",
            self.dominance_min_share * 100,
            top_year,
            top_count,
            album_size,
            (top_count / album_size) * 100,
        )
    return None

get_most_common_year staticmethod

get_most_common_year(tracks)

Get most common year among tracks for comparison purposes.

Unlike get_dominant_year(), this method returns the most common year without any trust checks (suspicious year detection, dominance threshold). Used specifically for year-match comparison with API results.

Parameters:

Name Type Description Default
tracks list[TrackDict]

List of tracks to analyze

required

Returns:

Type Description
str | None

Most common year string, or None if no valid years found

Source code in src/core/tracks/year_consistency.py
@staticmethod
def get_most_common_year(tracks: list[TrackDict]) -> str | None:
    """Return the single most frequent valid year among the tracks.

    This is a plain frequency lookup: unlike get_dominant_year(), it
    applies no trust checks (suspicious year detection, dominance
    threshold). Used specifically for year-match comparison with API
    results.

    Args:
        tracks: List of tracks to analyze

    Returns:
        Most common year string, or None if no valid years found
    """
    valid_years = YearConsistencyChecker._collect_valid_years(tracks)
    if valid_years:
        top_year, _occurrences = Counter(valid_years).most_common(1)[0]
        return top_year
    return None

get_consensus_release_year

get_consensus_release_year(tracks)

Get release_year if all tracks agree (consensus).

Parameters:

Name Type Description Default
tracks list[TrackDict]

List of tracks to check

required

Returns:

Type Description
str | None

Consensus release_year string if found, None otherwise

Source code in src/core/tracks/year_consistency.py
def get_consensus_release_year(self, tracks: list[TrackDict]) -> str | None:
    """Return the release_year shared by every track that reports one.

    Tracks whose ``release_year`` is missing or falsy are ignored; the
    remaining values are compared as strings.

    Args:
        tracks: List of tracks to check

    Returns:
        The common release_year string when exactly one distinct value
        exists and ``_is_reasonable_year`` accepts it; None otherwise
        (no values, conflicting values, or an unreasonable year).

    """
    reported = [str(value) for track in tracks if (value := track.get("release_year"))]
    if not reported:
        return None

    distinct = set(reported)

    # Conflicting values: report the distribution and give up.
    if len(distinct) > 1:
        breakdown = ", ".join(f"{y} ({reported.count(y)})" for y in distinct)
        self.console_logger.info(
            "Multiple release_years found: %s - no consensus",
            breakdown,
        )
        return None

    # Exactly one distinct value remains at this point.
    (candidate,) = distinct
    if not _is_reasonable_year(candidate):
        return None

    self.console_logger.info(
        "Consensus release_year: %s (all %d tracks agree)",
        candidate,
        len(reported),
    )
    return candidate

get_earliest_track_added_year staticmethod

get_earliest_track_added_year(tracks)

Extract earliest year any track was added to library.

Useful for detecting current year contamination - if tracks were added this year and library year is current year, it's likely legitimate.

Parameters:

Name Type Description Default
tracks list[TrackDict]

List of tracks to analyze

required

Returns:

Type Description
int | None

Earliest year a track was added, or None if no dates found

Source code in src/core/tracks/year_consistency.py
@staticmethod
def get_earliest_track_added_year(tracks: list[TrackDict]) -> int | None:
    """Extract earliest year any track was added to library.

    Useful for detecting current year contamination - if tracks were added
    this year and library year is current year, it's likely legitimate.

    Args:
        tracks: List of tracks to analyze

    Returns:
        Earliest year a track was added, or None if no dates found

    """
    earliest: int | None = None
    for track in tracks:
        date_added = track.get("date_added")
        if not date_added:
            continue
        try:
            # Parse date_added format: "2025-10-01 00:19:04"
            year = int(str(date_added)[:4])
            if earliest is None or year < earliest:
                earliest = year
        except (ValueError, TypeError, IndexError):
            continue
    return earliest