Skip to content

pending_verification

Pending Verification Module.

This module maintains a list of albums that need re-verification in the future. When an album's year cannot be definitively determined from external sources, it is added to this list with a timestamp. On future runs, albums whose verification period has elapsed are checked again.

File operations (_load_pending_albums, _save_pending_albums) are asynchronous using asyncio's run_in_executor to avoid blocking the event loop.

Refactored: Initial asynchronous loading handled in a separate async initialize method, called by DependencyContainer after service instantiation.

Usage

service = PendingVerificationService(config, console_logger, error_logger)
await service.initialize()  # IMPORTANT: Call this after creating the instance

Mark album for future verification (now an async method)

await service.mark_for_verification("Pink Floyd", "The Dark Side of the Moon")

Check if album needs verification now (now an async method)

if await service.is_verification_needed("Pink Floyd", "The Dark Side of the Moon"):
    # Perform verification
    pass

Get all pending albums (now an async method)

pending_list = await service.get_all_pending_albums()

Get verified album keys (now an async method)

verified_keys = await service.get_verified_album_keys()

PendingVerificationService

PendingVerificationService(
    config, console_logger, error_logger
)

Service to track albums needing future verification of their release year.

Uses hash-based keys for album data. File operations are asynchronous. Initializes asynchronously.

Initialize the PendingVerificationService.

Does NOT perform file loading here. Use the async initialize method.

Parameters:

Name Type Description Default
config AppConfig

Typed application configuration

required
console_logger Logger

Logger for console output

required
error_logger Logger

Logger for error logging.

required
Source code in src/services/pending_verification.py
def __init__(
    self,
    config: AppConfig,
    console_logger: logging.Logger,
    error_logger: logging.Logger,
) -> None:
    """Set up service state without touching the disk.

    No file loading happens here; await the async ``initialize`` method
    after construction to populate the in-memory cache.

    Args:
        config: Typed application configuration
        console_logger: Logger for console output
        error_logger: Logger for error logging.

    """
    self.config = config
    self.console_logger = console_logger
    self.error_logger = error_logger

    # Re-check intervals are read from the year-retrieval processing settings
    processing = config.year_retrieval.processing
    self.verification_interval_days = processing.pending_verification_interval_days

    # Prerelease albums get their own interval; a falsy normalized value
    # falls back to the general verification interval
    normalized_prerelease = self._normalize_recheck_days(processing.prerelease_recheck_days)
    self.prerelease_recheck_days = normalized_prerelease if normalized_prerelease else self.verification_interval_days

    # Resolve where the pending list is persisted
    self.pending_file_path = get_full_log_path(config, "pending_verification_file", "csv/pending_year_verification.csv")

    # In-memory cache: album hash key -> PendingAlbumEntry
    self.pending_albums: dict[str, PendingAlbumEntry] = {}

    # Serializes coroutine access to the cache (an asyncio.Lock guards
    # tasks on one event loop; it is not a cross-thread lock)
    self._lock = asyncio.Lock()

    # Warning-level sink handed to blocking helpers so they can report errors
    self._error_callback: ErrorCallback = self.error_logger.warning

initialize async

initialize()

Asynchronously initializes the PendingVerificationService by loading data from the disk.

This method must be called after instantiation.

Source code in src/services/pending_verification.py
async def initialize(self) -> None:
    """Load the pending list from disk and normalize its keys.

    Must be awaited once after the instance has been constructed.
    """
    log_info = self.console_logger.info
    log_info("Initializing PendingVerificationService asynchronously...")

    await self._load_pending_albums()
    # Re-key loaded entries so lookups stay compatible with cleaned album names
    await self._normalize_pending_album_keys()

    log_info("PendingVerificationService asynchronous initialization complete.")

generate_album_key

generate_album_key(artist, album)

Public method to generate a unique hash key for an album.

Source code in src/services/pending_verification.py
def generate_album_key(self, artist: str, album: str) -> str:
    """Return the hash key identifying an artist/album pair.

    Public wrapper that delegates to the private ``_generate_album_key`` helper.
    """
    album_key: str = self._generate_album_key(artist, album)
    return album_key

mark_for_verification async

mark_for_verification(
    artist,
    album,
    reason=NO_YEAR_FOUND,
    metadata=None,
    recheck_days=None,
)

Mark an album for future verification with reason and optional metadata.

Uses a hash key for storage. Saves asynchronously. If the album is already pending, increments the attempt counter.

Parameters:

Name Type Description Default
artist str

Artist name

required
album str

Album name

required
reason VerificationReason | str

Reason for verification (default: NO_YEAR_FOUND, can be PRERELEASE, etc.)

NO_YEAR_FOUND
metadata dict[str, Any] | None

Optional metadata dictionary to store additional information

None
recheck_days int | None

Optional override for verification interval in days

None
Source code in src/services/pending_verification.py
async def mark_for_verification(
    self,
    artist: str,
    album: str,
    reason: VerificationReason | str = VerificationReason.NO_YEAR_FOUND,
    metadata: dict[str, Any] | None = None,
    recheck_days: int | None = None,
) -> None:
    """Add (or refresh) an album in the pending-verification list.

    The entry is stored under the album's hash key with a fresh timestamp and
    then persisted asynchronously. Marking an album that is already pending
    bumps its attempt counter instead of resetting it.

    Args:
        artist: Artist name
        album: Album name
        reason: Reason for verification (default: NO_YEAR_FOUND, can be PRERELEASE, etc.)
        metadata: Optional metadata dictionary to store additional information
        recheck_days: Optional override for verification interval in days

    """
    # Accept either the enum itself or its string form
    if isinstance(reason, str):
        reason_enum = VerificationReason.from_string(reason)
    else:
        reason_enum = reason
    is_prerelease = reason_enum == VerificationReason.PRERELEASE

    # An explicit override wins; prereleases otherwise use their own default
    interval_override = self._normalize_recheck_days(recheck_days)
    if is_prerelease and interval_override is None:
        interval_override = self.prerelease_recheck_days

    # Metadata is stored as a JSON string; an empty payload becomes ""
    payload: dict[str, Any] = dict(metadata) if metadata else {}
    if interval_override is not None:
        payload["recheck_days"] = interval_override
    serialized_metadata = json.dumps(payload) if payload else ""

    # Mutate the shared cache only while holding the lock
    async with self._lock:
        album_key = self._generate_album_key(artist, album)

        previous = self.pending_albums.get(album_key)
        new_attempt_count = previous.attempt_count + 1 if previous else 1

        self.pending_albums[album_key] = PendingAlbumEntry(
            timestamp=datetime.now(UTC),
            artist=artist.strip(),
            album=album.strip(),
            reason=reason_enum,
            metadata=serialized_metadata,
            attempt_count=new_attempt_count,
        )

    # Report the interval that actually applies to this entry
    if interval_override is None:
        effective_interval = self.verification_interval_days
    else:
        effective_interval = interval_override

    if is_prerelease:
        self.console_logger.info(
            "Marked prerelease album '%s - %s' for future verification in %d days (attempt #%d)",
            artist,
            album,
            effective_interval,
            new_attempt_count,
        )
    else:
        self.console_logger.info(
            "Marked '%s - %s' for verification in %d days (reason: %s, attempt #%d)",
            artist,
            album,
            effective_interval,
            reason_enum.value,
            new_attempt_count,
        )

    # Persist the updated cache
    await self._save_pending_albums()

get_entry async

get_entry(artist, album)

Get pending entry for artist/album if exists.

Parameters:

Name Type Description Default
artist str

Artist name

required
album str

Album name

required

Returns:

Type Description
PendingAlbumEntry | None

PendingAlbumEntry if found, None otherwise.

Source code in src/services/pending_verification.py
async def get_entry(self, artist: str, album: str) -> PendingAlbumEntry | None:
    """Look up the pending entry stored for an artist/album pair.

    Args:
        artist: Artist name
        album: Album name

    Returns:
        The matching PendingAlbumEntry, or None when the album is not pending.

    """
    async with self._lock:
        lookup_key = self._generate_album_key(artist, album)
        found = self.pending_albums.get(lookup_key)
    return found

get_attempt_count async

get_attempt_count(artist, album)

Get current verification attempt count for an album.

Parameters:

Name Type Description Default
artist str

Artist name

required
album str

Album name

required

Returns:

Type Description
int

Number of verification attempts made (0 if not in pending list).

Source code in src/services/pending_verification.py
async def get_attempt_count(self, artist: str, album: str) -> int:
    """Report how many verification attempts are recorded for an album.

    Args:
        artist: Artist name
        album: Album name

    Returns:
        The stored attempt count, or 0 when the album is not in the pending list.

    """
    async with self._lock:
        found = self.pending_albums.get(self._generate_album_key(artist, album))

    if not found:
        return 0
    return found.attempt_count

is_verification_needed async

is_verification_needed(artist, album)

Check if an album needs verification now.

Uses the hash key for lookup. Reads from an in-memory cache (async with lock).

Parameters:

Name Type Description Default
artist str

Artist name

required
album str

Album name

required

Returns:

Type Description
bool

True if the verification period has elapsed, False otherwise

Source code in src/services/pending_verification.py
async def is_verification_needed(self, artist: str, album: str) -> bool:
    """Check if an album needs verification now.

    Uses the hash key for lookup. Reads from an in-memory cache (async with lock).

    Args:
        artist: Artist name
        album: Album name

    Returns:
        True if the verification period has elapsed, False otherwise

    """
    # Acquire lock before reading from the in-memory cache
    async with self._lock:
        # Generate the hash key for the album
        key_hash = self._generate_album_key(artist, album)

        # Check if the hash key exists in the in-memory cache
        if key_hash not in self.pending_albums:
            return False

        # Get the entry
        entry = self.pending_albums[key_hash]
        metadata = self._parse_metadata(entry.metadata)
        interval_days = self.verification_interval_days

        if entry.reason == VerificationReason.PRERELEASE:
            override = self._normalize_recheck_days(metadata.get("recheck_days"))
            interval_days = override if override is not None else self.prerelease_recheck_days

        verification_time = entry.timestamp + timedelta(days=interval_days)

        if datetime.now(UTC) >= verification_time:
            # Verification period has elapsed
            self.console_logger.info(
                "Verification period elapsed for '%s - %s'",
                entry.artist,
                entry.album,
            )
            return True

        return False

remove_from_pending async

remove_from_pending(artist, album)

Remove an album from the pending verification list.

Uses the hash key for removal. Saves asynchronously.

Parameters:

Name Type Description Default
artist str

Artist name

required
album str

Album name

required
Source code in src/services/pending_verification.py
async def remove_from_pending(self, artist: str, album: str) -> None:
    """Drop an album from the pending-verification list.

    The entry is located by hash key. The list is saved asynchronously only
    when an entry was actually removed.

    Args:
        artist: Artist name
        album: Album name

    """
    async with self._lock:
        removed = self.pending_albums.pop(self._generate_album_key(artist, album), None)

        if removed is None:
            self.console_logger.debug(
                "Attempted to remove '%s - %s' from pending verification, but it was not found.",
                artist,
                album,
            )
            # Nothing changed, so skip the save
            return

        # Log the names as they were stored, not as they were passed in
        self.console_logger.info(
            "Removed '%s - %s' from pending verification",
            removed.artist,
            removed.album,
        )

    # Persist the shrunken cache
    await self._save_pending_albums()

get_all_pending_albums async

get_all_pending_albums()

Get a list of all pending albums with their verification data.

Retrieves all PendingAlbumEntry objects from the in-memory cache. Accesses the in-memory cache asynchronously with a lock.

Returns:

Type Description
list[PendingAlbumEntry]

List of PendingAlbumEntry objects

Source code in src/services/pending_verification.py
async def get_all_pending_albums(self) -> list[PendingAlbumEntry]:
    """Return a snapshot of every pending album entry.

    The values of the in-memory cache are copied into a new list while the
    lock is held.

    Returns:
        List of PendingAlbumEntry objects

    """
    async with self._lock:
        snapshot = [*self.pending_albums.values()]
    return snapshot

get_pending_albums_by_reason async

get_pending_albums_by_reason(reason)

Get pending albums filtered by reason.

Parameters:

Name Type Description Default
reason VerificationReason | str

The reason to filter by (e.g., PRERELEASE, NO_YEAR_FOUND)

required

Returns:

Type Description
list[PendingAlbumEntry]

List of PendingAlbumEntry objects matching the reason

Source code in src/services/pending_verification.py
async def get_pending_albums_by_reason(
    self,
    reason: VerificationReason | str,
) -> list[PendingAlbumEntry]:
    """Return the pending albums recorded with a given reason.

    Args:
        reason: The reason to filter by (e.g., PRERELEASE, NO_YEAR_FOUND)

    Returns:
        List of PendingAlbumEntry objects matching the reason

    """
    # Accept either the enum itself or its string form
    if isinstance(reason, str):
        wanted_reason = VerificationReason.from_string(reason)
    else:
        wanted_reason = reason

    async with self._lock:
        matching = [candidate for candidate in self.pending_albums.values() if candidate.reason == wanted_reason]
    return matching

get_verified_album_keys async

get_verified_album_keys()

Get the set of album hash keys that need verification now.

Checks the timestamp of each stored entry. Accesses the in-memory cache asynchronously with a lock.

Returns:

Type Description
set[str]

Set of album hash keys needing verification

Source code in src/services/pending_verification.py
async def get_verified_album_keys(self) -> set[str]:
    """Get the set of album hash keys that need verification now.

    Each entry's interval is resolved the same way ``is_verification_needed``
    resolves it, so both methods agree on which albums are due: prerelease
    entries use the ``recheck_days`` value stored in their metadata (falling
    back to ``prerelease_recheck_days``), and all other entries use
    ``verification_interval_days``.

    Accesses the in-memory cache asynchronously with a lock.

    Returns:
        Set of album hash keys needing verification

    """
    now = datetime.now(UTC)
    due_keys: set[str] = set()

    # Hold the lock for the whole scan so the cache cannot change underneath us
    async with self._lock:
        for key_hash, entry in self.pending_albums.items():
            interval_days = self.verification_interval_days

            # Prerelease entries carry their own re-check interval
            if entry.reason == VerificationReason.PRERELEASE:
                metadata = self._parse_metadata(entry.metadata)
                override = self._normalize_recheck_days(metadata.get("recheck_days"))
                interval_days = override if override is not None else self.prerelease_recheck_days

            if now >= entry.timestamp + timedelta(days=interval_days):
                self.console_logger.info(
                    "Album '%s - %s' needs verification",
                    entry.artist,
                    entry.album,
                )
                due_keys.add(key_hash)

    return due_keys

generate_problematic_albums_report async

generate_problematic_albums_report(
    min_attempts=3, report_path=None
)

Generate a report of albums that failed to get year after multiple attempts.

Parameters:

Name Type Description Default
min_attempts int

Minimum number of verification attempts to include in the report

3
report_path str | None

Path to save the report (uses config default if None)

None

Returns:

Type Description
int

Number of problematic albums found

Source code in src/services/pending_verification.py
async def generate_problematic_albums_report(
    self,
    min_attempts: int = 3,
    report_path: str | None = None,
) -> int:
    """Generate a report of albums that failed to get year after multiple attempts.

    Args:
        min_attempts: Minimum number of verification attempts to include in the report
        report_path: Path to save the report (uses config default if None)

    Returns:
        Number of problematic albums found

    """
    if report_path is None:
        report_path = get_full_log_path(
            self.config,
            "reporting",
            self.config.reporting.problematic_albums_path,
        )

    # Track attempts per album
    album_attempts: dict[str, list[datetime]] = {}

    async with self._lock:
        current_time = datetime.now(UTC)

        for key, entry in self.pending_albums.items():
            # Calculate how many verification periods have passed
            time_diff = current_time - entry.timestamp
            periods_passed = int(time_diff.total_seconds() / (self.verification_interval_days * 86400))

            if periods_passed >= min_attempts - 1:
                if key not in album_attempts:
                    album_attempts[key] = []

                # Reconstruct verification dates
                for i in range(periods_passed + 1):
                    attempt_time = entry.timestamp + timedelta(seconds=i * self.verification_interval_days * 86400)
                    album_attempts[key].append(attempt_time)

    # Generate report
    try:
        Path(report_path).parent.mkdir(parents=True, exist_ok=True)

        # Use run_in_executor for async file operation
        loop = asyncio.get_running_loop()

        def _write_report() -> None:
            with Path(report_path).open("w", newline="", encoding="utf-8") as f:
                writer = csv.writer(f)
                writer.writerow(
                    [
                        "Artist",
                        "Album",
                        "First Attempt",
                        "Last Attempt",
                        "Total Attempts",
                        "Days Since First Attempt",
                        "Status",
                    ]
                )

                for album_key, attempts in sorted(
                    album_attempts.items(),
                    key=lambda x: len(x[1]),
                    reverse=True,
                ):
                    album_entry = self.pending_albums[album_key]
                    first_attempt = min(attempts)
                    last_attempt = max(attempts)
                    days_since_first = (datetime.now(UTC) - first_attempt).days

                    writer.writerow(
                        [
                            album_entry.artist,
                            album_entry.album,
                            first_attempt.strftime("%Y-%m-%d"),
                            last_attempt.strftime("%Y-%m-%d"),
                            len(attempts),
                            days_since_first,
                            "Pending verification",
                        ]
                    )

        await loop.run_in_executor(None, _write_report)

        self.console_logger.info(
            "Generated problematic albums report: %s (%d albums)",
            report_path,
            len(album_attempts),
        )

        return len(album_attempts)

    except (OSError, csv.Error) as e:
        self.error_logger.exception(
            "Failed to generate problematic albums report: %s",
            e,
        )
        return 0

should_auto_verify async

should_auto_verify()

Check if automatic pending verification should run.

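Returns True when the feature is enabled (auto_verify_days > 0) and either no previous verification timestamp exists or at least auto_verify_days days have passed since the last verification. If the stored timestamp cannot be read or parsed, it also returns True so that verification runs.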

Returns:

Type Description
bool

True if auto-verify should run, False otherwise

Source code in src/services/pending_verification.py
async def should_auto_verify(self) -> bool:
    """Check if automatic pending verification should run.

    Returns True if:
    - auto_verify_days has passed since last verification
    - No previous verification exists
    - auto_verify_days > 0 (feature enabled)

    Returns:
        True if auto-verify should run, False otherwise

    """
    auto_verify_days = self.config.pending_verification.auto_verify_days

    if auto_verify_days <= 0:
        return False

    pending_path = Path(self.pending_file_path)
    last_verify_path = pending_path.with_name(pending_path.stem + PENDING_LAST_VERIFY_SUFFIX)

    if not last_verify_path.exists():
        self.console_logger.debug("No previous pending verification found, auto-verify needed")
        return True

    try:
        loop = asyncio.get_running_loop()

        def _read_last_verify() -> str:
            with last_verify_path.open(encoding="utf-8") as f:
                return f.read().strip()

        last_verify_str = await loop.run_in_executor(None, _read_last_verify)
        last_verify = datetime.fromisoformat(last_verify_str)

        if last_verify.tzinfo is None:
            last_verify = last_verify.replace(tzinfo=UTC)

        days_since = (datetime.now(tz=UTC) - last_verify).days

        if days_since >= auto_verify_days:
            self.console_logger.info(
                "%s needed: %s days since last check %s",
                LogFormat.label("AUTO-VERIFY-PENDING"),
                LogFormat.number(days_since),
                LogFormat.dim(f"(threshold: {auto_verify_days})"),
            )
            return True

        self.console_logger.debug(
            "Auto-verify pending not needed: %d days since last check (threshold: %d)",
            days_since,
            auto_verify_days,
        )
        return False

    except (OSError, ValueError, RuntimeError) as e:
        self.error_logger.warning(
            "Error checking auto-verify pending status for %s; auto-verify fallback due to error: %s",
            last_verify_path,
            e,
        )
        return True  # Run verification if we can't determine last run

update_verification_timestamp async

update_verification_timestamp()

Update the last pending verification timestamp file.

Source code in src/services/pending_verification.py
async def update_verification_timestamp(self) -> None:
    """Update the last pending verification timestamp file.

    Writes the current UTC time in ISO 8601 format to a file that sits next
    to the pending file and shares its stem. The parent directory is created
    if it is missing, so the first run on a fresh log directory still records
    its timestamp. Failures are logged as warnings rather than raised.
    """
    pending_path = Path(self.pending_file_path)
    last_verify_path = pending_path.with_name(pending_path.stem + PENDING_LAST_VERIFY_SUFFIX)

    def _write_last_verify() -> None:
        # Without the directory the write fails on every run and the
        # timestamp is never recorded
        last_verify_path.parent.mkdir(parents=True, exist_ok=True)
        with last_verify_path.open("w", encoding="utf-8") as f:
            f.write(datetime.now(tz=UTC).isoformat())

    try:
        # Keep the blocking file write off the event loop
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, _write_last_verify)
    except (OSError, ValueError, RuntimeError) as e:
        self.error_logger.warning(
            "Error updating last pending verification date at %s: %s",
            last_verify_path,
            e,
        )