commit 9ab125b1a66bbfc597cb81371497c7aa8d5eb12e Author: heboba Date: Thu Feb 26 19:33:05 2026 +0000 A kind of initial commit diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b05197c --- /dev/null +++ b/.dockerignore @@ -0,0 +1,9 @@ +__pycache__/ +.pytest_cache/ +.venv/ +*.pyc +*.pyo +*.pyd +*.db +data/ +.env diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5790ef6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,50 @@ +# Python bytecode / caches +__pycache__/ +*.py[cod] +*.pyo +*.pyd + +# Test / tooling caches +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +.pyre/ +.hypothesis/ + +# Coverage +.coverage +.coverage.* +htmlcov/ + +# Build artifacts +build/ +dist/ +*.egg-info/ +.eggs/ + +# Virtual environments +.venv/ +venv/ +env/ +ENV/ + +# Local environment / secrets +.env +.env.* +!.env.example + +# Local runtime data +data/ + +# Logs +*.log + +# Editors / IDE +.idea/ +.vscode/ +*.swp +*.swo + +# OS files +.DS_Store +Thumbs.db diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 0000000..227b0f3 --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,363 @@ +# Design / Architecture + +## Цель + +Сервис генерирует Spotify playlist "daily vibe" на основе: + +- recent listening пользователя +- кэша liked tracks +- истории уже выданных рекомендаций + +Управление идет через Telegram-бота (`/generate`, `/connect`, `/status` и т.д.), а также опционально через nightly cron trigger. + +## Высокоуровневая схема + +Компоненты: + +- `FastAPI` приложение + - healthcheck + - Spotify OAuth start/callback + - internal endpoint для cron (`/internal/jobs/nightly`) +- `TelegramBotRunner` (polling) + - принимает команды пользователей + - запускает генерацию и отправляет статусы +- `PlaylistJobService` + - orchestration одного run (token -> sync likes -> candidates -> playlist -> persist) +- `RecommendationEngine` + - строит seed profile + - собирает candidate pool + - ранжирует и отбирает треки +- `SpotifyClient` / `LastFmClient` + - внешние API вызовы +- `SQLite` (через SQLAlchemy async) + - пользователи, кэш лайков, история рекомендаций, run log + +## Runtime / Lifecycle + +Точка входа: `app/main.py`. + +На startup: + +1. Загружается `Settings` (`app/config.py`) +2. Создается async SQLAlchemy engine и session factory (`app/db/session.py`) +3. Выполняется `create_all` (автосоздание таблиц) +4. Создается общий `httpx.AsyncClient` +5. Создаются клиенты: + - `SpotifyClient` + - `LastFmClient` +6. Создаются сервисы: + - `SpotifyAuthService` + - `RecommendationEngine` + - `PlaylistJobService` +7. Инициализируется `TelegramBotRunner` и запускается polling +8. Все объекты складываются в `app.state.runtime` и `app.state.services` + +На shutdown: + +- останавливается Telegram polling +- закрывается `httpx.AsyncClient` +- закрывается DB engine + +## Контейнеры / Deployment + +`docker-compose.yml`: + +- `app` (основной сервис, FastAPI + Telegram polling) +- `cron` (опциональный сервис с `supercronic`) + +Важно: + +- `cron` помечен `profiles: ["cron"]` и по умолчанию не стартует +- manual-first режим: пользователь генерирует плейлисты через Telegram `/generate` + +`cron` выполняет `scripts/run_nightly.sh`, который вызывает: + +- `POST /internal/jobs/nightly` с `Authorization: Bearer ` + +## Слои приложения + +### 1. API Layer (`app/api/routes.py`) + +Назначение: + +- HTTP endpoints для OAuth и internal jobs + +Endpoints: + +- `GET /health` +- `GET /auth/spotify/start` +- `GET /auth/spotify/callback` +- `POST /internal/jobs/nightly` + +Особенности: + +- OAuth callback после успеха отправляет сообщение в Telegram пользователю +- internal nightly endpoint защищен `INTERNAL_JOB_TOKEN` + +### 2. Bot Layer (`app/bot/telegram_bot.py`) + +Назначение: + +- пользовательский интерфейс через Telegram команды + +Поддерживаемые команды: + +- `/start` +- `/help` +- `/connect` +- `/status` +- `/generate` +- `/latest` +- `/setsize` +- `/setratio` +- `/sync` + +Особенности: + +- `/generate` запускает `PlaylistJobService.generate_for_user(..., force=True, notify=False)` +- `/sync` только обновляет кэш лайков +- у каждой команды свой короткий DB session через `session_factory` + +### 3. Service Layer + +#### `SpotifyAuthService` (`app/services/spotify_auth.py`) + +Роли: + +- создание OAuth state +- обмен `code` на токены +- refresh access token +- защита от истекшего access token + +Особенности: + +- сравнение дат нормализуется в UTC (важно для SQLite naive datetime) +- сохраняет scopes и expiry в таблице `users` + +#### `RecommendationEngine` (`app/services/recommendation.py`) + +Роли: + +- sync liked tracks в локальный кэш +- build seed profile +- collect candidates из нескольких источников +- rank/select итоговый список + +Текущие источники кандидатов: + +- Spotify recommendations +- Spotify artist top tracks +- Spotify search (seed artist fallback) +- Last.fm track similar -> Spotify search +- Last.fm artist similar -> Spotify search + +Ключевые особенности: + +- соблюдение лимита Spotify recommendations: максимум `5` seed'ов на запрос +- мягкая деградация при частичных ошибках источников +- liked fallback (если весь пул оказался уже в лайках) + +#### `PlaylistJobService` (`app/services/playlist_job.py`) + +Роли: + +- orchestration полного run +- создание Spotify playlist и добавление треков +- запись run и треков в БД +- обновление recommendation history +- отправка уведомления в Telegram (если задан notifier) + +Порядок выполнения run: + +1. Проверка пользователя / Spotify connection +2. Создание записи `playlist_runs` со статусом `running` +3. Получение valid access token +4. Sync liked tracks +5. Сборка плейлиста через `RecommendationEngine` +6. Создание playlist в Spotify +7. Добавление треков в playlist +8. Сохранение run-трека/истории/метаданных +9. Commit и возврат `JobOutcome` + +При ошибке: + +- `playlist_runs.status = failed` +- в `notes` записывается сообщение ошибки + +## Client Layer + +### `SpotifyClient` (`app/clients/spotify.py`) + +Инкапсулирует Spotify Web API. + +Что важно в текущей реализации: + +- `create_playlist()` использует `POST /me/playlists` + - это выбранный маршрут, потому что `POST /users/{id}/playlists` может давать `403` в некоторых аккаунтах/приложениях +- `add_playlist_items()` использует `POST /playlists/{playlist_id}/items` + - маршрут `/tracks` в практике может отдавать `403`, хотя `/items` работает +- `delete_playlist()` вызывает `DELETE /playlists/{playlist_id}/followers` + - это "unfollow" (Spotify не поддерживает hard delete playlist) +- встроены retry на `429` (rate-limit) с `Retry-After` + +### `LastFmClient` (`app/clients/lastfm.py`) + +Используется как optional enrichment layer. + +- может быть отключен (если `LASTFM_API_KEY` пустой) +- ошибки Last.fm не должны ломать весь run, если другие источники работают + +## Persistence Layer (SQLite + SQLAlchemy) + +### Таблицы (`app/db/models.py`) + +#### `users` + +Хранит: + +- Telegram identity (`telegram_chat_id`, `telegram_username`) +- Spotify identity/tokens/scopes (`spotify_user_id`, access/refresh token, expiry, scopes) +- пользовательские настройки (`playlist_size`, `min_new_ratio`, timezone) +- последние результаты (`last_generated_date`, `latest_playlist_id`, `latest_playlist_url`) + +#### `auth_states` + +Временные OAuth state для callback: + +- `state` +- `telegram_chat_id` +- `expires_at` + +#### `saved_tracks` + +Локальный кэш `Liked Songs` пользователя: + +- `spotify_track_id` +- название/артисты/album/popularity +- `added_at` + +#### `recommendation_history` + +История ранее рекомендованных треков: + +- `spotify_track_id` +- `first_recommended_at` +- `last_recommended_at` +- `times_recommended` + +#### `playlist_runs` + +Run log генерации: + +- статус (`running/success/failed`) +- метаданные Spotify playlist +- статистика (`total/new/reused`) +- `notes` + +#### `playlist_run_tracks` + +Снимок состава конкретного run: + +- track id / name / artists +- source (из какого источника пришел) +- позиция +- `is_new_to_bot` + +### Repository Layer (`app/db/repositories.py`) + +Паттерн: + +- thin repositories над SQLAlchemy AsyncSession +- изолируют CRUD/query-логику от service layer + +Примеры: + +- `UserRepository` +- `AuthStateRepository` +- `SavedTrackRepository` +- `RecommendationHistoryRepository` +- `PlaylistRunRepository` + +## Потоки данных + +### OAuth Flow + +1. Telegram `/connect` +2. `SpotifyAuthService.create_connect_url()` +3. Пользователь идет в Spotify auth page +4. `GET /auth/spotify/callback` +5. `SpotifyAuthService.handle_callback()` +6. Токены и Spotify profile сохраняются в `users` +7. Пользователю отправляется сообщение в Telegram + +### Manual Generate Flow (`/generate`) + +1. Telegram `/generate` +2. `PlaylistJobService.generate_for_user(..., force=True)` +3. Sync likes + recent listening + candidate collection +4. Playlist create + add items в Spotify +5. Persist run/history +6. Ответ пользователю в Telegram + +### Nightly Cron Flow (optional) + +1. `supercronic` в `cron` контейнере +2. `scripts/run_nightly.sh` +3. `POST /internal/jobs/nightly` +4. `PlaylistJobService.generate_for_all_connected_users()` + +## Concurrency / Consistency + +- Генерация защищена одним `asyncio.Lock` (`generate_lock`) в `PlaylistJobService` + - предотвращает одновременные run'ы и гонки обновления history +- Большинство операций run выполняются в одной DB session +- Ошибки внутри run переводят запись run в `failed` + +## Алгоритм рекомендаций (кратко) + +Подробно см. `README.md`, но архитектурно pipeline такой: + +1. Seed profile (recent + liked) +2. Candidate pool (Spotify + Last.fm + fallback search) +3. Dedupe +4. Rank (score penalties/boosts) +5. Select (min_new_ratio + artist caps) +6. Persist stats/history + +## Конфигурация + +Основные env-переменные (`app/config.py`): + +- `TELEGRAM_BOT_TOKEN` +- `SPOTIFY_CLIENT_ID` +- `SPOTIFY_CLIENT_SECRET` +- `SPOTIFY_REDIRECT_URI` +- `SPOTIFY_DEFAULT_MARKET` +- `LASTFM_API_KEY` (optional) +- `INTERNAL_JOB_TOKEN` +- `DB_PATH` +- `DEFAULT_PLAYLIST_SIZE` +- `MIN_NEW_RATIO` +- `RECENT_DAYS_WINDOW` +- `PLAYLIST_VISIBILITY` + +## Диагностика / Наблюдаемость + +Сейчас: + +- основной feedback идет через Telegram сообщения и `playlist_runs.notes` +- HTTP `/health` для liveness +- тесты покрывают критичные Spotify routes и части recommendation pipeline + +Что можно улучшить: + +- структурированные логи по source coverage (сколько кандидатов из каждого источника) +- метрики latency/ошибок Spotify/Last.fm +- отдельный debug endpoint для dry-run (без создания playlist) + +## Известные ограничения + +- SQLite подходит для small-scale / single-node сценария +- Telegram polling + FastAPI живут в одном процессе/контейнере +- per-user timezone используется ограниченно (cron общий) +- внешние API ограничения (Spotify/Last.fm) могут различаться между приложениями/аккаунтами diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7babbbf --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +FROM python:3.12-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + ca-certificates \ + tzdata \ + wget \ + && rm -rf /var/lib/apt/lists/* + +ARG SUPERCRONIC_VERSION=v0.2.31 +RUN wget -O /usr/local/bin/supercronic "https://github.com/aptible/supercronic/releases/download/${SUPERCRONIC_VERSION}/supercronic-linux-amd64" \ + && chmod +x /usr/local/bin/supercronic + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +RUN chmod +x /app/scripts/start_app.sh /app/scripts/start_cron.sh /app/scripts/run_nightly.sh + +EXPOSE 8000 + +CMD ["/app/scripts/start_app.sh"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..2ba957d --- /dev/null +++ b/README.md @@ -0,0 +1,302 @@ +# Spotify Daily Vibe Bot (Telegram + Spotify + Docker) + +Готовый backend-сервис, который: + +- привязывается к вашему Spotify-аккаунту +- читает лайкнутые треки (`Liked Songs`) +- учитывает недавние прослушивания (последние дни) +- генерирует Spotify-плейлист с похожим вайбом по команде `/generate` +- опционально может запускаться по расписанию через `cron` +- минимизирует повторы и старается держать `>=80%` новых треков (не лайкнутых и не рекомендованных ранее ботом) +- управляется через Telegram +- запускается в Docker (`app`, опционально `cron`) + +## Что внутри + +- `FastAPI` backend (OAuth callback + internal job endpoint) +- `python-telegram-bot` (polling) +- `SQLite` (история рекомендаций, кэш лайкнутых, run log) +- `supercronic` в отдельном контейнере для nightly cron trigger (опционально) + +## Важный момент по Spotify API + +Spotify endpoint `/recommendations` может быть ограничен/недоступен для некоторых приложений. В сервисе реализован fallback: + +- Spotify recommendations (если доступен) +- top tracks по артистам из вашего recent listening / liked library +- Spotify search по seed-артистам (если recommendations/top-tracks недоступны) +- optional Last.fm similarity (очень желательно для лучшего "вайба") + +Для лучшего качества рекомендаций рекомендуется добавить `LASTFM_API_KEY`. + +## Быстрый старт + +1. Создайте Telegram-бота через `@BotFather` и получите токен. +2. Создайте Spotify App: https://developer.spotify.com/dashboard +3. В Spotify App добавьте Redirect URI (должен совпасть 1 в 1), например: + - `https://your-domain.com/auth/spotify/callback` + - или для локальной разработки через tunnel: `https://xxxx.ngrok-free.app/auth/spotify/callback` +4. Скопируйте `.env.example` в `.env` и заполните значения. +5. Запустите: + +```bash +docker compose up -d --build +``` + +По умолчанию это поднимет только `app` (ручной режим через Telegram `/generate`). + +Если захотите включить ночной `cron`, запустите отдельно: + +```bash +docker compose --profile cron up -d cron +``` + +6. Откройте Telegram и напишите боту: + - `/start` + - `/connect` (получите ссылку на Spotify auth) + - после подключения: `/generate` + +## Настройка `.env` + +Минимально обязательные поля: + +- `TELEGRAM_BOT_TOKEN` +- `SPOTIFY_CLIENT_ID` +- `SPOTIFY_CLIENT_SECRET` +- `SPOTIFY_REDIRECT_URI` +- `INTERNAL_JOB_TOKEN` + +Рекомендуемые: + +- `LASTFM_API_KEY` (улучшает похожесть треков) +- `APP_TIMEZONE` / `TZ` +- `SPOTIFY_DEFAULT_MARKET` (двухбуквенный код страны, например `NL`, `DE`, `US`) +- `CRON_SCHEDULE` (например `15 2 * * *`, только если включаете `cron`) + +## Telegram команды + +- `/connect` — привязать Spotify +- `/status` — статус подключения и последний плейлист +- `/generate` — сгенерировать плейлист сейчас +- `/latest` — ссылка на последний плейлист +- `/setsize 30` — размер плейлиста (5..100) +- `/setratio 0.8` — целевая доля новых треков (0.5..1.0) +- `/sync` — принудительно обновить лайкнутые треки + +## Алгоритм подбора рекомендаций + +Ниже описан фактический пайплайн генерации плейлиста (как он сейчас работает в коде). + +### 1. Подготовка входных данных + +Перед генерацией бот: + +- обновляет Spotify access token по refresh token (если нужно) +- синхронизирует лайкнутые треки из `Liked Songs` в локальный кэш (`saved_tracks`) +- загружает recent listening за окно `RECENT_DAYS_WINDOW` (по умолчанию `5` дней) +- загружает историю ранее рекомендованных треков (`recommendation_history`) + +### 2. Построение seed-профиля + +Бот собирает seed'ы из двух источников: recent plays и liked library. + +- Recent plays: + - каждый recent track получает вес с убыванием по позиции (более свежие прослушивания важнее) + - накапливаются веса по трекам и артистам +- Liked tracks: + - берется срез последних лайков (`~120`) + - плюс случайная выборка из более старых лайков (для разнообразия) + - из них также накапливаются веса по артистам + +На выходе seed-профиля формируются: + +- `seed_track_ids` (до ~10 треков) +- `seed_artists` (до ~20 артистов) +- `seed_artist_names` (для Last.fm и Spotify Search fallback) +- `recent_track_meta` (нужно для Last.fm track-similar) + +### 3. Сбор кандидатов (candidate pool) + +Бот собирает общий пул кандидатов из нескольких источников и дедуплицирует их. + +Источники (по порядку): + +1. `Spotify recommendations` + - вызывается батчами + - соблюдается лимит Spotify: максимум `5` seed'ов на запрос (суммарно track + artist) +2. `Spotify artist top tracks` + - по seed-артистам +3. `Spotify search` по seed-артистам (fallback) + - используется, если recommendations / top-tracks ограничены или дали мало результатов +4. `Last.fm track similar` -> `Spotify search` + - для recent seed-треков +5. `Last.fm artist similar` -> `Spotify search` + - для seed-артистов + +Если Spotify/Last.fm отдают ошибки на отдельных вызовах, бот старается деградировать мягко (использовать другие источники), а не валить весь run сразу. + +### 4. Дедупликация кандидатов + +Кандидаты дедуплицируются: + +- по `spotify_track_id` +- по нормализованной сигнатуре `track_name + artist_names` (на случай дублей / разных версий) + +Если один и тот же трек найден из нескольких источников: + +- сохраняется лучший score +- источник объединяется (например, `source1+source2`) + +### 5. Фильтрация и ранжирование + +Базовая логика: + +- сначала исключаются треки, которые уже есть в ваших лайках (`liked_ids`) +- если после этого пул пустой, включается fallback: + - разрешается использовать already-liked треки (с penalty), чтобы не падать с пустым результатом + +Дополнительные коррекции score: + +- penalty за уже рекомендованные раньше ботом (`history_ids`) +- penalty за лайкнутые (если включился liked fallback) +- небольшой boost за коллаборации / нескольких артистов +- небольшой boost за накопленные причины/источники +- popularity scoring слегка тяготеет к mid-popularity (не только мейнстрим и не только deep cuts) + +### 6. Отбор финального списка (selection) + +После ранжирования кандидаты делятся на: + +- `novel` — не были рекомендованы ранее и не в лайках +- `reused` — уже были рекомендованы или (fallback) уже лайкнуты + +Далее бот: + +- сначала пытается набрать минимум по `min_new_ratio` +- соблюдает artist caps (ограничение количества треков одного артиста) +- если новых треков недостаточно, ослабляет ограничения +- затем дозаполняет повторными кандидатами + +Результат: + +- `tracks` — финальный порядок треков +- `new_count` / `reused_count` +- `notes` — пояснение, если не удалось выдержать target по новым трекам + +### 7. Создание плейлиста и запись истории + +После сборки списка бот: + +- создает Spotify playlist +- добавляет треки +- записывает run в `playlist_runs` и `playlist_run_tracks` +- обновляет `recommendation_history` +- сохраняет `latest_playlist_url` у пользователя + +## Как работает анти-повтор + +Бот хранит: + +- все треки, которые уже рекомендовал раньше +- все ваши лайкнутые треки (кэш обновляется) + +При сборке нового плейлиста: + +- сначала исключает лайкнутые треки (если это возможно) +- отдает приоритет трекам, которых не было в рекомендациях ранее +- если новых треков не хватает, дозаполняет повторами из истории +- если кандидаты есть только среди лайкнутых, может использовать liked fallback вместо полного фейла run +- пишет статистику в БД (`new / reused`) + +Если в доступном пуле не хватает новых треков для `80%`, бот сообщит об этом в статусе run. + +## Cron (ночной запуск) + +По умолчанию `cron` отключен (manual-first режим: запускаете `/generate` вручную в Telegram). + +В `docker-compose.yml` сервис `cron` помечен профилем `cron`, поэтому он не стартует при обычном: + +```bash +docker compose up -d --build +``` + +Если хотите включить ночной запуск, поднимите его отдельно: + +```bash +docker compose --profile cron up -d cron +``` + +`cron` по `CRON_SCHEDULE` вызывает внутренний endpoint: + +- `POST /internal/jobs/nightly` + +Измените время через `.env`: + +```env +CRON_SCHEDULE=15 2 * * * +TZ=Europe/Moscow +``` + +Отключить обратно: + +```bash +docker compose stop cron +``` + +## Хранилище данных + +- SQLite БД: `./data/app.db` + +Эта папка примонтирована как volume, поэтому данные переживают перезапуск контейнеров. + +## Проверка работы + +- `GET /health` должен вернуть `{"ok": true}` +- после `/generate` в Telegram появится ссылка на Spotify playlist + +## Типичный деплой + +- VPS + Docker Compose +- `APP_BASE_URL` = публичный URL сервиса +- `SPOTIFY_REDIRECT_URI` = `${APP_BASE_URL}/auth/spotify/callback` +- Telegram работает через polling (webhook не нужен) +- `cron` можно не включать совсем, если генерация только вручную + +## Архитектура + +Подробное описание архитектуры приложения, потоков данных и таблиц БД вынесено в `DESIGN.md`. + +## Feature Plans + +Ниже план ближайших улучшений (roadmap), которые хорошо ложатся на текущую архитектуру. + +- Явный feedback loop: + - команды вроде `/ban`, `/unban`, `/prefer` + - отдельная blacklist-таблица, чтобы "не понравилось" != "просто не лайкнул" +- Настройки anti-repeat: + - жесткий запрет повторов на N дней/недель + - отдельные правила для liked / previously recommended +- Explainability / debug: + - why-this-track (показать источник, score, причины попадания) + - dry-run endpoint/команда без создания плейлиста +- Тонкая настройка алгоритма: + - веса источников (Spotify / Last.fm / search fallback) + - режимы генерации (explore / familiar / mixed) +- Улучшение источников кандидатов: + - дополнительные музыкальные источники / метаданные + - более умная работа с жанрами/артист-кластерами +- Персональный scheduler: + - per-user timezone и per-user cron schedule + - выбор дней недели / времени генерации +- Наблюдаемость: + - структурированные логи по source coverage и причинам фильтрации + - простые метрики по ошибкам Spotify/Last.fm и latency +- Хранилище / масштабирование: + - миграции (Alembic) + - Postgres вместо SQLite для multi-user сценариев + +## Ограничения / улучшения (если захотите дальше) + +- Персонификация по timezone на пользователя (сейчас cron общий, но user-specific generation поддерживается вручную) +- Больше источников похожих треков (например, MusicBrainz/Discogs mapping) +- Выделенный Postgres вместо SQLite для multi-user нагрузки diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..89e05ee --- /dev/null +++ b/app/__init__.py @@ -0,0 +1 @@ +"""Spotify vibe bot application package.""" diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/routes.py b/app/api/routes.py new file mode 100644 index 0000000..76688cb --- /dev/null +++ b/app/api/routes.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +from fastapi import APIRouter, Depends, Header, HTTPException, Query, Request +from fastapi.responses import HTMLResponse + +from app.services.app_services import AppServices + + +def get_router() -> APIRouter: + router = APIRouter() + + def runtime(request: Request): + return request.app.state.runtime + + def services(request: Request) -> AppServices: + return request.app.state.services + + @router.get("/health") + async def health(): + return {"ok": True} + + @router.get("/auth/spotify/start") + async def spotify_start( + chat_id: str = Query(...), + username: str | None = Query(default=None), + svc: AppServices = Depends(services), + ): + url = await svc.auth.create_connect_url(chat_id=chat_id, username=username) + return {"url": url} + + @router.get("/auth/spotify/callback", response_class=HTMLResponse) + async def spotify_callback( + request: Request, + code: str | None = None, + state: str | None = None, + error: str | None = None, + svc: AppServices = Depends(services), + ): + try: + if error: + raise ValueError(f"Spotify returned error: {error}") + if not code or not state: + raise ValueError("Missing code/state in callback") + chat_id, display_name = await svc.auth.handle_callback(code=code, state=state) + runner = request.app.state.runtime.telegram_runner + if runner is not None: + await runner.send_message(chat_id, f"Spotify подключен: {display_name}\nТеперь можно /generate") + return """ + +

Spotify connected

+

Return to Telegram and use /generate.

+ + """ + except Exception as exc: + return HTMLResponse( + content=( + "" + f"

Auth failed

{exc}

" + ), + status_code=400, + ) + + @router.post("/internal/jobs/nightly") + async def run_nightly( + request: Request, + authorization: str | None = Header(default=None), + svc: AppServices = Depends(services), + ): + rt = runtime(request) + expected = f"Bearer {rt.settings.internal_job_token}" + if authorization != expected: + raise HTTPException(status_code=401, detail="Unauthorized") + outcomes = await svc.jobs.generate_for_all_connected_users() + return { + "ok": True, + "count": len(outcomes), + "results": [ + {"user_id": o.user_id, "ok": o.ok, "message": o.message, "playlist_url": o.playlist_url} for o in outcomes + ], + } + + return router diff --git a/app/bot/__init__.py b/app/bot/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/bot/telegram_bot.py b/app/bot/telegram_bot.py new file mode 100644 index 0000000..2e93536 --- /dev/null +++ b/app/bot/telegram_bot.py @@ -0,0 +1,217 @@ +from __future__ import annotations + +import contextlib + +from telegram import Update +from telegram.ext import Application, CommandHandler, ContextTypes + +from app.db.repositories import PlaylistRunRepository, SavedTrackRepository, UserRepository +from app.services.app_services import AppServices + + +class TelegramBotRunner: + def __init__(self, token: str, session_factory, services: AppServices, app_base_url: str) -> None: + self.token = token + self.session_factory = session_factory + self.services = services + self.app_base_url = app_base_url.rstrip("/") + self.application = Application.builder().token(token).build() + self._setup_handlers() + self._running = False + + def _setup_handlers(self) -> None: + self.application.add_handler(CommandHandler("start", self.start)) + self.application.add_handler(CommandHandler("help", self.help)) + self.application.add_handler(CommandHandler("connect", self.connect)) + self.application.add_handler(CommandHandler("status", self.status)) + self.application.add_handler(CommandHandler("generate", self.generate)) + self.application.add_handler(CommandHandler("latest", self.latest)) + self.application.add_handler(CommandHandler("setsize", self.set_size)) + self.application.add_handler(CommandHandler("setratio", self.set_ratio)) + self.application.add_handler(CommandHandler("sync", self.sync_likes)) + + async def start_polling(self) -> None: + await self.application.initialize() + await self.application.start() + if self.application.updater is None: + raise RuntimeError("Telegram updater is not available") + await self.application.updater.start_polling(drop_pending_updates=False) + self._running = True + + async def stop(self) -> None: + if not self._running: + return + with contextlib.suppress(Exception): + if self.application.updater: + await self.application.updater.stop() + with contextlib.suppress(Exception): + await self.application.stop() + with contextlib.suppress(Exception): + await self.application.shutdown() + self._running = False + + async def send_message(self, chat_id: str, text: str) -> None: + await self.application.bot.send_message(chat_id=int(chat_id), text=text, disable_web_page_preview=False) + + async def _ensure_user(self, update: Update): + chat = update.effective_chat + user = update.effective_user + if not chat: + return None + async with self.session_factory() as session: + repo = UserRepository(session) + db_user = await repo.get_or_create_by_chat( + chat_id=str(chat.id), + username=user.username if user else None, + ) + await session.commit() + return db_user + + async def start(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + await self._ensure_user(update) + await update.message.reply_text( + "Я бот для Spotify daily vibe playlist.\n" + "Команды: /connect /status /generate /latest /setsize 30 /setratio 0.8 /sync" + ) + + async def help(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + await update.message.reply_text( + "/connect - привязать Spotify\n" + "/status - статус аккаунта и последнего плейлиста\n" + "/generate - сгенерировать плейлист сейчас\n" + "/latest - показать ссылку на последний плейлист\n" + "/setsize N - размер плейлиста\n" + "/setratio X - доля новых треков (0.5..1.0)\n" + "/sync - обновить лайкнутые треки из Spotify" + ) + + async def connect(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + chat = update.effective_chat + user = update.effective_user + if not chat: + return + url = await self.services.auth.create_connect_url(str(chat.id), user.username if user else None) + await update.message.reply_text(f"Открой ссылку и авторизуй Spotify:\n{url}", disable_web_page_preview=True) + + async def status(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + chat = update.effective_chat + if not chat: + return + async with self.session_factory() as session: + users = UserRepository(session) + runs = PlaylistRunRepository(session) + saved = SavedTrackRepository(session) + db_user = await users.get_by_chat_id(str(chat.id)) + if not db_user: + await update.message.reply_text("Пользователь не найден. Напиши /start") + return + latest = await runs.latest_for_user(db_user.id) + saved_count = await saved.count_for_user(db_user.id) + connected = "yes" if db_user.spotify_refresh_token else "no" + text = ( + f"Connected: {connected}\n" + f"Spotify user: {db_user.spotify_user_id or '-'}\n" + f"Liked tracks cached: {saved_count}\n" + f"Playlist size: {db_user.playlist_size}\n" + f"Min new ratio: {db_user.min_new_ratio:.2f}\n" + f"Last generated: {db_user.last_generated_date or '-'}" + ) + if latest: + text += f"\nLast run: {latest.status}, tracks={latest.total_tracks}" + if latest.playlist_url: + text += f"\n{latest.playlist_url}" + await update.message.reply_text(text, disable_web_page_preview=False) + + async def latest(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + chat = update.effective_chat + if not chat: + return + async with self.session_factory() as session: + users = UserRepository(session) + db_user = await users.get_by_chat_id(str(chat.id)) + if not db_user or not db_user.latest_playlist_url: + await update.message.reply_text("Пока нет сгенерированного плейлиста.") + return + await update.message.reply_text(db_user.latest_playlist_url, disable_web_page_preview=False) + + async def generate(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + chat = update.effective_chat + if not chat: + return + async with self.session_factory() as session: + users = UserRepository(session) + db_user = await users.get_by_chat_id(str(chat.id)) + if not db_user: + await update.message.reply_text("Пользователь не найден. Напиши /start") + return + user_id = db_user.id + await update.message.reply_text("Генерирую плейлист, это может занять 20-60 секунд...") + outcome = await self.services.jobs.generate_for_user(user_id=user_id, force=True, notify=False) + if outcome.ok: + msg = outcome.message + if outcome.playlist_url: + msg += f"\n{outcome.playlist_url}" + await update.message.reply_text(msg, disable_web_page_preview=False) + else: + await update.message.reply_text(f"Ошибка: {outcome.message}") + + async def set_size(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + chat = update.effective_chat + if not chat: + return + if not context.args: + await update.message.reply_text("Использование: /setsize 30") + return + try: + value = int(context.args[0]) + if value < 5 or value > 100: + raise ValueError + except ValueError: + await update.message.reply_text("Размер должен быть числом от 5 до 100.") + return + async with self.session_factory() as session: + users = UserRepository(session) + db_user = await users.get_or_create_by_chat(str(chat.id), update.effective_user.username if update.effective_user else None) + db_user.playlist_size = value + await session.commit() + await update.message.reply_text(f"Размер плейлиста установлен: {value}") + + async def set_ratio(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + chat = update.effective_chat + if not chat: + return + if not context.args: + await update.message.reply_text("Использование: /setratio 0.8") + return + try: + value = float(context.args[0]) + if value < 0.5 or value > 1.0: + raise ValueError + except ValueError: + await update.message.reply_text("Значение должно быть от 0.5 до 1.0") + return + async with self.session_factory() as session: + users = UserRepository(session) + db_user = await users.get_or_create_by_chat(str(chat.id), update.effective_user.username if update.effective_user else None) + db_user.min_new_ratio = value + await session.commit() + await update.message.reply_text(f"Минимальная доля новых треков: {value:.2f}") + + async def sync_likes(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + chat = update.effective_chat + if not chat: + return + async with self.session_factory() as session: + users = UserRepository(session) + db_user = await users.get_by_chat_id(str(chat.id)) + if not db_user: + await update.message.reply_text("Пользователь не найден. Напиши /start") + return + if not db_user.spotify_refresh_token: + await update.message.reply_text("Сначала /connect") + return + access_token = await self.services.auth.ensure_valid_access_token(session, db_user) + await self.services.recommendation.sync_saved_tracks(session, db_user, access_token) + await session.commit() + saved_count = await SavedTrackRepository(session).count_for_user(db_user.id) + await update.message.reply_text(f"Лайкнутые треки обновлены: {saved_count}") diff --git a/app/clients/__init__.py b/app/clients/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/clients/lastfm.py b/app/clients/lastfm.py new file mode 100644 index 0000000..9b99f99 --- /dev/null +++ b/app/clients/lastfm.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from typing import Any + +import httpx + + +class LastFmClient: + BASE_URL = "https://ws.audioscrobbler.com/2.0/" + + def __init__(self, api_key: str | None, http: httpx.AsyncClient) -> None: + self.api_key = api_key + self.http = http + + @property + def enabled(self) -> bool: + return bool(self.api_key) + + async def track_similar(self, *, artist: str, track: str, limit: int = 20) -> list[dict[str, Any]]: + if not self.api_key: + return [] + params = { + "method": "track.getSimilar", + "api_key": self.api_key, + "artist": artist, + "track": track, + "limit": limit, + "format": "json", + "autocorrect": 1, + } + resp = await self.http.get(self.BASE_URL, params=params, timeout=20) + resp.raise_for_status() + data = resp.json() + similar = ((data.get("similartracks") or {}).get("track")) or [] + if isinstance(similar, dict): + similar = [similar] + return similar + + async def artist_similar(self, *, artist: str, limit: int = 15) -> list[dict[str, Any]]: + if not self.api_key: + return [] + params = { + "method": "artist.getSimilar", + "api_key": self.api_key, + "artist": artist, + "limit": limit, + "format": "json", + "autocorrect": 1, + } + resp = await self.http.get(self.BASE_URL, params=params, timeout=20) + resp.raise_for_status() + data = resp.json() + similar = ((data.get("similarartists") or {}).get("artist")) or [] + if isinstance(similar, dict): + similar = [similar] + return similar diff --git a/app/clients/spotify.py b/app/clients/spotify.py new file mode 100644 index 0000000..9606f5c --- /dev/null +++ b/app/clients/spotify.py @@ -0,0 +1,281 @@ +from __future__ import annotations + +import asyncio +import base64 +from collections.abc import Iterable +from datetime import datetime, timedelta, timezone +from typing import Any +from urllib.parse import urlencode + +import httpx + +from app.config import Settings +from app.utils.time import parse_spotify_datetime, to_unix_ms + + +class SpotifyApiError(RuntimeError): + def __init__(self, message: str, status_code: int | None = None, payload: Any | None = None) -> None: + super().__init__(message) + self.status_code = status_code + self.payload = payload + + +class SpotifyClient: + AUTH_BASE = "https://accounts.spotify.com" + API_BASE = "https://api.spotify.com/v1" + + def __init__(self, settings: Settings, http: httpx.AsyncClient) -> None: + self.settings = settings + self.http = http + + def build_authorize_url(self, state: str, scopes: Iterable[str]) -> str: + params = { + "client_id": self.settings.spotify_client_id, + "response_type": "code", + "redirect_uri": self.settings.spotify_redirect_uri, + "scope": " ".join(scopes), + "state": state, + "show_dialog": "false", + } + return f"{self.AUTH_BASE}/authorize?{urlencode(params)}" + + async def exchange_code(self, code: str) -> dict[str, Any]: + data = { + "grant_type": "authorization_code", + "code": code, + "redirect_uri": self.settings.spotify_redirect_uri, + } + return await self._token_request(data) + + async def refresh_access_token(self, refresh_token: str) -> dict[str, Any]: + data = { + "grant_type": "refresh_token", + "refresh_token": refresh_token, + } + return await self._token_request(data) + + async def _token_request(self, data: dict[str, str]) -> dict[str, Any]: + creds = f"{self.settings.spotify_client_id}:{self.settings.spotify_client_secret}".encode() + auth = base64.b64encode(creds).decode() + headers = { + "Authorization": f"Basic {auth}", + "Content-Type": "application/x-www-form-urlencoded", + } + resp = await self.http.post(f"{self.AUTH_BASE}/api/token", data=data, headers=headers, timeout=30) + if resp.status_code >= 400: + raise SpotifyApiError("Spotify token request failed", resp.status_code, resp.text) + return resp.json() + + async def get_current_user(self, access_token: str) -> dict[str, Any]: + return await self._request_json("GET", "/me", access_token=access_token) + + async def get_saved_tracks_all(self, access_token: str) -> list[dict[str, Any]]: + tracks: list[dict[str, Any]] = [] + offset = 0 + limit = 50 + while True: + payload = await self._request_json( + "GET", + "/me/tracks", + access_token=access_token, + params={"limit": limit, "offset": offset}, + ) + items = payload.get("items", []) + for item in items: + t = item.get("track") or {} + track_id = t.get("id") + if not track_id: + continue + artists = t.get("artists") or [] + tracks.append( + { + "id": track_id, + "uri": t.get("uri") or f"spotify:track:{track_id}", + "name": t.get("name") or "Unknown", + "artist_names": [a.get("name") or "Unknown" for a in artists], + "artist_ids": [a.get("id") for a in artists if a.get("id")], + "album_name": (t.get("album") or {}).get("name"), + "added_at": parse_spotify_datetime(item.get("added_at")), + "popularity": t.get("popularity"), + } + ) + if not payload.get("next"): + break + offset += limit + return tracks + + async def get_recently_played( + self, + access_token: str, + *, + since: datetime, + max_pages: int = 10, + ) -> list[dict[str, Any]]: + items: list[dict[str, Any]] = [] + before: int | None = None + pages = 0 + since_ms = to_unix_ms(since) + + while pages < max_pages: + params: dict[str, Any] = {"limit": 50} + if before: + params["before"] = before + payload = await self._request_json( + "GET", "/me/player/recently-played", access_token=access_token, params=params + ) + batch = payload.get("items", []) + if not batch: + break + stop = False + for item in batch: + played_at = parse_spotify_datetime(item.get("played_at")) + t = item.get("track") or {} + track_id = t.get("id") + if not played_at or not track_id: + continue + if to_unix_ms(played_at) < since_ms: + stop = True + continue + artists = t.get("artists") or [] + items.append( + { + "id": track_id, + "uri": t.get("uri") or f"spotify:track:{track_id}", + "name": t.get("name") or "Unknown", + "artist_names": [a.get("name") or "Unknown" for a in artists], + "artist_ids": [a.get("id") for a in artists if a.get("id")], + "played_at": played_at, + "popularity": t.get("popularity"), + } + ) + cursors = payload.get("cursors") or {} + before = cursors.get("before") + pages += 1 + if stop or not before: + break + return items + + async def create_playlist( + self, + access_token: str, + *, + user_id: str, + name: str, + description: str, + public: bool, + ) -> dict[str, Any]: + # Prefer /me/playlists: in some accounts/apps /users/{id}/playlists returns 403 + # even with valid playlist-modify scopes, while /me/playlists succeeds. + return await self._request_json( + "POST", + "/me/playlists", + access_token=access_token, + json={"name": name, "description": description, "public": public}, + ) + + async def delete_playlist(self, access_token: str, playlist_id: str) -> None: + # Spotify has no hard delete for playlists; DELETE here unfollows/removes it from the library. + await self._request_json( + "DELETE", + f"/playlists/{playlist_id}/followers", + access_token=access_token, + ) + + async def add_playlist_items(self, access_token: str, playlist_id: str, uris: list[str]) -> None: + for i in range(0, len(uris), 100): + await self._request_json( + "POST", + f"/playlists/{playlist_id}/items", + access_token=access_token, + json={"uris": uris[i : i + 100]}, + ) + + async def get_recommendations( + self, + access_token: str, + *, + seed_tracks: list[str], + seed_artists: list[str], + limit: int = 100, + market: str | None = None, + ) -> list[dict[str, Any]]: + params: dict[str, Any] = {"limit": limit} + if market: + params["market"] = market + if seed_tracks: + params["seed_tracks"] = ",".join(seed_tracks[:5]) + if seed_artists: + params["seed_artists"] = ",".join(seed_artists[:5]) + if not params.get("seed_tracks") and not params.get("seed_artists"): + return [] + payload = await self._request_json("GET", "/recommendations", access_token=access_token, params=params) + return payload.get("tracks", []) + + async def get_artist_top_tracks(self, access_token: str, artist_id: str, market: str) -> list[dict[str, Any]]: + payload = await self._request_json( + "GET", f"/artists/{artist_id}/top-tracks", access_token=access_token, params={"market": market} + ) + return payload.get("tracks", []) + + async def search_track( + self, + access_token: str, + *, + track_name: str, + artist_name: str | None = None, + market: str | None = None, + ) -> list[dict[str, Any]]: + q_parts: list[str] = [] + if track_name: + q_parts.append(f'track:"{track_name}"') + if artist_name: + q_parts.append(f'artist:"{artist_name}"') + if not q_parts: + return [] + q = " ".join(q_parts) + params: dict[str, Any] = {"q": q, "type": "track", "limit": 5} + if market: + params["market"] = market + payload = await self._request_json("GET", "/search", access_token=access_token, params=params) + return (payload.get("tracks") or {}).get("items", []) + + async def _request_json( + self, + method: str, + path: str, + *, + access_token: str, + params: dict[str, Any] | None = None, + json: dict[str, Any] | None = None, + ) -> dict[str, Any]: + headers = {"Authorization": f"Bearer {access_token}"} + last_error: SpotifyApiError | None = None + for _ in range(3): + resp = await self.http.request( + method, + f"{self.API_BASE}{path}", + headers=headers, + params=params, + json=json, + timeout=30, + ) + if resp.status_code == 429: + retry_after = int(resp.headers.get("Retry-After", "1")) + await asyncio.sleep(max(1, retry_after)) + continue + if resp.status_code >= 400: + last_error = SpotifyApiError( + f"Spotify API request failed: {method} {path}", + resp.status_code, + resp.text, + ) + break + return resp.json() if resp.content else {} + if last_error: + raise last_error + raise SpotifyApiError(f"Spotify API request failed after retries: {method} {path}") + + @staticmethod + def token_expiry_from_response(token_payload: dict[str, Any]) -> datetime: + expires_in = int(token_payload.get("expires_in", 3600)) + return datetime.now(timezone.utc) + timedelta(seconds=max(60, expires_in - 60)) diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..7bc0722 --- /dev/null +++ b/app/config.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from functools import lru_cache +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore") + + app_env: str = "dev" + app_base_url: str = "http://localhost:8000" + app_timezone: str = "UTC" + app_internal_url: str = "http://app:8000" + app_port: int = 8000 + + telegram_bot_token: str + + spotify_client_id: str + spotify_client_secret: str + spotify_redirect_uri: str + spotify_default_market: str = "US" + + lastfm_api_key: str | None = None + + internal_job_token: str + db_path: str = "/data/app.db" + + default_playlist_size: int = 30 + min_new_ratio: float = Field(default=0.8, ge=0.0, le=1.0) + recent_days_window: int = 5 + playlist_visibility: str = "private" + + @property + def database_url(self) -> str: + return f"sqlite+aiosqlite:///{self.db_path}" + + +@lru_cache +def get_settings() -> Settings: + return Settings() diff --git a/app/db/__init__.py b/app/db/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/db/base.py b/app/db/base.py new file mode 100644 index 0000000..4e5854f --- /dev/null +++ b/app/db/base.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +def utcnow() -> datetime: + return datetime.now(timezone.utc) + + +class Base(DeclarativeBase): + pass + + +class TimestampMixin: + created_at: Mapped[datetime] = mapped_column(default=utcnow) + updated_at: Mapped[datetime] = mapped_column(default=utcnow, onupdate=utcnow) diff --git a/app/db/models.py b/app/db/models.py new file mode 100644 index 0000000..750a794 --- /dev/null +++ b/app/db/models.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +from datetime import date, datetime + +from sqlalchemy import Boolean, Date, DateTime, Float, ForeignKey, Integer, String, Text, UniqueConstraint +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from app.db.base import Base, TimestampMixin + + +class User(Base, TimestampMixin): + __tablename__ = "users" + + id: Mapped[int] = mapped_column(primary_key=True) + telegram_chat_id: Mapped[str] = mapped_column(String(64), unique=True, index=True) + telegram_username: Mapped[str | None] = mapped_column(String(128), nullable=True) + + spotify_user_id: Mapped[str | None] = mapped_column(String(128), unique=True, nullable=True, index=True) + spotify_access_token: Mapped[str | None] = mapped_column(Text, nullable=True) + spotify_refresh_token: Mapped[str | None] = mapped_column(Text, nullable=True) + spotify_token_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) + spotify_scopes: Mapped[str | None] = mapped_column(Text, nullable=True) + + timezone: Mapped[str] = mapped_column(String(64), default="UTC") + is_active: Mapped[bool] = mapped_column(Boolean, default=True) + playlist_size: Mapped[int] = mapped_column(Integer, default=30) + min_new_ratio: Mapped[float] = mapped_column(Float, default=0.8) + last_generated_date: Mapped[date | None] = mapped_column(Date, nullable=True) + latest_playlist_id: Mapped[str | None] = mapped_column(String(128), nullable=True) + latest_playlist_url: Mapped[str | None] = mapped_column(Text, nullable=True) + + saved_tracks: Mapped[list["SavedTrack"]] = relationship(back_populates="user", cascade="all, delete-orphan") + playlist_runs: Mapped[list["PlaylistRun"]] = relationship(back_populates="user", cascade="all, delete-orphan") + rec_history: Mapped[list["RecommendationHistory"]] = relationship( + back_populates="user", cascade="all, delete-orphan" + ) + + +class AuthState(Base): + __tablename__ = "auth_states" + + id: Mapped[int] = mapped_column(primary_key=True) + state: Mapped[str] = mapped_column(String(128), unique=True, index=True) + telegram_chat_id: Mapped[str] = mapped_column(String(64), index=True) + expires_at: Mapped[datetime] = mapped_column(DateTime(timezone=True)) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True)) + + +class SavedTrack(Base): + __tablename__ = "saved_tracks" + __table_args__ = (UniqueConstraint("user_id", "spotify_track_id", name="uq_saved_track_user_track"),) + + id: Mapped[int] = mapped_column(primary_key=True) + user_id: Mapped[int] = mapped_column(ForeignKey("users.id", ondelete="CASCADE"), index=True) + spotify_track_id: Mapped[str] = mapped_column(String(128), index=True) + name: Mapped[str] = mapped_column(Text) + artist_names: Mapped[str] = mapped_column(Text) + artist_ids_csv: Mapped[str] = mapped_column(Text) + album_name: Mapped[str | None] = mapped_column(Text, nullable=True) + added_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) + popularity: Mapped[int | None] = mapped_column(Integer, nullable=True) + + user: Mapped[User] = relationship(back_populates="saved_tracks") + + +class RecommendationHistory(Base): + __tablename__ = "recommendation_history" + __table_args__ = (UniqueConstraint("user_id", "spotify_track_id", name="uq_rec_history_user_track"),) + + id: Mapped[int] = mapped_column(primary_key=True) + user_id: Mapped[int] = mapped_column(ForeignKey("users.id", ondelete="CASCADE"), index=True) + spotify_track_id: Mapped[str] = mapped_column(String(128), index=True) + first_recommended_at: Mapped[datetime] = mapped_column(DateTime(timezone=True)) + last_recommended_at: Mapped[datetime] = mapped_column(DateTime(timezone=True)) + times_recommended: Mapped[int] = mapped_column(Integer, default=1) + + user: Mapped[User] = relationship(back_populates="rec_history") + + +class PlaylistRun(Base, TimestampMixin): + __tablename__ = "playlist_runs" + + id: Mapped[int] = mapped_column(primary_key=True) + user_id: Mapped[int] = mapped_column(ForeignKey("users.id", ondelete="CASCADE"), index=True) + run_date: Mapped[date] = mapped_column(Date) + status: Mapped[str] = mapped_column(String(32), default="pending") + playlist_id: Mapped[str | None] = mapped_column(String(128), nullable=True) + playlist_name: Mapped[str | None] = mapped_column(Text, nullable=True) + playlist_url: Mapped[str | None] = mapped_column(Text, nullable=True) + total_tracks: Mapped[int] = mapped_column(Integer, default=0) + new_tracks: Mapped[int] = mapped_column(Integer, default=0) + reused_tracks: Mapped[int] = mapped_column(Integer, default=0) + notes: Mapped[str | None] = mapped_column(Text, nullable=True) + + user: Mapped[User] = relationship(back_populates="playlist_runs") + tracks: Mapped[list["PlaylistRunTrack"]] = relationship(back_populates="run", cascade="all, delete-orphan") + + +class PlaylistRunTrack(Base): + __tablename__ = "playlist_run_tracks" + __table_args__ = ( + UniqueConstraint("run_id", "spotify_track_id", name="uq_run_track"), + UniqueConstraint("run_id", "position", name="uq_run_position"), + ) + + id: Mapped[int] = mapped_column(primary_key=True) + run_id: Mapped[int] = mapped_column(ForeignKey("playlist_runs.id", ondelete="CASCADE"), index=True) + spotify_track_id: Mapped[str] = mapped_column(String(128), index=True) + name: Mapped[str] = mapped_column(Text) + artist_names: Mapped[str] = mapped_column(Text) + source: Mapped[str] = mapped_column(String(64)) + position: Mapped[int] = mapped_column(Integer) + is_new_to_bot: Mapped[bool] = mapped_column(Boolean, default=True) + + run: Mapped[PlaylistRun] = relationship(back_populates="tracks") diff --git a/app/db/repositories.py b/app/db/repositories.py new file mode 100644 index 0000000..df3c309 --- /dev/null +++ b/app/db/repositories.py @@ -0,0 +1,201 @@ +from __future__ import annotations + +from collections.abc import Iterable +from datetime import date, datetime, timezone + +from sqlalchemy import delete, func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db.models import ( + AuthState, + PlaylistRun, + PlaylistRunTrack, + RecommendationHistory, + SavedTrack, + User, +) +from app.utils.time import ensure_utc + + +class UserRepository: + def __init__(self, session: AsyncSession) -> None: + self.session = session + + async def get_or_create_by_chat(self, chat_id: str, username: str | None = None) -> User: + user = await self.get_by_chat_id(chat_id) + if user: + if username and user.telegram_username != username: + user.telegram_username = username + return user + user = User(telegram_chat_id=chat_id, telegram_username=username) + self.session.add(user) + await self.session.flush() + return user + + async def get_by_chat_id(self, chat_id: str) -> User | None: + result = await self.session.execute(select(User).where(User.telegram_chat_id == chat_id)) + return result.scalar_one_or_none() + + async def get_by_id(self, user_id: int) -> User | None: + result = await self.session.execute(select(User).where(User.id == user_id)) + return result.scalar_one_or_none() + + async def list_active_connected_users(self) -> list[User]: + result = await self.session.execute( + select(User).where(User.is_active.is_(True), User.spotify_refresh_token.is_not(None)) + ) + return list(result.scalars().all()) + + +class AuthStateRepository: + def __init__(self, session: AsyncSession) -> None: + self.session = session + + async def create(self, state: str, telegram_chat_id: str, expires_at: datetime) -> AuthState: + row = AuthState( + state=state, + telegram_chat_id=telegram_chat_id, + expires_at=expires_at, + created_at=datetime.now(timezone.utc), + ) + self.session.add(row) + await self.session.flush() + return row + + async def pop_valid(self, state: str) -> AuthState | None: + now = datetime.now(timezone.utc) + result = await self.session.execute(select(AuthState).where(AuthState.state == state)) + row = result.scalar_one_or_none() + if not row: + return None + await self.session.delete(row) + if ensure_utc(row.expires_at) < now: + return None + return row + + async def delete_expired(self) -> int: + result = await self.session.execute(delete(AuthState).where(AuthState.expires_at < datetime.now(timezone.utc))) + return result.rowcount or 0 + + +class SavedTrackRepository: + def __init__(self, session: AsyncSession) -> None: + self.session = session + + async def replace_for_user(self, user_id: int, tracks: Iterable[dict]) -> None: + await self.session.execute(delete(SavedTrack).where(SavedTrack.user_id == user_id)) + for item in tracks: + self.session.add( + SavedTrack( + user_id=user_id, + spotify_track_id=item["id"], + name=item["name"], + artist_names=", ".join(item["artist_names"]), + artist_ids_csv=",".join(item["artist_ids"]), + album_name=item.get("album_name"), + added_at=item.get("added_at"), + popularity=item.get("popularity"), + ) + ) + await self.session.flush() + + async def list_for_user(self, user_id: int) -> list[SavedTrack]: + result = await self.session.execute(select(SavedTrack).where(SavedTrack.user_id == user_id)) + return list(result.scalars().all()) + + async def count_for_user(self, user_id: int) -> int: + result = await self.session.execute(select(func.count()).select_from(SavedTrack).where(SavedTrack.user_id == user_id)) + return int(result.scalar_one()) + + +class RecommendationHistoryRepository: + def __init__(self, session: AsyncSession) -> None: + self.session = session + + async def list_track_ids(self, user_id: int) -> set[str]: + result = await self.session.execute( + select(RecommendationHistory.spotify_track_id).where(RecommendationHistory.user_id == user_id) + ) + return {row[0] for row in result.all()} + + async def mark_tracks(self, user_id: int, track_ids: list[str]) -> None: + if not track_ids: + return + now = datetime.now(timezone.utc) + result = await self.session.execute(select(RecommendationHistory).where(RecommendationHistory.user_id == user_id)) + existing = {row.spotify_track_id: row for row in result.scalars().all()} + for track_id in track_ids: + if track_id in existing: + row = existing[track_id] + row.last_recommended_at = now + row.times_recommended += 1 + else: + self.session.add( + RecommendationHistory( + user_id=user_id, + spotify_track_id=track_id, + first_recommended_at=now, + last_recommended_at=now, + times_recommended=1, + ) + ) + await self.session.flush() + + +class PlaylistRunRepository: + def __init__(self, session: AsyncSession) -> None: + self.session = session + + async def create_run(self, user_id: int, run_date: date, notes: str | None = None) -> PlaylistRun: + run = PlaylistRun(user_id=user_id, run_date=run_date, status="running", notes=notes) + self.session.add(run) + await self.session.flush() + return run + + async def add_tracks(self, run_id: int, tracks: list[dict]) -> None: + for idx, track in enumerate(tracks, start=1): + self.session.add( + PlaylistRunTrack( + run_id=run_id, + spotify_track_id=track["id"], + name=track["name"], + artist_names=", ".join(track["artist_names"]), + source=track["source"], + position=idx, + is_new_to_bot=track.get("is_new_to_bot", True), + ) + ) + await self.session.flush() + + async def mark_success( + self, + run: PlaylistRun, + *, + playlist_id: str, + playlist_name: str, + playlist_url: str | None, + total_tracks: int, + new_tracks: int, + reused_tracks: int, + notes: str | None = None, + ) -> None: + run.status = "success" + run.playlist_id = playlist_id + run.playlist_name = playlist_name + run.playlist_url = playlist_url + run.total_tracks = total_tracks + run.new_tracks = new_tracks + run.reused_tracks = reused_tracks + run.notes = notes + await self.session.flush() + + async def mark_failed(self, run: PlaylistRun, message: str) -> None: + run.status = "failed" + run.notes = message + await self.session.flush() + + async def latest_for_user(self, user_id: int) -> PlaylistRun | None: + result = await self.session.execute( + select(PlaylistRun).where(PlaylistRun.user_id == user_id).order_by(PlaylistRun.created_at.desc()).limit(1) + ) + return result.scalar_one_or_none() diff --git a/app/db/session.py b/app/db/session.py new file mode 100644 index 0000000..e4b5b5a --- /dev/null +++ b/app/db/session.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from collections.abc import AsyncIterator +from contextlib import asynccontextmanager + +from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker, create_async_engine + +from app.config import Settings +from app.db.base import Base + + +def create_engine(settings: Settings) -> AsyncEngine: + return create_async_engine(settings.database_url, future=True, echo=False) + + +def create_session_factory(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]: + return async_sessionmaker(engine, expire_on_commit=False, class_=AsyncSession) + + +async def init_db(engine: AsyncEngine) -> None: + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + +@asynccontextmanager +async def session_scope(factory: async_sessionmaker[AsyncSession]) -> AsyncIterator[AsyncSession]: + session = factory() + try: + yield session + await session.commit() + except Exception: + await session.rollback() + raise + finally: + await session.close() diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..c500261 --- /dev/null +++ b/app/main.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import asyncio +from contextlib import asynccontextmanager + +import httpx +from fastapi import FastAPI + +from app.api.routes import get_router +from app.bot.telegram_bot import TelegramBotRunner +from app.clients.lastfm import LastFmClient +from app.clients.spotify import SpotifyClient +from app.config import get_settings +from app.db.session import create_engine, create_session_factory, init_db +from app.runtime import AppRuntime +from app.services.app_services import AppServices +from app.services.playlist_job import PlaylistJobService +from app.services.recommendation import RecommendationEngine +from app.services.spotify_auth import SpotifyAuthService + + +def create_app() -> FastAPI: + settings = get_settings() + + @asynccontextmanager + async def lifespan(app: FastAPI): + engine = create_engine(settings) + session_factory = create_session_factory(engine) + await init_db(engine) + + http_client = httpx.AsyncClient(headers={"User-Agent": "spotify-vibe-bot/1.0"}) + spotify = SpotifyClient(settings, http_client) + lastfm = LastFmClient(settings.lastfm_api_key, http_client) + auth_service = SpotifyAuthService(settings, spotify, session_factory) + rec_engine = RecommendationEngine(settings, spotify, lastfm) + job_service = PlaylistJobService(settings, session_factory, auth_service, rec_engine, asyncio.Lock()) + services = AppServices(auth=auth_service, recommendation=rec_engine, jobs=job_service) + + runtime = AppRuntime( + settings=settings, + engine=engine, + session_factory=session_factory, + http_client=http_client, + spotify=spotify, + lastfm=lastfm, + generate_lock=job_service.generate_lock, + ) + app.state.runtime = runtime + app.state.services = services + + telegram_runner = TelegramBotRunner( + token=settings.telegram_bot_token, + session_factory=session_factory, + services=services, + app_base_url=settings.app_base_url, + ) + runtime.telegram_runner = telegram_runner + job_service.set_notifier(telegram_runner.send_message) + + await telegram_runner.start_polling() + try: + yield + finally: + await telegram_runner.stop() + await http_client.aclose() + await engine.dispose() + + app = FastAPI(title="Spotify Vibe Bot", lifespan=lifespan) + app.include_router(get_router()) + return app diff --git a/app/runtime.py b/app/runtime.py new file mode 100644 index 0000000..13b72a4 --- /dev/null +++ b/app/runtime.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from typing import Any + +import httpx +from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker + +from app.clients.lastfm import LastFmClient +from app.clients.spotify import SpotifyClient +from app.config import Settings + + +@dataclass +class AppRuntime: + settings: Settings + engine: AsyncEngine + session_factory: async_sessionmaker[AsyncSession] + http_client: httpx.AsyncClient + spotify: SpotifyClient + lastfm: LastFmClient + generate_lock: asyncio.Lock + telegram_runner: Any = None + diff --git a/app/services/__init__.py b/app/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/services/app_services.py b/app/services/app_services.py new file mode 100644 index 0000000..bc8ee31 --- /dev/null +++ b/app/services/app_services.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from dataclasses import dataclass + +from app.services.playlist_job import PlaylistJobService +from app.services.recommendation import RecommendationEngine +from app.services.spotify_auth import SpotifyAuthService + + +@dataclass +class AppServices: + auth: SpotifyAuthService + recommendation: RecommendationEngine + jobs: PlaylistJobService + diff --git a/app/services/playlist_job.py b/app/services/playlist_job.py new file mode 100644 index 0000000..2ce5680 --- /dev/null +++ b/app/services/playlist_job.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from datetime import date + +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + +from app.config import Settings +from app.db.repositories import PlaylistRunRepository, RecommendationHistoryRepository, UserRepository +from app.services.recommendation import RecommendationEngine +from app.services.spotify_auth import SpotifyAuthService +from app.types import PlaylistBuildResult + + +@dataclass +class JobOutcome: + user_id: int + ok: bool + message: str + playlist_url: str | None = None + + +class PlaylistJobService: + def __init__( + self, + settings: Settings, + session_factory: async_sessionmaker[AsyncSession], + auth_service: SpotifyAuthService, + recommendation_engine: RecommendationEngine, + generate_lock: asyncio.Lock, + ) -> None: + self.settings = settings + self.session_factory = session_factory + self.auth_service = auth_service + self.recommendation_engine = recommendation_engine + self.generate_lock = generate_lock + self._notify = None + + def set_notifier(self, notifier) -> None: + self._notify = notifier + + async def generate_for_user(self, user_id: int, *, force: bool = False, notify: bool = True) -> JobOutcome: + async with self.generate_lock: + async with self.session_factory() as session: + users = UserRepository(session) + runs = PlaylistRunRepository(session) + history = RecommendationHistoryRepository(session) + + user = await users.get_by_id(user_id) + if not user: + return JobOutcome(user_id=user_id, ok=False, message="User not found") + if not user.spotify_refresh_token: + return JobOutcome(user_id=user_id, ok=False, message="Spotify is not connected") + if not force and user.last_generated_date == date.today(): + latest = await runs.latest_for_user(user.id) + return JobOutcome( + user_id=user.id, + ok=True, + message="Already generated today", + playlist_url=latest.playlist_url if latest else user.latest_playlist_url, + ) + + run = await runs.create_run(user_id=user.id, run_date=date.today()) + try: + access_token = await self.auth_service.ensure_valid_access_token(session, user) + # Re-sync likes each run so new likes affect next day's picks. + await self.recommendation_engine.sync_saved_tracks(session, user, access_token) + build = await self.recommendation_engine.build_daily_playlist(session, user, access_token) + if not build.tracks: + raise RuntimeError("No candidate tracks found. Try listening more or widen sources.") + playlist_meta = await self._create_spotify_playlist(session, user, access_token, build) + + serialized_tracks = [] + for c in build.tracks: + serialized_tracks.append( + { + "id": c.id, + "name": c.name, + "artist_names": c.artist_names, + "source": c.source, + "is_new_to_bot": True, # fixed below + } + ) + history_ids = await history.list_track_ids(user.id) + for item in serialized_tracks: + item["is_new_to_bot"] = item["id"] not in history_ids + + await runs.add_tracks(run.id, serialized_tracks) + await history.mark_tracks(user.id, [c.id for c in build.tracks]) + await runs.mark_success( + run, + playlist_id=playlist_meta["id"], + playlist_name=playlist_meta["name"], + playlist_url=playlist_meta.get("url"), + total_tracks=len(build.tracks), + new_tracks=build.new_count, + reused_tracks=build.reused_count, + notes=build.notes, + ) + user.last_generated_date = date.today() + user.latest_playlist_id = playlist_meta["id"] + user.latest_playlist_url = playlist_meta.get("url") + await session.commit() + message = ( + f"Playlist ready: {playlist_meta['name']} ({len(build.tracks)} tracks, " + f"new {build.new_count}/{len(build.tracks)})" + ) + if notify and self._notify: + await self._notify(user.telegram_chat_id, f"{message}\n{playlist_meta.get('url', '')}".strip()) + return JobOutcome(user_id=user.id, ok=True, message=message, playlist_url=playlist_meta.get("url")) + except Exception as exc: + await runs.mark_failed(run, str(exc)) + await session.commit() + if notify and self._notify: + await self._notify(user.telegram_chat_id, f"Playlist generation failed: {exc}") + return JobOutcome(user_id=user.id, ok=False, message=str(exc)) + + async def generate_for_all_connected_users(self) -> list[JobOutcome]: + async with self.session_factory() as session: + users_repo = UserRepository(session) + users = await users_repo.list_active_connected_users() + outcomes: list[JobOutcome] = [] + for user in users: + outcomes.append(await self.generate_for_user(user.id, notify=True)) + return outcomes + + async def _create_spotify_playlist( + self, session: AsyncSession, user, access_token: str, build: PlaylistBuildResult + ) -> dict[str, str | None]: + public = self.settings.playlist_visibility.lower() == "public" + name = f"Daily Vibe {date.today().isoformat()}" + desc = ( + "Auto-generated from your recent listening + liked tracks. " + f"New-to-bot: {build.new_count}/{len(build.tracks)}." + ) + playlist = await self.auth_service.spotify.create_playlist( + access_token, + user_id=user.spotify_user_id, + name=name, + description=desc, + public=public, + ) + await self.auth_service.spotify.add_playlist_items(access_token, playlist["id"], [c.uri for c in build.tracks]) + return { + "id": playlist["id"], + "name": playlist["name"], + "url": ((playlist.get("external_urls") or {}).get("spotify")), + } diff --git a/app/services/recommendation.py b/app/services/recommendation.py new file mode 100644 index 0000000..12fbca7 --- /dev/null +++ b/app/services/recommendation.py @@ -0,0 +1,374 @@ +from __future__ import annotations + +import math +import random +from collections import Counter, defaultdict +from datetime import date, timedelta +from typing import Any + +from sqlalchemy.ext.asyncio import AsyncSession + +from app.clients.lastfm import LastFmClient +from app.clients.spotify import SpotifyApiError, SpotifyClient +from app.config import Settings +from app.db.models import SavedTrack, User +from app.db.repositories import RecommendationHistoryRepository, SavedTrackRepository +from app.types import PlaylistBuildResult, TrackCandidate +from app.utils.text import normalize_track_signature +from app.utils.time import utcnow + + +class RecommendationEngine: + def __init__(self, settings: Settings, spotify: SpotifyClient, lastfm: LastFmClient) -> None: + self.settings = settings + self.spotify = spotify + self.lastfm = lastfm + + async def sync_saved_tracks(self, session: AsyncSession, user: User, access_token: str) -> list[SavedTrack]: + saved_tracks_repo = SavedTrackRepository(session) + raw = await self.spotify.get_saved_tracks_all(access_token) + await saved_tracks_repo.replace_for_user(user.id, raw) + return await saved_tracks_repo.list_for_user(user.id) + + async def build_daily_playlist(self, session: AsyncSession, user: User, access_token: str) -> PlaylistBuildResult: + saved_tracks_repo = SavedTrackRepository(session) + history_repo = RecommendationHistoryRepository(session) + + saved_rows = await saved_tracks_repo.list_for_user(user.id) + if not saved_rows: + saved_rows = await self.sync_saved_tracks(session, user, access_token) + + recent_since = utcnow() - timedelta(days=self.settings.recent_days_window) + recent_plays = await self.spotify.get_recently_played(access_token, since=recent_since, max_pages=12) + + seed = self._build_seed_profile(saved_rows, recent_plays, user_id=user.id) + history_ids = await history_repo.list_track_ids(user.id) + liked_ids = {row.spotify_track_id for row in saved_rows} + + market = self._normalize_spotify_market(self.settings.spotify_default_market) + candidates = await self._collect_candidates( + access_token=access_token, + seed=seed, + market=market, + ) + result = self._rank_and_select( + candidates=candidates, + liked_ids=liked_ids, + history_ids=history_ids, + target_size=max(1, user.playlist_size or self.settings.default_playlist_size), + min_new_ratio=user.min_new_ratio if user.min_new_ratio is not None else self.settings.min_new_ratio, + ) + return result + + def _build_seed_profile(self, saved_rows: list[SavedTrack], recent_plays: list[dict[str, Any]], *, user_id: int) -> dict[str, Any]: + today = date.today() + rng = random.Random(f"{user_id}-{today.isoformat()}") + + recent_track_counts: Counter[str] = Counter() + recent_track_meta: dict[str, dict[str, Any]] = {} + artist_weights: Counter[str] = Counter() + artist_names: dict[str, str] = {} + + for idx, play in enumerate(sorted(recent_plays, key=lambda x: x.get("played_at") or utcnow(), reverse=True)): + track_id = play["id"] + weight = max(1.0, 3.0 - (idx * 0.04)) + recent_track_counts[track_id] += weight + recent_track_meta[track_id] = play + for artist_id, artist_name in zip(play.get("artist_ids", []), play.get("artist_names", [])): + artist_weights[artist_id] += weight + artist_names[artist_id] = artist_name + + sorted_saved = sorted(saved_rows, key=lambda x: x.added_at or utcnow(), reverse=True) + recent_likes = sorted_saved[:120] + sampled_older = rng.sample(sorted_saved[120:], k=min(180, max(0, len(sorted_saved) - 120))) if len(sorted_saved) > 120 else [] + exploration_pool = recent_likes + sampled_older + + for idx, row in enumerate(exploration_pool): + base_weight = 1.2 if idx < 50 else 0.6 + artist_ids = [a for a in row.artist_ids_csv.split(",") if a] + artist_list = [x.strip() for x in row.artist_names.split(",") if x.strip()] + for artist_id, artist_name in zip(artist_ids, artist_list): + artist_weights[artist_id] += base_weight + artist_names[artist_id] = artist_name + + seed_track_ids = [t for t, _ in recent_track_counts.most_common(10)] + if len(seed_track_ids) < 10: + for row in recent_likes[:20]: + if row.spotify_track_id not in seed_track_ids: + seed_track_ids.append(row.spotify_track_id) + if len(seed_track_ids) >= 10: + break + + seed_artist_ids = [a for a, _ in artist_weights.most_common(20)] + seed_artist_names = [artist_names[a] for a in seed_artist_ids if a in artist_names] + + return { + "seed_track_ids": seed_track_ids, + "seed_artists": seed_artist_ids, + "seed_artist_names": seed_artist_names, + "recent_track_meta": recent_track_meta, + } + + async def _collect_candidates(self, *, access_token: str, seed: dict[str, Any], market: str | None) -> list[TrackCandidate]: + by_id: dict[str, TrackCandidate] = {} + sig_to_id: dict[str, str] = {} + source_count: Counter[str] = Counter() + recent_track_meta = seed["recent_track_meta"] + + def upsert(candidate: TrackCandidate) -> None: + sig = normalize_track_signature(candidate.name, candidate.artist_names) + existing_id = sig_to_id.get(sig) + if existing_id and existing_id != candidate.id and existing_id in by_id: + if candidate.score <= by_id[existing_id].score: + return + del by_id[existing_id] + existing = by_id.get(candidate.id) + if existing: + existing.score = max(existing.score, candidate.score) + if candidate.source not in existing.source: + existing.source = f"{existing.source}+{candidate.source}" + for reason in candidate.seed_reasons: + if reason not in existing.seed_reasons: + existing.seed_reasons.append(reason) + return + by_id[candidate.id] = candidate + sig_to_id[sig] = candidate.id + source_count[candidate.source] += 1 + + seed_tracks = list(seed["seed_track_ids"]) + seed_artists = list(seed["seed_artists"]) + top_tracks_market = market or "US" + + for batch_idx in range(4): + # Spotify recommendations endpoint supports max 5 total seeds. + track_start = batch_idx * 2 + artist_start = batch_idx * 3 + batch_seed_tracks = seed_tracks[track_start : track_start + 2] + remaining_slots = max(0, 5 - len(batch_seed_tracks)) + batch_seed_artists = seed_artists[artist_start : artist_start + remaining_slots] + if not batch_seed_tracks and not batch_seed_artists: + continue + try: + rec_tracks = await self.spotify.get_recommendations( + access_token, + seed_tracks=batch_seed_tracks, + seed_artists=batch_seed_artists, + limit=100, + market=market, + ) + except SpotifyApiError: + rec_tracks = [] + for raw in rec_tracks: + cand = self._candidate_from_spotify_track(raw, source="spotify_recommendations", base_score=1.0) + if not cand: + continue + if any(a in batch_seed_artists for a in cand.artist_ids): + cand.score += 0.08 + upsert(cand) + + for artist_id in seed_artists[:12]: + try: + top_tracks = await self.spotify.get_artist_top_tracks( + access_token, artist_id=artist_id, market=top_tracks_market + ) + except SpotifyApiError: + continue + for raw in top_tracks: + cand = self._candidate_from_spotify_track(raw, source="artist_top_tracks", base_score=0.68) + if not cand: + continue + if artist_id in cand.artist_ids: + cand.score += 0.07 + upsert(cand) + + # Fallback for apps/accounts where top-tracks and recommendations endpoints are restricted. + if len(by_id) < 40: + for artist_name in seed.get("seed_artist_names", [])[:12]: + if not artist_name: + continue + try: + search_hits = await self.spotify.search_track( + access_token, + track_name="", + artist_name=artist_name, + market=market, + ) + except SpotifyApiError: + continue + for raw in search_hits[:3]: + cand = self._candidate_from_spotify_track(raw, source="spotify_search_artist", base_score=0.55) + if not cand: + continue + if artist_name.lower() in {a.lower() for a in cand.artist_names}: + cand.score += 0.05 + upsert(cand) + + if self.lastfm.enabled: + for track_id in seed_tracks[:10]: + meta = recent_track_meta.get(track_id) + if not meta: + continue + artist_name = (meta.get("artist_names") or [None])[0] + if not artist_name: + continue + try: + similars = await self.lastfm.track_similar(artist=artist_name, track=meta["name"], limit=10) + except Exception: + similars = [] + for item in similars[:5]: + lf_name = item.get("name") + lf_artist = (item.get("artist") or {}).get("name") if isinstance(item.get("artist"), dict) else None + if not lf_name: + continue + try: + search_hits = await self.spotify.search_track( + access_token, track_name=lf_name, artist_name=lf_artist, market=market + ) + except SpotifyApiError: + search_hits = [] + for raw in search_hits[:1]: + cand = self._candidate_from_spotify_track(raw, source="lastfm_track_similar", base_score=0.9) + if cand: + upsert(cand) + + for artist_name in seed.get("seed_artist_names", [])[:8]: + try: + similars = await self.lastfm.artist_similar(artist=artist_name, limit=8) + except Exception: + similars = [] + for item in similars[:4]: + sim_artist = item.get("name") + if not sim_artist: + continue + try: + search_hits = await self.spotify.search_track( + access_token, track_name="", artist_name=sim_artist, market=market + ) + except SpotifyApiError: + search_hits = [] + for raw in search_hits[:2]: + cand = self._candidate_from_spotify_track(raw, source="lastfm_artist_similar", base_score=0.78) + if cand: + upsert(cand) + + return list(by_id.values()) + + def _candidate_from_spotify_track(self, raw: dict[str, Any], *, source: str, base_score: float) -> TrackCandidate | None: + track_id = raw.get("id") + if not track_id: + return None + artists = raw.get("artists") or [] + artist_names = [a.get("name") or "Unknown" for a in artists] + artist_ids = [a.get("id") for a in artists if a.get("id")] + popularity = raw.get("popularity") + + score = base_score + if isinstance(popularity, int): + # Prefer mid-popularity a bit to avoid obvious mainstream repeats and totally obscure misses. + score += max(-0.12, 0.15 - abs(popularity - 55) / 250) + + return TrackCandidate( + id=track_id, + uri=raw.get("uri") or f"spotify:track:{track_id}", + name=raw.get("name") or "Unknown", + artist_names=artist_names, + artist_ids=artist_ids, + popularity=popularity, + source=source, + score=score, + ) + + def _rank_and_select( + self, + *, + candidates: list[TrackCandidate], + liked_ids: set[str], + history_ids: set[str], + target_size: int, + min_new_ratio: float, + ) -> PlaylistBuildResult: + min_new_required = math.ceil(target_size * min_new_ratio) + + filtered = [c for c in candidates if c.id not in liked_ids] + liked_fallback_used = False + if not filtered and candidates: + # If every discovered candidate is already liked, degrade gracefully instead of failing the run. + filtered = list(candidates) + liked_fallback_used = True + for c in filtered: + if c.id in liked_ids: + c.score -= 0.35 + if c.id in history_ids: + c.score -= 0.2 + if len(c.artist_ids) > 1: + c.score += 0.03 + c.score += min(0.15, 0.01 * len(c.seed_reasons)) + + filtered.sort(key=lambda c: (c.score, c.popularity or 0), reverse=True) + + novel = [c for c in filtered if c.id not in history_ids and c.id not in liked_ids] + reused = [c for c in filtered if c.id in history_ids or c.id in liked_ids] + + selected: list[TrackCandidate] = [] + artist_caps: defaultdict[str, int] = defaultdict(int) + + def try_take(pool: list[TrackCandidate], count: int, hard_artist_cap: int) -> None: + for c in pool: + if len(selected) >= count: + return + if any(s.id == c.id for s in selected): + continue + main_artist = c.artist_ids[0] if c.artist_ids else f"name:{(c.artist_names or [''])[0].lower()}" + if artist_caps[main_artist] >= hard_artist_cap: + continue + artist_caps[main_artist] += 1 + selected.append(c) + + try_take(novel, min_new_required, hard_artist_cap=2) + if len([c for c in selected if c.id not in history_ids and c.id not in liked_ids]) < min_new_required: + try_take(novel, min_new_required, hard_artist_cap=4) + + try_take(novel, target_size, hard_artist_cap=3) + try_take(reused, target_size, hard_artist_cap=2) + if len(selected) < target_size: + try_take(reused, target_size, hard_artist_cap=4) + + # Mark novelty for persistence. + for c in selected: + c.seed_reasons = c.seed_reasons or [] + + new_count = len([c for c in selected if c.id not in history_ids and c.id not in liked_ids]) + reused_count = len(selected) - new_count + + notes_parts: list[str] = [] + if liked_fallback_used: + notes_parts.append("All discovered candidates were already in Liked Songs; allowed liked-track fallback.") + if new_count < min_new_required: + notes_parts.append( + f"Not enough completely new tracks to satisfy {int(min_new_ratio * 100)}% target " + f"(got {new_count}/{target_size})." + ) + notes = " ".join(notes_parts) if notes_parts else None + + return PlaylistBuildResult( + tracks=selected, + target_size=target_size, + new_count=new_count, + reused_count=reused_count, + min_new_required=min_new_required, + notes=notes, + ) + + @staticmethod + def _normalize_spotify_market(value: str | None) -> str | None: + if not value: + return None + market = value.strip().upper() + if not market: + return None + # Common shorthand users put in .env, but Spotify APIs expect a country code. + if market in {"EU", "GLOBAL", "WORLD", "ALL"}: + return None + if len(market) == 2 and market.isalpha(): + return market + return None diff --git a/app/services/spotify_auth.py b/app/services/spotify_auth.py new file mode 100644 index 0000000..9f9296a --- /dev/null +++ b/app/services/spotify_auth.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import secrets +from datetime import timedelta + +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + +from app.clients.spotify import SpotifyClient +from app.config import Settings +from app.db.repositories import AuthStateRepository, UserRepository +from app.utils.time import ensure_utc, utcnow + + +SPOTIFY_SCOPES = [ + "user-library-read", + "user-read-recently-played", + "playlist-modify-private", + "playlist-modify-public", +] + + +class SpotifyAuthService: + def __init__(self, settings: Settings, spotify: SpotifyClient, session_factory: async_sessionmaker[AsyncSession]) -> None: + self.settings = settings + self.spotify = spotify + self.session_factory = session_factory + + async def create_connect_url(self, chat_id: str, username: str | None = None) -> str: + async with self.session_factory() as session: + users = UserRepository(session) + states = AuthStateRepository(session) + user = await users.get_or_create_by_chat(chat_id=chat_id, username=username) + user.timezone = user.timezone or self.settings.app_timezone + state = secrets.token_urlsafe(24) + await states.delete_expired() + await states.create(state=state, telegram_chat_id=user.telegram_chat_id, expires_at=utcnow() + timedelta(minutes=15)) + await session.commit() + return self.spotify.build_authorize_url(state=state, scopes=SPOTIFY_SCOPES) + + async def handle_callback(self, code: str, state: str) -> tuple[str, str]: + async with self.session_factory() as session: + users = UserRepository(session) + states = AuthStateRepository(session) + auth_state = await states.pop_valid(state) + if not auth_state: + await session.commit() + raise ValueError("OAuth state is invalid or expired") + + user = await users.get_by_chat_id(auth_state.telegram_chat_id) + if not user: + raise ValueError("User not found for auth state") + + token_payload = await self.spotify.exchange_code(code) + access_token = token_payload["access_token"] + me = await self.spotify.get_current_user(access_token) + + user.spotify_user_id = me.get("id") + user.spotify_access_token = access_token + user.spotify_refresh_token = token_payload.get("refresh_token") or user.spotify_refresh_token + user.spotify_token_expires_at = self.spotify.token_expiry_from_response(token_payload) + user.spotify_scopes = token_payload.get("scope") + user.is_active = True + if not user.timezone: + user.timezone = self.settings.app_timezone + + await session.commit() + return user.telegram_chat_id, me.get("display_name") or me.get("id") or "Spotify user" + + async def ensure_valid_access_token(self, session: AsyncSession, user) -> str: + if ( + user.spotify_access_token + and user.spotify_token_expires_at + and ensure_utc(user.spotify_token_expires_at) > utcnow() + ): + return user.spotify_access_token + if not user.spotify_refresh_token: + raise RuntimeError("User is not connected to Spotify") + token_payload = await self.spotify.refresh_access_token(user.spotify_refresh_token) + user.spotify_access_token = token_payload["access_token"] + if token_payload.get("refresh_token"): + user.spotify_refresh_token = token_payload["refresh_token"] + user.spotify_token_expires_at = self.spotify.token_expiry_from_response(token_payload) + if token_payload.get("scope"): + user.spotify_scopes = token_payload["scope"] + await session.flush() + return user.spotify_access_token diff --git a/app/types.py b/app/types.py new file mode 100644 index 0000000..ff00f63 --- /dev/null +++ b/app/types.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime + + +@dataclass +class TrackCandidate: + id: str + uri: str + name: str + artist_names: list[str] + artist_ids: list[str] + popularity: int | None = None + source: str = "unknown" + score: float = 0.0 + seed_reasons: list[str] = field(default_factory=list) + added_at: datetime | None = None + + +@dataclass +class PlaylistBuildResult: + tracks: list[TrackCandidate] + target_size: int + new_count: int + reused_count: int + min_new_required: int + notes: str | None = None diff --git a/app/utils/__init__.py b/app/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/utils/text.py b/app/utils/text.py new file mode 100644 index 0000000..f7b5dcb --- /dev/null +++ b/app/utils/text.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +import re + + +_WS_RE = re.compile(r"\s+") +_FEAT_RE = re.compile(r"\s*[\(\[\-–—]\s*(feat|ft)\.?.*$", re.IGNORECASE) + + +def normalize_track_signature(name: str, artists: list[str]) -> str: + clean_name = _FEAT_RE.sub("", name).strip().lower() + clean_name = _WS_RE.sub(" ", clean_name) + clean_artists = ",".join(sorted(a.strip().lower() for a in artists if a.strip())) + return f"{clean_name}::{clean_artists}" diff --git a/app/utils/time.py b/app/utils/time.py new file mode 100644 index 0000000..66bf3f5 --- /dev/null +++ b/app/utils/time.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from datetime import datetime, timezone + + +def utcnow() -> datetime: + return datetime.now(timezone.utc) + + +def ensure_utc(dt: datetime) -> datetime: + if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None: + return dt.replace(tzinfo=timezone.utc) + return dt.astimezone(timezone.utc) + + +def parse_spotify_datetime(value: str | None) -> datetime | None: + if not value: + return None + if value.endswith("Z"): + value = value.replace("Z", "+00:00") + return datetime.fromisoformat(value) + + +def to_unix_ms(dt: datetime) -> int: + return int(dt.timestamp() * 1000) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c1b2f85 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,43 @@ +services: + app: + build: . + container_name: spotify-vibe-bot-app + restart: unless-stopped + labels: + - traefik.enable=true + - traefik.docker.network=web + - traefik.http.routers.spotify-vibe.rule=Host(`${TRAEFIK_HOSTNAME}`) + - traefik.http.routers.spotify-vibe.entrypoints=websecure + - traefik.http.routers.spotify-vibe.tls.certresolver=letsencrypt + - traefik.http.services.spotify-vibe.loadbalancer.server.port=8000 + ports: + - "${APP_PORT:-8000}:8000" + env_file: + - .env + environment: + TZ: "${TZ:-UTC}" + volumes: + - ./data:/data + command: ["/app/scripts/start_app.sh"] + networks: + - default + - web + + cron: + profiles: ["cron"] + build: . + container_name: spotify-vibe-bot-cron + restart: unless-stopped + depends_on: + - app + env_file: + - .env + environment: + TZ: "${TZ:-UTC}" + command: ["/app/scripts/start_cron.sh"] + networks: + - default + +networks: + web: + external: true diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..082ecae --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +fastapi>=0.110,<1 +uvicorn[standard]>=0.29,<1 +httpx>=0.27,<1 +SQLAlchemy>=2.0,<3 +aiosqlite>=0.19,<1 +pydantic>=2.6,<3 +pydantic-settings>=2.2,<3 +python-telegram-bot>=21,<23 +python-dotenv>=1,<2 +tenacity>=8,<10 +orjson>=3.9,<4 diff --git a/scripts/run_nightly.sh b/scripts/run_nightly.sh new file mode 100644 index 0000000..381614e --- /dev/null +++ b/scripts/run_nightly.sh @@ -0,0 +1,10 @@ +#!/bin/sh +set -eu + +: "${INTERNAL_JOB_TOKEN:?INTERNAL_JOB_TOKEN is required}" +: "${APP_INTERNAL_URL:=http://app:8000}" + +curl --fail --silent --show-error \ + -X POST \ + -H "Authorization: Bearer ${INTERNAL_JOB_TOKEN}" \ + "${APP_INTERNAL_URL}/internal/jobs/nightly" diff --git a/scripts/start_app.sh b/scripts/start_app.sh new file mode 100644 index 0000000..050f2ca --- /dev/null +++ b/scripts/start_app.sh @@ -0,0 +1,4 @@ +#!/bin/sh +set -eu + +exec uvicorn app.main:create_app --factory --host 0.0.0.0 --port 8000 diff --git a/scripts/start_cron.sh b/scripts/start_cron.sh new file mode 100644 index 0000000..fdcc0e9 --- /dev/null +++ b/scripts/start_cron.sh @@ -0,0 +1,13 @@ +#!/bin/sh +set -eu + +: "${CRON_SCHEDULE:=15 2 * * *}" +: "${APP_TIMEZONE:=UTC}" + +export TZ="${TZ:-$APP_TIMEZONE}" + +cat > /tmp/crontab < list[dict]: + raise RuntimeError("Last.fm key invalid") + + async def artist_similar(self, *, artist: str, limit: int = 15) -> list[dict]: + raise RuntimeError("Last.fm key invalid") + + +class StaticLastFm: + enabled = True + + def __init__( + self, + *, + track_similar_results: dict[tuple[str, str], list[dict]] | None = None, + artist_similar_results: dict[str, list[dict]] | None = None, + ) -> None: + self.track_similar_results = track_similar_results or {} + self.artist_similar_results = artist_similar_results or {} + + async def track_similar(self, *, artist: str, track: str, limit: int = 20) -> list[dict]: + return list(self.track_similar_results.get((artist, track), [])) + + async def artist_similar(self, *, artist: str, limit: int = 15) -> list[dict]: + return list(self.artist_similar_results.get(artist, [])) + + +class RecordingSpotifyStub: + def __init__(self) -> None: + self.recommendation_calls: list[tuple[list[str], list[str], str | None]] = [] + self.top_tracks_calls: list[tuple[str, str]] = [] + self.search_calls: list[tuple[str, str | None]] = [] + + self.raise_recommendations = False + self.raise_top_tracks = False + self.search_results_by_artist: dict[str, list[dict]] = {} + + async def get_recommendations( + self, + access_token: str, + *, + seed_tracks: list[str], + seed_artists: list[str], + limit: int = 100, + market: str | None = None, + ) -> list[dict]: + self.recommendation_calls.append((list(seed_tracks), list(seed_artists), market)) + if self.raise_recommendations: + raise SpotifyApiError("recommendations disabled", 404, "") + return [] + + async def get_artist_top_tracks(self, access_token: str, artist_id: str, market: str) -> list[dict]: + self.top_tracks_calls.append((artist_id, market)) + if self.raise_top_tracks: + raise SpotifyApiError("top tracks forbidden", 403, "") + return [] + + async def search_track( + self, + access_token: str, + *, + track_name: str, + artist_name: str | None = None, + market: str | None = None, + ) -> list[dict]: + self.search_calls.append((track_name, artist_name)) + if not artist_name: + return [] + return list(self.search_results_by_artist.get(artist_name, [])) + + +def make_engine(spotify_stub: RecordingSpotifyStub, lastfm=None) -> RecommendationEngine: + settings = SimpleNamespace( + recent_days_window=5, + spotify_default_market="US", + default_playlist_size=30, + min_new_ratio=0.8, + ) + return RecommendationEngine(settings, spotify_stub, lastfm or DummyLastFm()) + + +def fake_spotify_track(track_id: str, name: str, artist_id: str, artist_name: str, popularity: int = 50) -> dict: + return { + "id": track_id, + "uri": f"spotify:track:{track_id}", + "name": name, + "artists": [{"id": artist_id, "name": artist_name}], + "popularity": popularity, + } + + +class RecommendationEngineTests(unittest.IsolatedAsyncioTestCase): + async def test_collect_candidates_limits_recommendation_seeds_to_five(self) -> None: + spotify = RecordingSpotifyStub() + engine = make_engine(spotify) + seed = { + "seed_track_ids": [f"t{i}" for i in range(10)], + "seed_artists": [f"a{i}" for i in range(20)], + "seed_artist_names": [], + "recent_track_meta": {}, + } + + candidates = await engine._collect_candidates(access_token="token", seed=seed, market=None) + + self.assertEqual(candidates, []) + self.assertEqual(len(spotify.recommendation_calls), 4) + self.assertTrue( + all((len(seed_tracks) + len(seed_artists)) <= 5 for seed_tracks, seed_artists, _ in spotify.recommendation_calls) + ) + + async def test_collect_candidates_uses_search_artist_fallback_when_other_sources_fail(self) -> None: + spotify = RecordingSpotifyStub() + spotify.raise_recommendations = True + spotify.raise_top_tracks = True + spotify.search_results_by_artist = { + "Artist One": [fake_spotify_track("c1", "Song 1", "ax1", "Artist One")], + "Artist Two": [fake_spotify_track("c2", "Song 2", "ax2", "Artist Two")], + } + engine = make_engine(spotify) + seed = { + "seed_track_ids": ["t1", "t2"], + "seed_artists": ["a1", "a2"], + "seed_artist_names": ["Artist One", "Artist Two"], + "recent_track_meta": {}, + } + + candidates = await engine._collect_candidates(access_token="token", seed=seed, market=None) + + self.assertGreaterEqual(len(spotify.search_calls), 1) + self.assertEqual({c.id for c in candidates}, {"c1", "c2"}) + self.assertTrue(all("spotify_search_artist" in c.source for c in candidates)) + + async def test_collect_candidates_tolerates_lastfm_errors(self) -> None: + spotify = RecordingSpotifyStub() + spotify.raise_recommendations = True + spotify.raise_top_tracks = True + spotify.search_results_by_artist = { + "Seed Artist": [fake_spotify_track("c1", "Song 1", "ax1", "Seed Artist")], + } + engine = make_engine(spotify, lastfm=RaisingLastFm()) + seed = { + "seed_track_ids": ["t1"], + "seed_artists": ["a1"], + "seed_artist_names": ["Seed Artist"], + "recent_track_meta": { + "t1": { + "id": "t1", + "name": "Seed Track", + "artist_names": ["Seed Artist"], + } + }, + } + + candidates = await engine._collect_candidates(access_token="token", seed=seed, market=None) + + self.assertEqual({c.id for c in candidates}, {"c1"}) + self.assertTrue(any("spotify_search_artist" in c.source for c in candidates)) + + async def test_collect_candidates_uses_lastfm_artist_similar_search(self) -> None: + spotify = RecordingSpotifyStub() + spotify.raise_recommendations = True + spotify.raise_top_tracks = True + spotify.search_results_by_artist = { + "Similar Artist": [fake_spotify_track("lf1", "LF Song", "lfa1", "Similar Artist")], + } + lastfm = StaticLastFm( + artist_similar_results={ + "Seed Artist": [{"name": "Similar Artist"}], + } + ) + engine = make_engine(spotify, lastfm=lastfm) + seed = { + "seed_track_ids": [], + "seed_artists": ["a1"], + "seed_artist_names": ["Seed Artist"], + "recent_track_meta": {}, + } + + candidates = await engine._collect_candidates(access_token="token", seed=seed, market=None) + + self.assertIn("lf1", {c.id for c in candidates}) + self.assertTrue(any("lastfm_artist_similar" in c.source for c in candidates)) + + def test_normalize_spotify_market(self) -> None: + spotify = RecordingSpotifyStub() + engine = make_engine(spotify) + + self.assertIsNone(engine._normalize_spotify_market("EU")) + self.assertIsNone(engine._normalize_spotify_market("global")) + self.assertEqual(engine._normalize_spotify_market("de"), "DE") + self.assertEqual(engine._normalize_spotify_market("US"), "US") + self.assertIsNone(engine._normalize_spotify_market("USA")) + self.assertIsNone(engine._normalize_spotify_market("")) + + def test_rank_and_select_uses_liked_fallback_and_counts_as_reused(self) -> None: + spotify = RecordingSpotifyStub() + engine = make_engine(spotify) + candidates = [ + TrackCandidate( + id="c1", + uri="spotify:track:c1", + name="Song 1", + artist_names=["Artist One"], + artist_ids=["a1"], + source="spotify_search_artist", + score=0.7, + ), + TrackCandidate( + id="c2", + uri="spotify:track:c2", + name="Song 2", + artist_names=["Artist Two"], + artist_ids=["a2"], + source="spotify_search_artist", + score=0.6, + ), + ] + + result = engine._rank_and_select( + candidates=candidates, + liked_ids={"c1", "c2"}, + history_ids=set(), + target_size=2, + min_new_ratio=0.8, + ) + + self.assertEqual(len(result.tracks), 2) + self.assertEqual(result.new_count, 0) + self.assertEqual(result.reused_count, 2) + self.assertIsNotNone(result.notes) + self.assertIn("liked-track fallback", result.notes or "") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_spotify_client.py b/tests/test_spotify_client.py new file mode 100644 index 0000000..8fbb95a --- /dev/null +++ b/tests/test_spotify_client.py @@ -0,0 +1,136 @@ +from __future__ import annotations + +import unittest +from types import SimpleNamespace + +import httpx + +from app.clients.spotify import SpotifyClient + + +class RecordingHttpClient: + def __init__(self, responses: list[httpx.Response]) -> None: + self._responses = list(responses) + self.calls: list[dict] = [] + + async def request(self, method: str, url: str, **kwargs): + self.calls.append( + { + "method": method, + "url": url, + "headers": kwargs.get("headers"), + "params": kwargs.get("params"), + "json": kwargs.get("json"), + } + ) + if not self._responses: + raise AssertionError("No queued response for request") + return self._responses.pop(0) + + +def make_response(method: str, url: str, status_code: int, *, json_body=None, text_body: str | None = None) -> httpx.Response: + request = httpx.Request(method, url) + if json_body is not None: + return httpx.Response(status_code, request=request, json=json_body) + return httpx.Response(status_code, request=request, text=text_body or "") + + +def make_client(http_client: RecordingHttpClient) -> SpotifyClient: + settings = SimpleNamespace( + spotify_client_id="id", + spotify_client_secret="secret", + spotify_redirect_uri="https://example.test/callback", + ) + return SpotifyClient(settings, http_client) # type: ignore[arg-type] + + +class SpotifyClientPlaylistTests(unittest.IsolatedAsyncioTestCase): + async def test_create_playlist_uses_me_endpoint(self) -> None: + http_client = RecordingHttpClient( + [ + make_response( + "POST", + "https://api.spotify.com/v1/me/playlists", + 201, + json_body={ + "id": "pl1", + "name": "Test Playlist", + "external_urls": {"spotify": "https://open.spotify.com/playlist/pl1"}, + }, + ) + ] + ) + client = make_client(http_client) + + payload = await client.create_playlist( + "token", + user_id="user123", + name="Test Playlist", + description="desc", + public=False, + ) + + self.assertEqual(payload["id"], "pl1") + self.assertEqual(len(http_client.calls), 1) + call = http_client.calls[0] + self.assertEqual(call["method"], "POST") + self.assertEqual(call["url"], "https://api.spotify.com/v1/me/playlists") + self.assertEqual(call["json"], {"name": "Test Playlist", "description": "desc", "public": False}) + self.assertEqual(call["headers"]["Authorization"], "Bearer token") + + async def test_delete_playlist_calls_followers_delete(self) -> None: + http_client = RecordingHttpClient( + [ + make_response( + "DELETE", + "https://api.spotify.com/v1/playlists/pl123/followers", + 200, + text_body="", + ) + ] + ) + client = make_client(http_client) + + await client.delete_playlist("token", "pl123") + + self.assertEqual(len(http_client.calls), 1) + call = http_client.calls[0] + self.assertEqual(call["method"], "DELETE") + self.assertEqual(call["url"], "https://api.spotify.com/v1/playlists/pl123/followers") + self.assertEqual(call["headers"]["Authorization"], "Bearer token") + self.assertIsNone(call["json"]) + + async def test_add_playlist_items_uses_items_endpoint_and_chunks(self) -> None: + uris = [f"spotify:track:{i:02d}" for i in range(101)] + http_client = RecordingHttpClient( + [ + make_response( + "POST", + "https://api.spotify.com/v1/playlists/pl999/items", + 201, + json_body={"snapshot_id": "snap1"}, + ), + make_response( + "POST", + "https://api.spotify.com/v1/playlists/pl999/items", + 201, + json_body={"snapshot_id": "snap2"}, + ), + ] + ) + client = make_client(http_client) + + await client.add_playlist_items("token", "pl999", uris) + + self.assertEqual(len(http_client.calls), 2) + first, second = http_client.calls + self.assertEqual(first["method"], "POST") + self.assertEqual(first["url"], "https://api.spotify.com/v1/playlists/pl999/items") + self.assertEqual(second["url"], "https://api.spotify.com/v1/playlists/pl999/items") + self.assertEqual(len(first["json"]["uris"]), 100) + self.assertEqual(len(second["json"]["uris"]), 1) + self.assertEqual(first["headers"]["Authorization"], "Bearer token") + + +if __name__ == "__main__": + unittest.main()