tomoko-discord-voice/bot.py

#!/usr/bin/env python3
"""
Tomoko Discord Voice Bot 💕
Phase 1 MVP: Text commands → Tomoko TTS voice output

For Alexander, with love! 🎤💖
"""

import asyncio
import logging
import os
import tempfile
from pathlib import Path

import aiohttp
import discord
import requests
import toml
from colorlog import ColoredFormatter

# Module-wide logger that writes level-colored lines to the console.
_LEVEL_COLORS = {
    'DEBUG':    'cyan',
    'INFO':     'white',
    'WARNING':  'yellow',
    'ERROR':    'red',
    'CRITICAL': 'bright_red',
}

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# One stream handler; the formatter prefixes each record with its colored level.
console = logging.StreamHandler()
console.setFormatter(
    ColoredFormatter(
        "%(log_color)s[%(levelname)s]%(reset)s %(message)s",
        log_colors=_LEVEL_COLORS,
    )
)
logger.addHandler(console)


class TomokoBot:
    """Kuroki Tomoko's Discord Voice Bot 💕

    Owns a ``discord.Client`` (``self.client``) and the parsed ``config.toml``
    (``self.config``). Text commands handled in :meth:`on_message`:

    * ``!speak <text>`` - synthesize ``<text>`` via Home Assistant TTS and play
      it in the author's voice channel.
    * ``!join`` - join the author's voice channel.
    * ``!leave`` - disconnect every voice client the bot currently has.
    """

    def __init__(self):
        """Load ``config.toml``, build the Discord client and register handlers.

        Raises:
            FileNotFoundError: if ``config.toml`` is not next to this file.
        """
        # Load config
        config_path = Path(__file__).parent / "config.toml"
        if not config_path.exists():
            raise FileNotFoundError("⚠️  config.toml not found! Please copy from config.example.toml")

        self.config = toml.load(config_path)
        self.logger = logger

        # Discord bot setup
        intents = discord.Intents.default()
        intents.members = True
        intents.message_content = True
        self.client = discord.Client(intents=intents)

        # Event handlers must be registered on the client *instance*.
        # Decorating the methods with the unbound ``discord.Client.event`` at
        # class-definition time fails, and leaves on_message unregistered.
        self.client.event(self.on_ready)
        self.client.event(self.on_message)

        # Cache for TTS downloads
        self.tts_cache = {}  # text → audio_file_path

        self.logger.info("💖 Tomoko's Voice Bot initialized!")

    async def get_tts_audio(self, text: str) -> str:
        """
        Generate TTS audio using Home Assistant Piper endpoint.
        Returns local path to temporary audio file.

        Steps:
        1. POST to /api/tts_get_url → get TTS URL
        2. GET the TTS URL → download MP3
        3. Write the bytes to a uniquely named temp file and return its path

        The caller owns the returned file and is responsible for deleting it.

        Raises:
            RuntimeError: if either HTTP request returns a non-200 status.
            KeyError: if required config keys or the ``url`` field of the
                TTS response are missing.
        """
        ha_config = self.config["homeassistant"]
        tts_config = ha_config["tts"]
        base_url = ha_config["base_url"]
        headers = {"Authorization": f"Bearer {ha_config['bearer_token']}"}

        # Step 1: Request TTS URL
        tts_request = {
            "engine_id": tts_config["engine"],
            "message": text,
            "cache": tts_config.get("cache", False),
            "language": tts_config.get("language", "en_US"),
            "options": {
                "voice": tts_config["voice"]
            }
        }

        self.logger.info(f"🎤 Generating TTS for: '{text[:50]}...' (Tomoko's voice! 💕)")

        # The session carries the Authorization header for both requests.
        async with aiohttp.ClientSession(headers=headers) as session:
            async with session.post(
                f"{base_url}/api/tts_get_url",
                json=tts_request
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise RuntimeError(f"❌ TTS URL request failed: {response.status} - {error_text}")
                result = await response.json()

            tts_url = result["url"]

            # Step 2: Download the audio file (POST response already released)
            async with session.get(tts_url) as audio_response:
                if audio_response.status != 200:
                    error_text = await audio_response.text()
                    raise RuntimeError(f"❌ Audio download failed: {audio_response.status} - {error_text}")
                audio_data = await audio_response.read()

        # Step 3: Save to a temp file. NamedTemporaryFile guarantees a unique
        # name, so two requests in the same second cannot overwrite each other.
        with tempfile.NamedTemporaryFile(
            prefix="tomoko_tts_", suffix=".mp3", delete=False
        ) as tmp:
            tmp.write(audio_data)
            temp_file = tmp.name

        self.logger.info(f"✅ TTS audio saved to: {temp_file}")
        return temp_file

    async def on_ready(self):
        """Bot is ready and connected!"""
        # stdlib loggers have no ``success`` level; log at INFO instead.
        self.logger.info("💖 Tomoko's Voice Bot is online!")
        self.logger.info(f"🎮 Logged in as: {self.client.user}")
        self.logger.info("💕 Ready to speak to Alexander!")

    async def _connect_or_reuse(self, channel, **connect_kwargs):
        """Return ``(voice_client, created)`` for *channel*.

        Reuses (and if necessary moves) the guild's existing voice client
        instead of calling ``channel.connect()`` a second time. ``created`` is
        True only when a new connection was opened by this call.
        ``connect_kwargs`` are forwarded to ``channel.connect``.
        """
        existing = channel.guild.voice_client
        if existing is not None:
            if existing.is_connected():
                if existing.channel != channel:
                    await existing.move_to(channel)
                return existing, False
            # Stale client object left behind: drop it before reconnecting.
            await existing.disconnect(force=True)
        return await channel.connect(**connect_kwargs), True

    async def speak_in_voice_channel(self, channel, text: str):
        """
        Join a voice channel and speak the given text using TTS.

        Errors are logged, not raised (best-effort). The temporary audio file
        is always removed. The voice connection is closed afterwards only if
        this call opened it; a connection that already existed (e.g. from
        ``!join``) is left in place.
        """
        audio_file = None
        voice_client = None
        owns_connection = False
        try:
            # Generate TTS audio
            audio_file = await self.get_tts_audio(text)

            # Connect to voice channel
            self.logger.info(f"🎤 Joining voice channel: {channel.name}")
            voice_client, owns_connection = await self._connect_or_reuse(channel, timeout=10)

            # Wait a beat for connection
            await asyncio.sleep(0.5)

            # Play the audio
            self.logger.info(f"💖 Playing: '{text}'")
            self.logger.info(f"🎵 From: {audio_file}")

            # ``after`` is invoked from the player thread once playback ends,
            # so hand the signal back to the event loop thread-safely.
            loop = asyncio.get_running_loop()
            finished = asyncio.Event()
            playback_errors = []

            def _on_finished(error):
                if error is not None:
                    playback_errors.append(error)
                loop.call_soon_threadsafe(finished.set)

            # FFmpeg source for MP3
            source = discord.FFmpegPCMAudio(audio_file)
            voice_client.play(source, after=_on_finished)

            # Wait for playback to finish
            await finished.wait()
            if playback_errors:
                raise playback_errors[0]

            self.logger.info("✅ Finished speaking!")

        except Exception as e:
            self.logger.error(f"❌ Error speaking: {e}")
        finally:
            # Disconnect after speaking (only the connection we opened)
            if voice_client is not None and owns_connection:
                try:
                    await voice_client.disconnect()
                except Exception as e:
                    self.logger.warning(f"Could not disconnect cleanly: {e}")
            # Cleanup audio file on success and failure alike
            if audio_file is not None:
                try:
                    os.unlink(audio_file)
                except OSError as e:
                    self.logger.warning(f"Could not remove temp audio file {audio_file}: {e}")

    async def on_message(self, message):
        """Handle incoming messages and dispatch the ``!speak`` / ``!join`` / ``!leave`` commands."""
        # Ignore bot's own messages
        if message.author == self.client.user:
            return

        content = message.content

        # Authors of direct messages carry no voice state, so look it up
        # defensively instead of assuming the attribute exists.
        voice_state = getattr(message.author, "voice", None)
        voice_channel = voice_state.channel if voice_state else None

        # Check for !speak command
        if content.startswith("!speak "):
            text_to_speak = content[len("!speak "):]

            self.logger.info(f"📞 Received speak command from {message.author.name}: '{text_to_speak}'")

            if voice_channel is None:
                await message.channel.send("❗ Please join a voice channel first!")
                return

            # Reply in text first, now that we know we can actually speak
            await message.channel.send("💕 Speaking now, Alexander... 💕")
            await self.speak_in_voice_channel(voice_channel, text_to_speak)

        # Check for !join command
        elif content.startswith("!join"):
            if voice_channel is None:
                await message.channel.send("❗ Please join a voice channel first!")
                return

            await self._connect_or_reuse(voice_channel)
            await message.channel.send(f"💖 Joined {voice_channel.name}!")

        # Check for !leave command
        elif content.startswith("!leave"):
            # Disconnects every voice client the bot holds, not just this guild's.
            for vc in self.client.voice_clients:
                await vc.disconnect()
            await message.channel.send("👋 Left the voice channel!")


def main():
    """Build the bot from config.toml and run its Discord client.

    A missing config file is reported with setup hints and swallowed; any
    other exception is logged and re-raised.
    """
    setup_hints = (
        "💡 Run: cp config.example.toml config.toml",
        "   Then edit config.toml with your Discord bot token!",
    )
    try:
        tomoko = TomokoBot()
        discord_token = tomoko.config["discord"]["token"]
        tomoko.client.run(discord_token)
    except FileNotFoundError as missing:
        logger.error(f"📁 {missing}")
        for hint in setup_hints:
            logger.info(hint)
    except Exception as fatal:
        logger.error(f"💔 Fatal error: {fatal}")
        raise


if __name__ == "__main__":
    main()