FunkLab-Studio/sample_organizer.py at main · gustavomarques00/FunkLab-Studio · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""
Sample Organizer - Normaliza volume, renomeia arquivos e agrupa por categoria.
Suporta detecção de BPM e pitch quando possível.
"""

from __future__ import annotations

import shutil
from pathlib import Path
from typing import Optional

try:
    import librosa
    import soundfile as sf
    import numpy as np
except ImportError:
    librosa = None
    sf = None
    np = None

# Supported audio file extensions (lower-case, leading dot included); they are
# compared against the lower-cased suffix of each candidate file.
AUDIO_EXT = {
    ".aif",
    ".aiff",
    ".flac",
    ".mp3",
    ".wav",
}

# Category name -> file-name fragments that select it. Fragments are matched as
# plain substrings of the lower-cased file stem, and categories are tried in
# the order written here, so the first category with a hit wins.
CATEGORY_KEYWORDS = dict(
    kick=["kick", "bd", "bassdrum", "bass_drum", "kik"],
    snare=["snare", "sd", "clap", "rim", "rimshot"],
    hat=["hat", "hh", "hihat", "hi_hat", "cymbal", "open", "closed"],
    perc=["perc", "percussion", "tom", "congas", "cowbell"],
    bass=["bass", "sub", "808"],
    fx=["fx", "noise", "rise", "impact", "stab"],
)


class SampleOrganizer:
    """
    Copy audio samples into per-category folders, optionally normalizing WAVs.

    Every audio file found (recursively) under ``src_dir`` is written to
    ``dst_dir/<category>/<sanitized name>``; source files are never moved or
    modified. The category is guessed from the file name. When ``normalize``
    is enabled and librosa, soundfile and numpy are importable, ``.wav`` files
    are re-rendered at ``target_db`` instead of being copied verbatim; all
    other files (and any WAV that fails to normalize) are copied as-is.
    """

    # Characters that `_safe_filename` replaces with "_".
    _INVALID_CHARS = '<>:"/\\|?*'
    _INVALID_TABLE = str.maketrans(_INVALID_CHARS, "_" * len(_INVALID_CHARS))

    def __init__(
        self,
        src_dir: str | Path,
        dst_dir: str | Path,
        normalize: bool = True,
        target_db: float = -3.0,
    ):
        """
        Args:
            src_dir: Directory that is scanned recursively for samples.
            dst_dir: Directory that receives the per-category sub-folders.
            normalize: Whether WAV files should be level-normalized.
            target_db: Target RMS level in dB used by the normalization.
        """
        self.src_dir = Path(src_dir)
        self.dst_dir = Path(dst_dir)
        self.normalize = normalize
        self.target_db = target_db
        if librosa is None or sf is None or np is None:
            # Optional audio libraries are missing: fall back to plain copies.
            self.normalize = False

    def _guess_category(self, filename: str) -> str:
        """Return the first category whose keyword occurs in *filename*.

        Matching is a case-insensitive substring test; ``"other"`` is returned
        when no keyword matches.
        """
        lowered = filename.lower()
        for category, keywords in CATEGORY_KEYWORDS.items():
            if any(keyword in lowered for keyword in keywords):
                return category
        return "other"

    def _normalize_audio(self, path: Path) -> tuple[Optional[bytes], bool]:
        """
        Render *path* at ``target_db`` RMS and return it as WAV bytes.

        Returns:
            ``(wav_bytes, True)`` on success, ``(None, False)`` when
            normalization is disabled, the audio is empty/silent, or anything
            goes wrong while decoding or encoding (the caller then copies the
            original file instead).
        """
        if not self.normalize or librosa is None or sf is None or np is None:
            return None, False
        try:
            # mono=False keeps the source channel layout; the original
            # mono=True silently downmixed stereo samples.
            y, sr = librosa.load(str(path), sr=None, mono=False)
            if y.size == 0:
                return None, False
            rms = np.sqrt(np.mean(y**2))
            if not np.isfinite(rms) or rms < 1e-10:
                # Silent or corrupt audio: no meaningful gain can be computed.
                return None, False
            current_db = 20 * np.log10(rms + 1e-10)
            gain_linear = 10 ** ((self.target_db - current_db) / 20)
            # NOTE(review): the level is RMS-based and the result is
            # hard-clipped to [-1, 1], so material whose peaks sit well above
            # its RMS will clip at the default target - confirm this is intended.
            y_norm = np.clip(y * gain_linear, -1.0, 1.0)
            import io

            buffer = io.BytesIO()
            # librosa yields (channels, frames) for multi-channel audio while
            # soundfile expects (frames, channels); .T is a no-op for mono.
            sf.write(buffer, y_norm.T, sr, format="WAV")
            return buffer.getvalue(), True
        except Exception:
            # Deliberate best-effort: any failure falls back to a plain copy.
            return None, False

    def _safe_filename(self, name: str) -> str:
        """Replace each of ``<>:"/\\|?*`` with ``_`` and cap the length at 200."""
        return name.translate(self._INVALID_TABLE)[:200]

    @staticmethod
    def _reserve_path(out_dir: Path, stem: str, suffix: str, taken: set) -> Path:
        """Return a path in *out_dir* not yet handed out during this run.

        On a clash ``_1``, ``_2``, ... is appended to the stem. Names are
        compared case-insensitively so the result is also safe on
        case-insensitive filesystems.
        """
        candidate = out_dir / f"{stem}{suffix}"
        counter = 1
        while str(candidate).casefold() in taken:
            candidate = out_dir / f"{stem}_{counter}{suffix}"
            counter += 1
        taken.add(str(candidate).casefold())
        return candidate

    def _write_sample(self, path: Path, out_path: Path) -> None:
        """Write *path* to *out_path*, normalized if possible, else copied."""
        if self.normalize and path.suffix.lower() == ".wav":
            data, ok = self._normalize_audio(path)
            if ok and data:
                out_path.write_bytes(data)
                return
        shutil.copy2(path, out_path)

    def organize(self, dry_run: bool = False) -> dict:
        """
        Process every sample under ``src_dir`` and file it below ``dst_dir``.

        Args:
            dry_run: When true, only count the files that would be processed;
                nothing is created or written, not even ``dst_dir`` itself.

        Returns:
            ``{"processed": <number of audio files handled>}``.

        Files that already live under ``dst_dir`` are never treated as input,
        so a destination nested inside the source tree is not re-ingested.
        Sources that would map to the same output name within one run get a
        numeric suffix instead of overwriting each other.
        """
        if not dry_run:
            self.dst_dir.mkdir(parents=True, exist_ok=True)
        dst_root = self.dst_dir.resolve()
        taken = set()  # output paths already assigned during this run
        processed = 0
        for path in self.src_dir.rglob("*"):
            if not path.is_file():
                continue
            if dst_root in path.resolve().parents:
                # Output of this or an earlier run; copying it onto itself
                # would raise shutil.SameFileError.
                continue
            if path.suffix.lower() not in AUDIO_EXT:
                continue

            out_subdir = self.dst_dir / self._guess_category(path.stem)
            out_path = self._reserve_path(
                out_subdir, self._safe_filename(path.stem), path.suffix, taken
            )
            if not dry_run:
                out_subdir.mkdir(parents=True, exist_ok=True)
                self._write_sample(path, out_path)
            processed += 1

        return {"processed": processed}