# Training configuration (Hydra).
defaults:
  - _self_
  - dset: musdb44
  - svd: default
  - variant: default
  - override hydra/hydra_logging: colorlog
  - override hydra/job_logging: colorlog
dummy:
dset:
  musdb: /checkpoint/defossez/datasets/musdbhq
  musdb_samplerate: 44100
  use_musdb: true  # set to false to not use musdb as training data.
  wav:  # path to custom wav dataset
  wav2:  # second custom wav dataset
  segment: 11
  shift: 1
  train_valid: false
  full_cv: true
  samplerate: 44100
  channels: 2
  normalize: true
  metadata: ./metadata
  sources: ['drums', 'bass', 'other', 'vocals']
  valid_samples:  # valid dataset size
  backend: null  # if provided, selects the torchaudio backend.
test:
  save: False
  best: True
  workers: 2
  every: 20
  split: true
  shifts: 1
  overlap: 0.25
  sdr: true
  metric: 'loss'  # metric used for best model selection on the valid set, can also be nsdr
  nonhq:  # path to non-HQ MusDB for evaluation
epochs: 360
batch_size: 64
max_batches:  # limit the number of batches per epoch, useful for debugging
              # or if your dataset is gigantic.
optim:
  lr: 3e-4
  momentum: 0.9
  beta2: 0.999
  loss: l1  # l1 or mse
  optim: adam
  weight_decay: 0
  clip_grad: 0
seed: 42
debug: false
valid_apply: true
flag:
save_every:
weights: [1., 1., 1., 1.]  # weights over each source for the training/valid loss.
augment:
  shift_same: false
  repitch:
    proba: 0.2
    max_tempo: 12
  remix:
    proba: 1
    group_size: 4
  scale:
    proba: 1
    min: 0.25
    max: 1.25
  flip: true

continue_from:  # continue from another XP, give the XP Dora signature.
continue_pretrained:  # signature of a pretrained XP; this cannot be a bag of models.
pretrained_repo:  # repo for the pretrained model (default is the official AWS one)
continue_best: true
continue_opt: false
misc:
  num_workers: 10
  num_prints: 4
  show: false
  verbose: false

# List of decays for EMA at batch or epoch level, e.g. 0.999.
# Batch-level EMAs are kept on GPU for speed.
ema:
  epoch: []
  batch: []

use_train_segment: true  # to remove
model_segment:  # override the segment parameter for the model, usually 4 times the training segment.
model: demucs  # see demucs/train.py for the possibilities, and config for each model hereafter.
demucs:  # see demucs/demucs.py for a detailed description
  # Channels
  channels: 64
  growth: 2
  # Main structure
  depth: 6
  rewrite: true
  lstm_layers: 0
  # Convolutions
  kernel_size: 8
  stride: 4
  context: 1
  # Activations
  gelu: true
  glu: true
  # Normalization
  norm_groups: 4
  norm_starts: 4
  # DConv residual branch
  dconv_depth: 2
  dconv_mode: 1  # 1 = branch in encoder, 2 = in decoder, 3 = in both.
  dconv_comp: 4
  dconv_attn: 4
  dconv_lstm: 4
  dconv_init: 1e-4
  # Pre/post treatment
  resample: true
  normalize: false
  # Weight init
  rescale: 0.1
hdemucs:  # see demucs/hdemucs.py for a detailed description
  # Channels
  channels: 48
  channels_time:
  growth: 2
  # STFT
  nfft: 4096
  wiener_iters: 0
  end_iters: 0
  wiener_residual: false
  cac: true
  # Main structure
  depth: 6
  rewrite: true
  hybrid: true
  hybrid_old: false
  # Frequency branch
  multi_freqs: []
  multi_freqs_depth: 3
  freq_emb: 0.2
  emb_scale: 10
  emb_smooth: true
  # Convolutions
  kernel_size: 8
  stride: 4
  time_stride: 2
  context: 1
  context_enc: 0
  # Normalization
  norm_starts: 4
  norm_groups: 4
  # DConv residual branch
  dconv_mode: 1
  dconv_depth: 2
  dconv_comp: 4
  dconv_attn: 4
  dconv_lstm: 4
  dconv_init: 1e-3
  # Weight init
  rescale: 0.1
# Torchaudio implementation of HDemucs
torch_hdemucs:
  # Channels
  channels: 48
  growth: 2
  # STFT
  nfft: 4096
  # Main structure
  depth: 6
  freq_emb: 0.2
  emb_scale: 10
  emb_smooth: true
  # Convolutions
  kernel_size: 8
  stride: 4
  time_stride: 2
  context: 1
  context_enc: 0
  # Normalization
  norm_starts: 4
  norm_groups: 4
  # DConv residual branch
  dconv_depth: 2
  dconv_comp: 4
  dconv_attn: 4
  dconv_lstm: 4
  dconv_init: 1e-3
htdemucs:  # see demucs/htdemucs.py for a detailed description
  # Channels
  channels: 48
  channels_time:
  growth: 2
  # STFT
  nfft: 4096
  wiener_iters: 0
  end_iters: 0
  wiener_residual: false
  cac: true
  # Main structure
  depth: 4
  rewrite: true
  # Frequency branch
  multi_freqs: []
  multi_freqs_depth: 3
  freq_emb: 0.2
  emb_scale: 10
  emb_smooth: true
  # Convolutions
  kernel_size: 8
  stride: 4
  time_stride: 2
  context: 1
  context_enc: 0
  # Normalization
  norm_starts: 4
  norm_groups: 4
  # DConv residual branch
  dconv_mode: 1
  dconv_depth: 2
  dconv_comp: 8
  dconv_init: 1e-3
  # Before the Transformer
  bottom_channels: 0
  # CrossTransformer
  # ------ Common to all
  # Regular parameters
  t_layers: 5
  t_hidden_scale: 4.0
  t_heads: 8
  t_dropout: 0.0
  t_layer_scale: True
  t_gelu: True
  # ------------- Positional embedding
  t_emb: sin
  t_max_positions: 10000  # for the scaled embedding
  t_max_period: 10000.0
  t_weight_pos_embed: 1.0
  t_cape_mean_normalize: True
  t_cape_augment: True
  t_cape_glob_loc_scale: [5000.0, 1.0, 1.4]
  t_sin_random_shift: 0
  # ------------- norm before the transformer encoder
  t_norm_in: True
  t_norm_in_group: False
  # ------------- norm inside the encoder
  t_group_norm: False
  t_norm_first: True
  t_norm_out: True
  # ------------- optim
  t_weight_decay: 0.0
  t_lr:
  # ------------- sparsity
  t_sparse_self_attn: False
  t_sparse_cross_attn: False
  t_mask_type: diag
  t_mask_random_seed: 42
  t_sparse_attn_window: 400
  t_global_window: 100
  t_sparsity: 0.95
  t_auto_sparsity: False
  # Cross encoder first (False)
  t_cross_first: False
  # Weight init
  rescale: 0.1
svd:  # see svd.py for documentation
  penalty: 0
  min_size: 0.1
  dim: 1
  niters: 2
  powm: false
  proba: 1
  conv_only: false
  convtr: false
  bs: 1

quant:  # quantization hyper-parameters
  diffq:  # diffq penalty, typically 1e-4 or 3e-4
  qat:  # use QAT with a fixed number of bits (not as good as diffq)
  min_size: 0.2
  group_size: 8
dora:
  dir: outputs
  exclude: ["misc.*", "slurm.*", 'test.reval', 'flag', 'dset.backend']

slurm:
  time: 4320
  constraint: volta32gb
  setup: ['module load cudnn/v8.4.1.50-cuda.11.6 NCCL/2.11.4-6-cuda.11.6 cuda/11.6']

# Hydra config
hydra:
  job_logging:
    formatters:
      colorlog:
        datefmt: "%m-%d %H:%M:%S"