# Captured from the Hugging Face Space "PreciousMposa/audio"
# (Space status shown at capture time: "Configuration error").
# File: audio/conf/config.yaml (6.63 kB)
# Last commit: 519d358 "Upload 107 files" by PreciousMposa, verified, 8 months ago.

	defaults:
	- _self_
	- dset: musdb44
	- svd: default
	- variant: default
	- override hydra/hydra_logging: colorlog
	- override hydra/job_logging: colorlog

	dummy:
	dset:
	musdb: /checkpoint/defossez/datasets/musdbhq
	musdb_samplerate: 44100
	use_musdb: true # set to false to not use musdb as training data.
	wav: # path to custom wav dataset
	wav2: # second custom wav dataset
	segment: 11
	shift: 1
	train_valid: false
	full_cv: true
	samplerate: 44100
	channels: 2
	normalize: true
	metadata: ./metadata
	sources: ['drums', 'bass', 'other', 'vocals']
	valid_samples: # valid dataset size
	backend: null # if provided select torchaudio backend.

	test:
	save: False
	best: True
	workers: 2
	every: 20
	split: true
	shifts: 1
	overlap: 0.25
	sdr: true
	metric: 'loss' # metric used for best model selection on the valid set, can also be nsdr
	nonhq: # path to non hq MusDB for evaluation

	epochs: 360
	batch_size: 64
	max_batches: # limit the number of batches per epoch, useful for debugging
	# or if your dataset is gigantic.
	optim:
	lr: 3e-4
	momentum: 0.9
	beta2: 0.999
	loss: l1 # l1 or mse
	optim: adam
	weight_decay: 0
	clip_grad: 0

	seed: 42
	debug: false
	valid_apply: true
	flag:
	save_every:
	weights: [1., 1., 1., 1.] # weights over each source for the training/valid loss.

	augment:
	shift_same: false
	repitch:
	proba: 0.2
	max_tempo: 12
	remix:
	proba: 1
	group_size: 4
	scale:
	proba: 1
	min: 0.25
	max: 1.25
	flip: true

	continue_from: # continue from other XP, give the XP Dora signature.
	continue_pretrained: # signature of a pretrained XP, this cannot be a bag of models.
	pretrained_repo: # repo for pretrained model (default is official AWS)
	continue_best: true
	continue_opt: false

	misc:
	num_workers: 10
	num_prints: 4
	show: false
	verbose: false

	# List of decay for EMA at batch or epoch level, e.g. 0.999.
	# Batch level EMA are kept on GPU for speed.
	ema:
	epoch: []
	batch: []

	use_train_segment: true # to remove
	model_segment: # override the segment parameter for the model, usually 4 times the training segment.
	model: demucs # see demucs/train.py for the possibilities, and config for each model hereafter.
	demucs: # see demucs/demucs.py for a detailed description
	# Channels
	channels: 64
	growth: 2
	# Main structure
	depth: 6
	rewrite: true
	lstm_layers: 0
	# Convolutions
	kernel_size: 8
	stride: 4
	context: 1
	# Activations
	gelu: true
	glu: true
	# Normalization
	norm_groups: 4
	norm_starts: 4
	# DConv residual branch
	dconv_depth: 2
	dconv_mode: 1 # 1 = branch in encoder, 2 = in decoder, 3 = in both.
	dconv_comp: 4
	dconv_attn: 4
	dconv_lstm: 4
	dconv_init: 1e-4
	# Pre/post treatment
	resample: true
	normalize: false
	# Weight init
	rescale: 0.1

	hdemucs: # see demucs/hdemucs.py for a detailed description
	# Channels
	channels: 48
	channels_time:
	growth: 2
	# STFT
	nfft: 4096
	wiener_iters: 0
	end_iters: 0
	wiener_residual: false
	cac: true
	# Main structure
	depth: 6
	rewrite: true
	hybrid: true
	hybrid_old: false
	# Frequency Branch
	multi_freqs: []
	multi_freqs_depth: 3
	freq_emb: 0.2
	emb_scale: 10
	emb_smooth: true
	# Convolutions
	kernel_size: 8
	stride: 4
	time_stride: 2
	context: 1
	context_enc: 0
	# normalization
	norm_starts: 4
	norm_groups: 4
	# DConv residual branch
	dconv_mode: 1
	dconv_depth: 2
	dconv_comp: 4
	dconv_attn: 4
	dconv_lstm: 4
	dconv_init: 1e-3
	# Weight init
	rescale: 0.1

	# Torchaudio implementation of HDemucs
	torch_hdemucs:
	# Channels
	channels: 48
	growth: 2
	# STFT
	nfft: 4096
	# Main structure
	depth: 6
	freq_emb: 0.2
	emb_scale: 10
	emb_smooth: true
	# Convolutions
	kernel_size: 8
	stride: 4
	time_stride: 2
	context: 1
	context_enc: 0
	# normalization
	norm_starts: 4
	norm_groups: 4
	# DConv residual branch
	dconv_depth: 2
	dconv_comp: 4
	dconv_attn: 4
	dconv_lstm: 4
	dconv_init: 1e-3

	htdemucs: # see demucs/htdemucs.py for a detailed description
	# Channels
	channels: 48
	channels_time:
	growth: 2
	# STFT
	nfft: 4096
	wiener_iters: 0
	end_iters: 0
	wiener_residual: false
	cac: true
	# Main structure
	depth: 4
	rewrite: true
	# Frequency Branch
	multi_freqs: []
	multi_freqs_depth: 3
	freq_emb: 0.2
	emb_scale: 10
	emb_smooth: true
	# Convolutions
	kernel_size: 8
	stride: 4
	time_stride: 2
	context: 1
	context_enc: 0
	# normalization
	norm_starts: 4
	norm_groups: 4
	# DConv residual branch
	dconv_mode: 1
	dconv_depth: 2
	dconv_comp: 8
	dconv_init: 1e-3
	# Before the Transformer
	bottom_channels: 0
	# CrossTransformer
	# ------ Common to all
	# Regular parameters
	t_layers: 5
	t_hidden_scale: 4.0
	t_heads: 8
	t_dropout: 0.0
	t_layer_scale: True
	t_gelu: True
	# ------------- Positional Embedding
	t_emb: sin
	t_max_positions: 10000 # for the scaled embedding
	t_max_period: 10000.0
	t_weight_pos_embed: 1.0
	t_cape_mean_normalize: True
	t_cape_augment: True
	t_cape_glob_loc_scale: [5000.0, 1.0, 1.4]
	t_sin_random_shift: 0
	# ------------- norm before a transformer encoder
	t_norm_in: True
	t_norm_in_group: False
	# ------------- norm inside the encoder
	t_group_norm: False
	t_norm_first: True
	t_norm_out: True
	# ------------- optim
	t_weight_decay: 0.0
	t_lr:
	# ------------- sparsity
	t_sparse_self_attn: False
	t_sparse_cross_attn: False
	t_mask_type: diag
	t_mask_random_seed: 42
	t_sparse_attn_window: 400
	t_global_window: 100
	t_sparsity: 0.95
	t_auto_sparsity: False
	# Cross Encoder First (False)
	t_cross_first: False
	# Weight init
	rescale: 0.1

	svd: # see svd.py for documentation
	penalty: 0
	min_size: 0.1
	dim: 1
	niters: 2
	powm: false
	proba: 1
	conv_only: false
	convtr: false
	bs: 1

	quant: # quantization hyper params
	diffq: # diffq penalty, typically 1e-4 or 3e-4
	qat: # use QAT with a fixed number of bits (not as good as diffq)
	min_size: 0.2
	group_size: 8

	dora:
	dir: outputs
	exclude: ["misc.", "slurm.", 'test.reval', 'flag', 'dset.backend']

	slurm:
	time: 4320
	constraint: volta32gb
	setup: ['module load cudnn/v8.4.1.50-cuda.11.6 NCCL/2.11.4-6-cuda.11.6 cuda/11.6']

	# Hydra config
	hydra:
	job_logging:
	formatters:
	colorlog:
	datefmt: "%m-%d %H:%M:%S"