# configs/config.yaml

# Model selection and motion-embedding layout.
model:
  type: unet
  # NOTE(review): this path names the zeroscope checkpoint while `unet` below
  # selects videoCrafter2 — confirm how the loader combines the two.
  pretrained_model_path: cerspense/zeroscope_v2_576w
  motion_embeddings:
    # Each entry is a two-element [name, number] pair (presumably UNet stage
    # and channel width — confirm). Commented-out pairs are disabled.
    combinations:
    - [down, 1280]
    # - [down, 640]
    # - [up, 640]
    - [up, 1280]
  # `unet` accepts 'videoCrafter2' or 'zeroscope_v2_576w'; the former produces
  # better video quality.
  unet: videoCrafter2
      
# Training-run settings.
train:
  output_dir: ./results
  # NOTE(review): 2000 is larger than max_train_steps (400); if this is a
  # step interval, validation never triggers within a run — confirm intended.
  validation_steps: 2000
  # Presumably: save every 50 steps once step 200 is reached — verify against
  # the trainer.
  checkpointing_steps: 50
  checkpointing_start: 200
  train_batch_size: 1
  max_train_steps: 400
  gradient_accumulation_steps: 1
  cache_latents: true
  # Explicit null: no cache directory is configured here.
  cached_latent_dir: null
  logger_type: tensorboard
  mixed_precision: fp16
  use_8bit_adam: false
  # Both null: no checkpoint or step to resume from is configured.
  resume_from_checkpoint: null
  resume_step: null

# Input data settings; the `type` list holds a single entry, `single_video`.
dataset:
  type:
  - single_video
  single_video_path: ./assets/car-roundabout-24.mp4
  single_video_prompt: 'A car turnaround in a city street'
  # width/height/n_sample_frames (576/320/24) equal the `val` section's
  # width/height/num_frames.
  width: 576
  height: 320
  n_sample_frames: 24
  fps: 8
  # NOTE(review): confirm whether this index is 0- or 1-based.
  sample_start_idx: 1
  frame_step: 1
  use_bucketing: false
  # NOTE(review): false even though single_video_prompt is set — confirm
  # which of the two the loader honours.
  use_caption: false

# Active loss settings. A commented-out DebiasedHybridLoss variant follows
# this block; only one `loss:` mapping may be active at a time.
loss:
  type: BaseLoss
  learning_rate: 0.02
  # Constant schedule with zero warmup steps.
  lr_scheduler: constant
  lr_warmup_steps: 0

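# Alternative loss configuration (disabled). Comment out the active `loss:`
# block before enabling this one; YAML does not allow duplicate keys.
# loss:
#   type: DebiasedHybridLoss
#   learning_rate: 0.02
#   learning_rate_spatial: 5.0e-4
#   lr_scheduler: constant
#   lr_warmup_steps: 0
#   spatial_lora_num: -1
#   use_unet_lora: true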
 
# Initial-noise settings.
noise_init:
  type: BlendInit
  # NOTE(review): presumably the blend weight used by BlendInit — confirm
  # its meaning and valid range.
  noise_prior: 0.5

# Validation / preview sampling settings.
val:
  # List of prompts; currently a single entry.
  prompt:
  - 'A skateboard slides along a city lane'
  # Empty string: no negative prompt text is supplied.
  negative_prompt: ''
  sample_preview: true
  # Same 576x320 resolution and 24 frames as the `dataset` section.
  width: 576
  height: 320
  num_frames: 24
  num_inference_steps: 30
  guidance_scale: 12.0
  # List of seeds; currently only seed 0.
  seeds: [0]

# Strategy flags.
strategy:
  # During training, the debiasing operation will not be applied.
  vSpatial_frameSubtraction: false
  removeMFromV: false