From ca44e3e447fc1185ce188229b4e1a0f7f3bbbf66 Mon Sep 17 00:00:00 2001
From: Kohya S
Date: Sun, 27 Oct 2024 10:19:05 +0900
Subject: [PATCH] reduce VRAM usage at the cost of increased main RAM usage

---
 README.md                  |  5 +++++
 networks/svd_merge_lora.py | 11 +++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 7f8508dc0..1e7d49afe 100644
--- a/README.md
+++ b/README.md
@@ -137,6 +137,11 @@ The majority of scripts is licensed under ASL 2.0 (including codes from Diffuser
 
 ## Change History
 
+### Oct 27, 2024 / 2024-10-27:
+
+- The VRAM usage of `svd_merge_lora.py` has been reduced. However, main memory usage will increase (32GB is sufficient).
+- This will be included in the next release.
+
 ### Oct 26, 2024 / 2024-10-26:
 
 - Fixed a bug in `svd_merge_lora.py`, `sdxl_merge_lora.py`, and `resize_lora.py` where the hash value of LoRA metadata was not correctly calculated when the `save_precision` was different from the `precision` used in the calculation. See issue [#1722](https://github.com/kohya-ss/sd-scripts/pull/1722) for details. Thanks to JujoHotaru for raising the issue.
diff --git a/networks/svd_merge_lora.py b/networks/svd_merge_lora.py
index c520e7f89..c79b45acf 100644
--- a/networks/svd_merge_lora.py
+++ b/networks/svd_merge_lora.py
@@ -301,10 +301,10 @@ def merge_lora_models(models, ratios, lbws, new_rank, new_conv_rank, device, mer
             # make original weight if not exist
             if lora_module_name not in merged_sd:
                 weight = torch.zeros((out_dim, in_dim, *kernel_size) if conv2d else (out_dim, in_dim), dtype=merge_dtype)
-                if device:
-                    weight = weight.to(device)
             else:
                 weight = merged_sd[lora_module_name]
+            if device:
+                weight = weight.to(device)
 
             # merge to weight
             if device:
@@ -336,13 +336,16 @@ def merge_lora_models(models, ratios, lbws, new_rank, new_conv_rank, device, mer
                 conved = torch.nn.functional.conv2d(down_weight.permute(1, 0, 2, 3), up_weight).permute(1, 0, 2, 3)
                 weight = weight + ratio * conved * scale
 
-            merged_sd[lora_module_name] = weight
+            merged_sd[lora_module_name] = weight.to("cpu")
 
     # extract from merged weights
     logger.info("extract new lora...")
     merged_lora_sd = {}
     with torch.no_grad():
         for lora_module_name, mat in tqdm(list(merged_sd.items())):
+            if device:
+                mat = mat.to(device)
+
             conv2d = len(mat.size()) == 4
             kernel_size = None if not conv2d else mat.size()[2:4]
             conv2d_3x3 = conv2d and kernel_size != (1, 1)
@@ -381,7 +384,7 @@ def merge_lora_models(models, ratios, lbws, new_rank, new_conv_rank, device, mer
 
             merged_lora_sd[lora_module_name + ".lora_up.weight"] = up_weight.to("cpu").contiguous()
             merged_lora_sd[lora_module_name + ".lora_down.weight"] = down_weight.to("cpu").contiguous()
-            merged_lora_sd[lora_module_name + ".alpha"] = torch.tensor(module_new_rank)
+            merged_lora_sd[lora_module_name + ".alpha"] = torch.tensor(module_new_rank, device="cpu")
 
     # build minimum metadata
     dims = f"{new_rank}"
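
The VRAM saving in this patch comes from a standard offload pattern: the merged-weight accumulator stays in main RAM (`weight.to("cpu")` at the end of the merge loop), and each tensor is moved onto the GPU only while it is actually being merged or decomposed (`mat = mat.to(device)` in the extraction loop). Below is a minimal, self-contained sketch of that pattern for the extraction step; the function name `svd_extract_offloaded`, the fixed `rank`, and the toy shapes are illustrative only, not code from this repository (conv2d weights, which the real script also handles, are omitted for brevity):

```python
import torch

def svd_extract_offloaded(merged_sd: dict, device, rank: int = 4):
    """Extract LoRA-style low-rank factors from merged 2D weights,
    keeping the state dict on CPU and using VRAM one tensor at a time."""
    lora_sd = {}
    with torch.no_grad():
        for name, mat in merged_sd.items():
            if device:
                mat = mat.to(device)  # only this one matrix occupies VRAM

            # truncated SVD: W ~= (U * S) @ Vh using the top-`rank` components
            u, s, vh = torch.linalg.svd(mat.float(), full_matrices=False)
            up = u[:, :rank] * s[:rank]  # (out_dim, rank)
            down = vh[:rank, :]          # (rank, in_dim)

            # move results straight back to main RAM so VRAM is freed per module
            lora_sd[name + ".lora_up.weight"] = up.to("cpu").contiguous()
            lora_sd[name + ".lora_down.weight"] = down.to("cpu").contiguous()
            lora_sd[name + ".alpha"] = torch.tensor(rank, device="cpu")
    return lora_sd

# toy usage: accumulate on CPU, decompose on GPU if one is available
weights = {"lora_unet_a": torch.randn(320, 640), "lora_unet_b": torch.randn(640, 320)}
dev = "cuda" if torch.cuda.is_available() else None
print({k: v.shape for k, v in svd_extract_offloaded(weights, dev).items()})
```

The trade-off is exactly what the changelog entry states: peak GPU memory now scales with a single module's weight rather than with the whole merged state dict, while main RAM holds every merged weight at once, hence the note that 32GB of system memory is sufficient.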