Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Here is a BitsFusion quantisation script without fine-tuning #5

Open
s9anus98a opened this issue Jun 18, 2024 · 2 comments
Open

Here is a BitsFusion quantisation script without fine-tuning #5

s9anus98a opened this issue Jun 18, 2024 · 2 comments

Comments

@s9anus98a
Copy link

import torch
import copy
import numpy as np
from transformers import CLIPProcessor, CLIPModel
from diffusers import StableDiffusionPipeline
from scipy.cluster.vq import vq, kmeans2

# Configuration
model_path = "path/to/your/sd-v1-5-fp16.safetensors"  # placeholder: checkpoint the pipeline is loaded from further down
output_path = "path/to/your/quantized_model.safetensors"  # placeholder: where the quantized state dict is written
bits = [1, 2, 3, 4]  # Bit-widths to evaluate
sensitivity_threshold = 0.05  # compared against the per-layer sensitivity score in determine_mixed_precision
size_factor = 0.5  # exponent applied to the parameter count when computing the sensitivity score
clip_thresholds = [0.9, 0.95, 0.98]  # quantile levels handed to np.quantile in determine_mixed_precision
time_steps = 50  # number of timestep indices (0 .. time_steps - 1) cached by precompute_time_embeddings

# Mean squared error (MSE) between two images
def calculate_mse(image1, image2):
  """Return the mean of the squared element-wise difference of two images.

  Both arguments only need to support ``-``, ``** 2`` and ``.mean()``, e.g.
  NumPy arrays or torch tensors of matching (or broadcastable) shape.
  """
  difference = image1 - image2
  squared_error = difference ** 2
  return squared_error.mean()

# CLIP score between generated images and their text prompts
def calculate_clip_score(images, texts, clip_processor, clip_model):
  """Return the mean image/text CLIP logit over matching (image i, text i) pairs.

  Args:
    images: Images accepted by ``clip_processor``.
    texts: Prompts accepted by ``clip_processor``; entry ``i`` is paired with
      image ``i`` because only the diagonal of the logit matrix is averaged.
    clip_processor: Callable invoked with ``text=``, ``images=``,
      ``return_tensors="pt"`` and ``padding=True``; its result is unpacked as
      keyword arguments for ``clip_model``.
    clip_model: Callable whose result exposes ``logits_per_image``.

  Returns:
    A Python float: the mean of the diagonal of ``logits_per_image``.
  """
  inputs = clip_processor(text=texts, images=images, return_tensors="pt", padding=True)
  # Scoring is pure inference: only a Python float leaves this function, so
  # building an autograd graph for the CLIP forward pass would just waste memory.
  with torch.no_grad():
    outputs = clip_model(**inputs)
    score = outputs.logits_per_image.diag().mean().item()
  return score

# Per-layer quantization sensitivity analysis
def analyze_layer_sensitivity(model, prompts, bits, sample_size=100):
  """Measure how much quantizing each weight tensor alone degrades the output.

  For every parameter whose name contains ``"weight"`` and every bit-width in
  ``bits``, a deep copy of ``model`` is made, that single layer is quantized
  with ``quantize_layer``, images are generated, and the result is compared
  with the images of the untouched model.

  Args:
    model: Model to analyse; it is never modified (copies are quantized).
    prompts: Prompts passed to ``generate_images`` and to the CLIP scorer.
    bits: Iterable of bit-widths to try for each layer.
    sample_size: Forwarded unchanged to ``generate_images``.

  Returns:
    ``{parameter_name: {bit_width: {"mse", "clip_score", "parameters"}}}``.
    Each layer additionally carries an entry under key ``32`` describing the
    unquantized reference (``mse`` 0.0 and the reference CLIP score); this is
    the entry ``determine_mixed_precision`` indexes as ``results[name][32]``.
  """
  clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
  clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

  # The reference output does not depend on the layer or bit-width under test,
  # so generate and score it once instead of once per (layer, bit-width) pair.
  reference_images = generate_images(model, prompts, sample_size)
  reference_clip_score = calculate_clip_score(
      reference_images, prompts, clip_processor, clip_model
  )

  results = {}
  for name, param in model.named_parameters():
    if "weight" not in name:
      continue
    num_parameters = param.numel()
    layer_results = {
        # Unquantized reference entry, stored first so an explicit 32 in `bits`
        # would overwrite it with a real measurement.
        32: {
            "mse": 0.0,
            "clip_score": reference_clip_score,
            "parameters": num_parameters,
        },
    }
    for b in bits:
      # Quantize only this layer, on a private copy, so measurements stay independent.
      quantized_model = copy.deepcopy(model)
      quantize_layer(quantized_model, name, b)
      generated_images = generate_images(quantized_model, prompts, sample_size)
      layer_results[b] = {
          "mse": calculate_mse(generated_images, reference_images),
          "clip_score": calculate_clip_score(generated_images, prompts, clip_processor, clip_model),
          "parameters": num_parameters,
      }
      # Drop the copy before the next deepcopy to keep peak memory at ~2 models.
      del quantized_model
    results[name] = layer_results
  return results

# Quantize one layer's weights with Lloyd-Max (1-D k-means) from SciPy
def quantize_layer(model, layer_name, bits):
  """Quantize one layer's weight in place to at most ``2**bits`` distinct values.

  The weights are clustered as scalars with ``scipy.cluster.vq.kmeans2`` and
  every weight is replaced by the centroid of its cluster. The layer therefore
  keeps ordinary floating-point weights (already "dequantized"), so its
  ``forward`` is left untouched and works exactly as before.

  Args:
    model: PyTorch model containing the layer.
    layer_name: Either a module name as listed by ``model.named_modules()`` or
      the name of that module's weight parameter as listed by
      ``model.named_parameters()`` (``"<module>.weight"``).
    bits: Number of bits; the codebook holds at most ``2**bits`` values.

  Raises:
    ValueError: If ``bits`` is smaller than 1.
    KeyError: If ``layer_name`` matches neither a module nor a ``.weight``
      parameter of a module.

  Side effects:
    Overwrites ``layer.weight.data`` (same dtype and device as before) and
    sets ``layer.quantization_scale`` / ``layer.quantization_zero_point`` to
    tensors describing the uniform grid spanning the original weight range.
    These two attributes are informational only; nothing here applies them.
  """
  if bits < 1:
    raise ValueError(f"bits must be >= 1, got {bits}")

  # Callers in this file pass parameter names ("block.conv.weight"), while
  # named_modules() is keyed by module names ("block.conv"); accept both.
  modules = dict(model.named_modules())
  if layer_name in modules:
    layer = modules[layer_name]
  else:
    owner_name, _, attribute = layer_name.rpartition(".")
    if attribute != "weight" or owner_name not in modules:
      raise KeyError(f"No module or weight parameter named {layer_name!r} in model")
    layer = modules[owner_name]

  # Make sure the layer actually has a weight tensor
  if getattr(layer, "weight", None) is None:
    print(f"Layer {layer_name} tidak memiliki bobot.")
    return

  weight = layer.weight.data

  # Skip weights that are not floating point (e.g. already integer)
  if not weight.is_floating_point():
    print(f"Layer {layer_name} sudah memiliki bobot integer.")
    return

  if weight.numel() == 0:
    return  # nothing to cluster

  # Cluster on the CPU in float32 (float64 is kept as is): half-precision
  # arrays are a poor fit for k-means arithmetic.
  work_dtype = torch.float64 if weight.dtype == torch.float64 else torch.float32
  values = weight.detach().cpu().to(work_dtype).numpy().reshape(-1, 1)

  # minit='points' draws the initial centroids from the data, so there cannot
  # be more clusters than weights.
  num_levels = min(2 ** bits, values.shape[0])
  centroids, labels = kmeans2(values, num_levels, minit='points')
  quantized = torch.from_numpy(centroids[labels].reshape(tuple(weight.shape)))

  # Write back with the ORIGINAL dtype/device so the layer still matches the
  # rest of the model (kmeans2 output dtype follows the working array).
  layer.weight.data = quantized.to(device=weight.device, dtype=weight.dtype)

  # Record the affine scale / zero point of a uniform grid over the original range.
  min_val = float(values.min())
  max_val = float(values.max())
  scale = (max_val - min_val) / (2 ** bits - 1)
  zero_point = float(round(-min_val / scale)) if scale > 0 else 0.0
  layer.quantization_scale = torch.tensor(scale, device=weight.device)
  layer.quantization_zero_point = torch.tensor(zero_point, device=weight.device)

# Image generation (placeholder)
def generate_images(model, prompts, sample_size):
  """Generate images for ``prompts`` with the given Stable Diffusion model.

  This is an unimplemented placeholder. The callers in this file subtract two
  results from each other (``calculate_mse``) and pass a result as ``images``
  to the CLIP processor, so an implementation must return something that
  supports both uses.

  Args:
    model: Model to generate with (the callers pass a UNet or a quantized copy).
    prompts: Text prompts to render.
    sample_size: Requested sample count (passed through by the callers).

  Raises:
    NotImplementedError: Always, until the generation loop is written. Failing
      here is deliberate: silently returning ``None`` only defers the error to
      an unrelated ``TypeError`` inside the metric functions.
  """
  # TODO: implement generation with a diffusers pipeline using `model` and `prompts`.
  raise NotImplementedError(
      "generate_images is a placeholder: implement image generation with a "
      "diffusers pipeline before running the sensitivity analysis"
  )

# Choose the mixed-precision plan (one bit-width per layer)
def determine_mixed_precision(results, sensitivity_threshold, size_factor, clip_thresholds):
  """Pick a bit-width for every layer from its sensitivity measurements.

  Args:
    results: ``{layer_name: {bit_width: {"mse", "clip_score", "parameters"}}}``.
      The candidate bit-widths are the keys of each layer's dict; key ``32``,
      when present, is treated as the unquantized reference rather than a
      candidate.
    sensitivity_threshold: Largest acceptable sensitivity score.
    size_factor: Exponent ``e`` in ``score = mse / parameters ** e``.
    clip_thresholds: Quantile levels in [0, 1]. The level at index ``i`` is
      worth ``i + 1`` extra bits.

  Returns:
    ``{layer_name: bits}``. Layers without any candidate bit-width are omitted.

  Selection rule:
    1. The smallest candidate whose score is ``<= sensitivity_threshold`` is
       chosen; if none qualifies, the largest candidate is used.
    2. If reference entries are available, each layer's CLIP-score drop
       (reference minus score at its largest candidate) is compared with the
       quantiles of the drops of all layers. A layer gets the bonus of the
       highest quantile level its drop strictly exceeds. The result may
       therefore exceed the largest analysed bit-width.
    Without reference entries step 2 is skipped.
  """
  baseline_key = 32

  # Candidate bit-widths per layer, ascending, taken from the data itself so
  # the function does not depend on any module-level configuration.
  candidates = {
      name: sorted(b for b in layer_results if b != baseline_key)
      for name, layer_results in results.items()
  }

  # Positive drop == quantization lowered the CLIP score.
  clip_drops = {}
  for name, layer_results in results.items():
    if candidates[name] and baseline_key in layer_results:
      widest = candidates[name][-1]
      clip_drops[name] = (
          layer_results[baseline_key]["clip_score"] - layer_results[widest]["clip_score"]
      )

  # Quantile cut-offs are identical for every layer: compute them once, and
  # order them from the highest level down so the strictest tier is tested first.
  tiers = []
  if clip_drops:
    drop_values = list(clip_drops.values())
    ranked_levels = sorted(
        ((level, index + 1) for index, level in enumerate(clip_thresholds)),
        reverse=True,
    )
    tiers = [
        (np.quantile(drop_values, level), extra_bits)
        for level, extra_bits in ranked_levels
    ]

  mixed_precision = {}
  for name, layer_results in results.items():
    layer_bits = candidates[name]
    if not layer_bits:
      continue
    sensitivity_scores = {
        b: layer_results[b]["mse"] / (layer_results[b]["parameters"] ** size_factor)
        for b in layer_bits
    }
    # Cheapest bit-width that is still accurate enough; widest one as fallback.
    optimal_bits = next(
        (b for b in layer_bits if sensitivity_scores[b] <= sensitivity_threshold),
        layer_bits[-1],
    )
    drop = clip_drops.get(name)
    if drop is not None:
      for cutoff, extra_bits in tiers:
        if drop > cutoff:
          optimal_bits += extra_bits
          break
    mixed_precision[name] = optimal_bits
  return mixed_precision

# Apply the mixed-precision plan to the whole model
def quantize_model(model, mixed_precision):
  """Quantize every planned weight of ``model`` and return the same model.

  A parameter is quantized when its name contains ``"weight"`` and appears as
  a key in ``mixed_precision``; the mapped value is forwarded to
  ``quantize_layer`` as the bit-width. All other parameters are left alone.
  """
  for param_name, _ in model.named_parameters():
    if "weight" not in param_name:
      continue
    if param_name not in mixed_precision:
      continue
    quantize_layer(model, param_name, mixed_precision[param_name])
  return model

# Pre-compute and cache the time embeddings
def precompute_time_embeddings(model, time_steps):
  """Evaluate the model's time embedding for timesteps ``0 .. time_steps - 1``.

  Args:
    model: Object exposing ``time_embedding``. If it also exposes
      ``time_proj`` (as the diffusers ``UNet2DConditionModel`` does), the
      timestep is first passed through ``time_proj`` and the result is fed to
      ``time_embedding``; otherwise ``time_embedding`` receives the raw
      timestep tensor. Optional ``device`` / ``dtype`` attributes are
      honoured when present.
    time_steps: Number of consecutive integer timesteps to cache.

  Returns:
    ``{t: embedding}`` for every ``t`` in ``range(time_steps)``.
  """
  project = getattr(model, "time_proj", None)
  device = getattr(model, "device", None)
  dtype = getattr(model, "dtype", None)

  time_embeddings = {}
  # The cache holds constants for inference; no autograd graph is needed.
  with torch.no_grad():
    for t in range(time_steps):
      timestep = torch.tensor([t], device=device)
      if project is None:
        time_embeddings[t] = model.time_embedding(timestep)
        continue
      features = project(timestep)
      if dtype is not None:
        # Match the model's parameter dtype (e.g. float16) before the embedding layers.
        features = features.to(dtype=dtype)
      time_embeddings[t] = model.time_embedding(features)
  return time_embeddings

# Load the Stable Diffusion pipeline and use its UNet as the model to quantize.
# `from_pretrained` expects a model directory or Hub repo id; a single
# checkpoint file (as in the default `model_path`) must go through
# `from_single_file` instead.
if str(model_path).endswith((".safetensors", ".ckpt")):
  pipe = StableDiffusionPipeline.from_single_file(model_path, torch_dtype=torch.float16)
else:
  pipe = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
model = pipe.unet

# Layer sensitivity analysis (generate_images must be implemented first)
prompts = ["A photo of a cat"]  # Replace with the prompts you want to evaluate
results = analyze_layer_sensitivity(model, prompts, bits)

# Determine the mixed-precision strategy
mixed_precision = determine_mixed_precision(
    results, sensitivity_threshold, size_factor, clip_thresholds
)

# Apply quantization to the model (quantize_model returns the object it was given)
quantized_model = quantize_model(model, mixed_precision)

# Pre-compute and cache the time embeddings
time_embeddings = precompute_time_embeddings(quantized_model, time_steps)

# Save the quantized model
# NOTE(review): torch.save writes PyTorch's pickle-based format regardless of
# the ".safetensors" extension in the default output_path; either rename the
# file (e.g. ".pt") or switch to a safetensors writer — confirm the intended format.
torch.save(quantized_model.state_dict(), output_path)
@Shinyzenith
Copy link

Hi, did you implement this?

@charlesrwest
Copy link

charlesrwest commented Sep 12, 2024

Here's the same code with the comments translated (attached).
commentsTranslated.txt

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants