Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Embedding] Add inf-cl in embedding trainer #9673

Merged
merged 7 commits into from
Dec 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions llm/config/qwen/emb_argument.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,6 @@
"unified_checkpoint": true,
"use_flash_attention": true,
"amp_custom_black_list": "elementwise_div",
"release_grads": true
}
"release_grads": true,
"loss_type": "contrastive"
}
8 changes: 8 additions & 0 deletions llm/utils/argument.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,11 @@ class EmbeddingArgument:
default=None,
metadata={"help": "The dims for matryoshka training."},
)
loss_type: str = field(
default="contrastive",
metadata={"help": "The type of loss computation."},
)
inf_cl_head_dim: int = field(
default=64,
metadata={"help": "The size of the head dimension when gpu ops are set as 'inf_cl'."},
)
87 changes: 87 additions & 0 deletions paddlenlp/transformers/contrastive_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,90 @@
else:
loss = self.loss_fn(q_reps, p_reps)
return loss


class SimpleInfclLoss(nn.Layer):
    """Instance-discrimination loss computed by the triton ``inf_cl`` kernel."""

    def __init__(self, inf_cl_head_dim=64):
        """
        Initializes the Simple Inf_cl Loss class.

        Args:
            inf_cl_head_dim (int, optional): Dimension of the projection head. Default is 64.
        """
        super().__init__()
        self.head_dim = inf_cl_head_dim

    def forward(self, q_reps, p_reps):
        """
        Computes the instance discrimination loss.

        Args:
            q_reps (Tensor): Query representations.
            p_reps (Tensor): Key representations; assumed to hold a fixed-size
                group of keys per query — TODO confirm against the caller.

        Returns:
            Tensor: The computed loss.

        Raises:
            ImportError: If the ``paddlenlp_kernel`` package is unavailable.
        """
        # Import lazily so the module stays usable when the optional kernel
        # package is not installed; fail loudly only when this loss is used.
        try:
            from paddlenlp_kernel.triton.inf_cl import cal_inf_loss
        except ImportError:
            raise ImportError(
                "Paddlenlp_kernels are not available, which means the inf_cl loss cannot be used. If you wish to use the inf_cl loss, please follow the instructions in the README.md on the `ops`."
            )
        num_queries = q_reps.shape[0]
        keys_per_query = p_reps.shape[0] // num_queries
        # The positive key for query i sits at the start of its key group,
        # i.e. index i * keys_per_query.
        positive_indices = paddle.arange(num_queries, dtype="int64") * keys_per_query
        return cal_inf_loss(q_reps, p_reps, labels=positive_indices, scale=None, head_dim=self.head_dim)

Check warning on line 100 in paddlenlp/transformers/contrastive_loss.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/transformers/contrastive_loss.py#L96-L100

Added lines #L96 - L100 were not covered by tests


class MatryoshkaInfclLoss(nn.Layer):
    """Matryoshka wrapper over ``SimpleInfclLoss``: sums the inf_cl loss across
    truncated embedding widths, falling back to the full width when no dims
    are configured."""

    def __init__(self, embedding_matryoshka_dims: Optional[List[int]] = None, inf_cl_head_dim=64):
        """
        Initializes the Matryoshka Inf_cl Loss class.

        Args:
            embedding_matryoshka_dims (List[int], optional): List of dimensions for Matryoshka embeddings.
                If None, no Matryoshka embedding is used. Default is None.
            inf_cl_head_dim (int, optional): Dimension of the projection head. Default is 64.
        """
        super().__init__()
        # Normalize ``None`` to an empty list so ``forward`` can branch on truthiness.
        self.embedding_matryoshka_dims = [] if embedding_matryoshka_dims is None else embedding_matryoshka_dims
        self.loss_fn = SimpleInfclLoss(inf_cl_head_dim)

    def forward(self, q_reps, p_reps):
        """
        Computes the Matryoshka instance discrimination loss.

        Args:
            q_reps (Tensor): Query representations.
            p_reps (Tensor): Key representations.

        Returns:
            Tensor: The computed loss (summed over all configured Matryoshka dims).
        """
        if not self.embedding_matryoshka_dims:
            # No Matryoshka dims configured: a single loss on the full-width representations.
            return self.loss_fn(q_reps, p_reps)

        loss = 0.0
        for dim in self.embedding_matryoshka_dims:
            # Truncate both sides to the first `dim` features, then L2-normalize
            # along the last axis so the truncated embeddings are unit-length
            # before the loss is evaluated.
            truncated_q = nn.functional.normalize(q_reps[:, :dim], axis=-1)
            truncated_p = nn.functional.normalize(p_reps[:, :dim], axis=-1)
            loss += self.loss_fn(truncated_q, truncated_p)
        return loss

Check warning on line 152 in paddlenlp/transformers/contrastive_loss.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/transformers/contrastive_loss.py#L152

Added line #L152 was not covered by tests
18 changes: 14 additions & 4 deletions paddlenlp/trl/embedding_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
from paddlenlp.trainer import Trainer
from paddlenlp.transformers.contrastive_loss import (
MatryoshkaContrastiveLoss,
MatryoshkaInfclLoss,
SimpleContrastiveLoss,
SimpleInfclLoss,
)
from paddlenlp.transformers.embedding_utils import dist_gather_tensor_with_gradient

Expand All @@ -44,11 +46,19 @@
self.accum_rng_states["hybrid"] = []

if model_args.embedding_matryoshka_dims is not None and len(model_args.embedding_matryoshka_dims) > 0:
self.loss_fn = MatryoshkaContrastiveLoss(
model_args.embedding_temperature, model_args.embedding_matryoshka_dims
)
if model_args.loss_type == "inf_cl":
self.embedding_negatives_cross_device = False
self.loss_fn = MatryoshkaInfclLoss(model_args.embedding_matryoshka_dims, model_args.inf_cl_head_dim)
elif model_args.loss_type == "contrastive":
self.loss_fn = MatryoshkaContrastiveLoss(

Check warning on line 53 in paddlenlp/trl/embedding_trainer.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/trl/embedding_trainer.py#L49-L53

Added lines #L49 - L53 were not covered by tests
model_args.embedding_temperature, model_args.embedding_matryoshka_dims
)
else:
self.loss_fn = SimpleContrastiveLoss(model_args.embedding_temperature)
if model_args.loss_type == "inf_cl":
self.embedding_negatives_cross_device = False
self.loss_fn = SimpleInfclLoss(model_args.inf_cl_head_dim)
elif model_args.loss_type == "contrastive":
self.loss_fn = SimpleContrastiveLoss(model_args.embedding_temperature)

Check warning on line 61 in paddlenlp/trl/embedding_trainer.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/trl/embedding_trainer.py#L57-L61

Added lines #L57 - L61 were not covered by tests

def clear_memory(self):
self.accum_q_features.clear()
Expand Down