SciSharp · martindevans · May 14, 2024 · May 12, 2024 · May 12, 2024 · May 12, 2024
diff --git a/LLama.Examples/Examples/BatchedExecutorGuidance.cs b/LLama.Examples/Examples/BatchedExecutorGuidance.cs
@@ -1,4 +1,4 @@
-using LLama.Batched;
+using LLama.Batched;
 using LLama.Common;
 using LLama.Native;
 using LLama.Sampling;
@@ -105,18 +105,19 @@ public override ISamplingPipeline Clone()
 
         protected override void ProcessLogits(SafeLLamaContextHandle ctx, Span<float> logits, ReadOnlySpan<LLamaToken> lastTokens)
         {
-            if (guidance == null)
-                return;
-
-            // Get the logits generated by the guidance sequences
-            var guidanceLogits = guidance.Sample();
-
-            // Use those logits to guide this sequence
-            NativeApi.llama_sample_apply_guidance(ctx, logits, guidanceLogits, weight);
         }
 
         protected override LLamaToken ProcessTokenDataArray(SafeLLamaContextHandle ctx, LLamaTokenDataArray candidates, ReadOnlySpan<LLamaToken> lastTokens)
         {
+            if (guidance != null)
+            {
+                // Get the logits generated by the guidance sequences
+                var guidanceLogits = guidance.Sample();
+
+                // Modify these logits based on the guidance logits
+                candidates.Guidance(ctx, guidanceLogits, weight);
+            }
+
             candidates.Temperature(ctx, 0.8f);
             candidates.TopK(ctx, 25);
 

diff --git a/LLama/LLamaContext.cs b/LLama/LLamaContext.cs
@@ -89,6 +89,8 @@ public uint BatchThreads
         /// Get the maximum batch size for this context
         /// </summary>
         public uint BatchSize => NativeHandle.BatchSize;
+
+        private LLamaTokenData[]? _samplingBuffer;
 
         /// <summary>
         /// Create a new LLamaContext for the given LLamaWeights
@@ -496,7 +498,9 @@ public LLamaTokenDataArray ApplyPenalty(int logits_i, IEnumerable<LLamaToken> la
             var nl_logit = logits[(int?)nl_token ?? 0];
 
             // Convert logits into token candidates
-            var candidates_p = LLamaTokenDataArray.Create(logits);
+            if (_samplingBuffer == null || _samplingBuffer.Length < logits.Length)
+                _samplingBuffer = new LLamaTokenData[logits.Length];
+            var candidates_p = LLamaTokenDataArray.Create(logits, _samplingBuffer);
 
             // Extract most recently returned tokens
             var last_n_repeat = Math.Min((int)ContextSize, repeatLastTokensCount);
@@ -508,14 +512,14 @@ public LLamaTokenDataArray ApplyPenalty(int logits_i, IEnumerable<LLamaToken> la
             // Restore newline token logit value if necessary
             if (!penalizeNL && nl_token.HasValue)
             {
-                var candidatesSpan = candidates_p.data.Span;
-                for (var i = 0; i < candidates_p.data.Length; i++)
+                var candidatesSpan = candidates_p.Data.Span;
+                for (var i = 0; i < candidates_p.Data.Length; i++)
                 {
                     ref var item = ref candidatesSpan[i];
                     if (item.id == nl_token)
                         item.logit = nl_logit;
                 }
-                candidates_p.sorted = false;
+                candidates_p.Sorted = false;
             }
 
             return candidates_p;