Merge pull request #3 from hadriansecurity/fix_oom_for_large_n
use a smaller batch size for even less memory usage
klaasmeinke authored Aug 18, 2024
2 parents c2786b6 + 153801f commit ce60364
Showing 2 changed files with 6 additions and 3 deletions.
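
Why the batch size is the lever here: activation and logits memory for a single forward pass grow linearly with the number of sequences in the batch, so shrinking the batch from 500 to 8 cuts the per-call footprint by roughly 60x, independent of how many candidate subdomains are scored in total. A back-of-envelope sketch in Python (the vocabulary size and float32 dtype are illustrative assumptions, not figures taken from subwiz):

    # Rough illustration only: vocab_size and float32 are assumed values,
    # not taken from subwiz. Memory per forward call scales with batch size.
    vocab_size = 50_000
    bytes_per_logit = 4                          # float32
    per_sequence = vocab_size * bytes_per_logit  # last-position logits per sequence
    print(500 * per_sequence / 1e6)  # old batch of 500: ~100 MB of logits per call
    print(8 * per_sequence / 1e6)    # new batch of 8:   ~1.6 MB of logits per call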
pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "subwiz"
-version = "0.1.1"
+version = "0.1.2"
 description = "A recon tool that uses AI to predict subdomains. Then returns those that resolve."
 readme = "README.md"
 requires-python = ">=3.7"
subwiz/model.py (7 changes: 5 additions & 2 deletions)
@@ -1,4 +1,7 @@
 """
+This code is largely copied from the nanogpt repository by Andrej Karpathy.
+https://github.com/karpathy/nanoGPT
+
 Full definition of a GPT Language Model, all of it in this single file.
 References:
 1) the official GPT-2 TensorFlow implementation released by OpenAI:
@@ -308,11 +311,11 @@ def generate(
         sequences = sequences[:, -self.config.block_size :]
 
         # inference the model in batches
-        batch_size = 500
+        batch_size = 8
         logits, _ = self(sequences[:batch_size])
         for j in range(batch_size, len(sequences), batch_size):
             new_logits, _ = self(sequences[j : j + batch_size])
-            logits = torch.cat(tensors=(new_logits, logits), dim=1)
+            logits = torch.cat(tensors=(new_logits, logits), dim=0)
         logits = logits.squeeze(1)
 
         # take N most probable next tokens for each sequence
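For context, a minimal self-contained sketch of the batching pattern this hunk adopts. DummyModel, its output shape, and the sequence count are illustrative assumptions, not subwiz's real GPT; the loop itself mirrors the committed code. Concatenating along dim=0 stacks the per-chunk outputs on the batch axis, giving one row per input sequence, whereas the old dim=1 concatenated along the position axis, which mis-shapes the result (and errors outright once the final chunk is smaller than the others).

    # Sketch only: DummyModel and all shapes are assumptions, not subwiz's model.
    import torch

    class DummyModel(torch.nn.Module):
        def __init__(self, vocab_size: int = 38):
            super().__init__()
            self.vocab_size = vocab_size

        def forward(self, idx: torch.Tensor):
            # Fake logits for the last position only, shape (batch, 1, vocab),
            # mimicking nanoGPT's inference-time output. Loss is unused here.
            return torch.randn(idx.shape[0], 1, self.vocab_size), None

    model = DummyModel()
    sequences = torch.zeros(1000, 16, dtype=torch.long)  # 1000 candidate sequences

    batch_size = 8  # each forward pass sees at most 8 sequences
    logits, _ = model(sequences[:batch_size])
    for j in range(batch_size, len(sequences), batch_size):
        new_logits, _ = model(sequences[j : j + batch_size])
        # dim=0 is the batch axis: one row per input sequence after the loop.
        logits = torch.cat(tensors=(new_logits, logits), dim=0)
    logits = logits.squeeze(1)  # (1000, vocab_size)

    assert logits.shape == (1000, model.vocab_size)

One quirk worth noting: because new_logits is placed before the accumulated logits, chunks accumulate in reverse order along the batch axis. The final shape is correct either way; the sketch mirrors the commit rather than reordering.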
