Add files via upload #89

Open. Wants to merge 20 commits into base: main.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
*~
27 changes: 27 additions & 0 deletions bing.py
@@ -0,0 +1,27 @@
import os

import requests

# Define the API endpoint
url = "https://bing.com/chat/api"

# Define the API parameters
params = {
    "user_id": "YOUR_USER_ID",  # Replace with your user ID
    "chat_id": "YOUR_CHAT_ID",  # Replace with your chat ID
    "mode": "Balanced"          # Replace with your chat mode
}

# Make the API request and fail fast on an HTTP error
response = requests.get(url, params=params)
response.raise_for_status()
data = response.json()

# Extract the chat history
chat_history = data["chat_history"]

# Define the file name and location
file_name = "chat_history.txt"
file_location = "YOUR_FILE_LOCATION"  # Replace with your file location

# Save the chat history to a file; os.path.join avoids a missing path separator
with open(os.path.join(file_location, file_name), "w") as file:
    for message in chat_history:
        file.write(str(message) + "\n")
40 changes: 40 additions & 0 deletions bingsearch.py
@@ -0,0 +1,40 @@
#Copyright (c) Microsoft Corporation. All rights reserved.
#Licensed under the MIT License.

# -*- coding: utf-8 -*-

import json
import os
from pprint import pprint
import requests

'''
This sample makes a call to the Bing Web Search API with a query and returns the relevant web search results.
Documentation: https://docs.microsoft.com/en-us/bing/search-apis/bing-web-search/overview
'''

# Add your Bing Search V7 subscription key and endpoint to your environment variables.
subscription_key = os.environ['BING_SEARCH_V7_SUBSCRIPTION_KEY']
endpoint = os.environ['BING_SEARCH_V7_ENDPOINT'] + "/bing/v7.0/search"

# Query term(s) to search for.
query = "Meta-Introspector meta-meme github"

# Construct a request
mkt = 'en-US'
params = { 'q': query, 'mkt': mkt }
headers = { 'Ocp-Apim-Subscription-Key': subscription_key }

# Call the API
try:
    response = requests.get(endpoint, headers=headers, params=params)
    response.raise_for_status()

    print("Headers:")
    print(response.headers)

    print("JSON Response:")
    pprint(response.json())
except Exception:
    # Re-raise with the original traceback; `raise ex` was redundant
    raise
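
If the call succeeds, the JSON body of a web query normally carries a webPages.value list of results. A minimal sketch of pulling out the result titles and URLs, assuming the response contains web page results:

# Hedged sketch: list result titles and URLs from a Bing Web Search v7 response.
# Guard with .get() in case the query returned no web results.
results = response.json().get("webPages", {}).get("value", [])
for item in results:
    print(item["name"], "->", item["url"])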

11 changes: 11 additions & 0 deletions collect_terms.sh
@@ -0,0 +1,11 @@
pushd meta-meme/
git log --all --patch > patch.txt
popd
pushd meta-meme.wiki
git log --patch --all > patch.txt
popd
cp ./meta-meme.wiki/patch.txt meta-meme/wiki-patch.txt
pushd meta-meme/
cat wiki-patch.txt patch.txt > total.txt
grep -P -o -e '([a-zA-Z0-9]+)' total.txt | sort | uniq -c | sort -n > tokens.txt
popd
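
For readers who prefer Python, here is a minimal sketch of the same token-frequency count, assuming total.txt already exists; collections.Counter stands in for the grep/sort/uniq pipeline:

import re
from collections import Counter

# Count alphanumeric tokens in total.txt, mirroring the shell pipeline above
with open("meta-meme/total.txt") as f:
    counts = Counter(re.findall(r"[a-zA-Z0-9]+", f.read()))

# Write "count token" lines in ascending frequency, like `sort -n`
with open("meta-meme/tokens.txt", "w") as out:
    for token, count in sorted(counts.items(), key=lambda kv: kv[1]):
        out.write(f"{count:7d} {token}\n")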
25 changes: 25 additions & 0 deletions data/meta-introspector/metameme/dicussions/21/discussions.json

Large diffs are not rendered by default.

79 changes: 79 additions & 0 deletions data/meta-introspector/metameme/dicussions/21/issues.json

Large diffs are not rendered by default.

37 changes: 37 additions & 0 deletions data/meta-introspector/metameme/dicussions/21/threads.json

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions embed.py
@@ -0,0 +1,29 @@
import numpy as np

# Define the vocabulary and primes
vocabulary = {"0": 3, "1": 5, "2": 7, "3": 11, "4": 13, "+": 2}

# Initialize embeddings (simplified 2D vectors for illustration)
embeddings = {
    "0": np.array([0.2, 0.3]),
    "1": np.array([0.4, 0.5]),
    "2": np.array([0.6, 0.7]),
    "3": np.array([0.8, 0.9]),
    "4": np.array([1.0, 1.1]),
    "+": np.array([1.2, 1.3]),
}

# Define a function to create a "partial_add" embedding
def partial_add_embed(operand):
    return operand + embeddings["+"]

# Example sentence: "2+3"
operands = ["2", "3"]
partial_add_embed_2 = partial_add_embed(embeddings[operands[0]])

# Apply left-to-right outer-to-inner order
result = partial_add_embed_2 + embeddings[operands[1]]

print(f"Sentence: {operands[0]}+{operands[1]}")
print(f"Partial Addition Embedding: {partial_add_embed_2}")
print(f"Resulting Embedding: {result}")
27 changes: 27 additions & 0 deletions embed2.py
@@ -0,0 +1,27 @@
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 3, 3, 3, 3]),  # Using prime value 3
    "2": np.array([5, 5, 5, 5, 5]),  # Using prime value 5
    "3": np.array([7, 7, 7, 7, 7]),  # Using prime value 7
    "+": np.array([2, 2, 2, 2, 2]),  # Using prime value 2
}

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 5))
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        encoding_matrix[token_index, position_index, :] = embeddings[token]

# Print the encoding matrix
for matrix in encoding_matrix:
    print(matrix)
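# Note on the output: encoding_matrix has shape (4, 10, 5), and because the
# same vector is written at every position, all ten rows for a given token are
# identical. The embed3.py through embed7.py files below iterate on this.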
37 changes: 37 additions & 0 deletions embed3.py
@@ -0,0 +1,37 @@
# I apologize for the confusion. It seems that I misunderstood your request. If you're seeing the same value (3) repeated in each dimension of the embedding vectors, then there might be an error in the way I adjusted the embeddings.

# Let's ensure we're using distinct prime values or multiples of prime values for each dimension of the embedding vectors:

# ```python
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 5, 7, 11, 13]),   # Using prime values
    "2": np.array([5, 7, 11, 13, 17]),  # Using prime values
    "3": np.array([7, 11, 13, 17, 19]), # Using prime values
    "+": np.array([2, 3, 5, 7, 11]),    # Using prime values
}

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 5))
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        encoding_matrix[token_index, position_index, :] = embeddings[token]

# Print the encoding matrix
for matrix in encoding_matrix:
    print(matrix)
# ```

# This script ensures that each dimension of the embedding vectors uses a distinct prime value or a multiple of a prime value.

# Again, I apologize for any confusion earlier. Please give this script a try to ensure that the embedding values align with your intention to use prime values or multiples of prime values.
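
# Worked check against embed2.py: with these vectors, token "1" now prints
# [3, 5, 7, 11, 13] rather than a repeated 3. All ten position rows are still
# identical, though; embed4.py below makes positions distinct.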
35 changes: 35 additions & 0 deletions embed4.py
@@ -0,0 +1,35 @@
# I apologize for the continued confusion. It seems that I provided you with an incorrect script again. Thank you for your patience. Here's the corrected version of the script that generates distinct embeddings for each token and position:

# ```python
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 5, 7, 11, 13]),   # Using prime values
    "2": np.array([5, 7, 11, 13, 17]),  # Using prime values
    "3": np.array([7, 11, 13, 17, 19]), # Using prime values
    "+": np.array([2, 3, 5, 7, 11]),    # Using prime values
}

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 5))
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        encoding_matrix[token_index, position_index, :] = embeddings[token] * (position_index + 1)

# Print the encoding matrix
for matrix in encoding_matrix:
    print(matrix)
# ```

# In this corrected version of the script, I've multiplied the embeddings by `(position_index + 1)` to ensure that each position has a distinct vector. This should provide you with distinct embeddings for each token and position.

# I apologize for any confusion caused by my previous responses. Please give this script a try, and it should give you the expected distinct embeddings.
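
# Worked check: for token "1" at position_index 2 the stored row is
# [3, 5, 7, 11, 13] * 3 = [9, 15, 21, 33, 39], so each position now holds a
# distinct multiple of the token's base prime vector.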
40 changes: 40 additions & 0 deletions embed5.py
@@ -0,0 +1,40 @@
# Sure, you can certainly add a unique prime number to the end of the vector for the positional information. This can help make the positional vectors more distinct and separate them from the embeddings of the tokens.

# Here's how you can modify the script to include a unique prime value for the positional information:

# ```python
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 5, 7, 11, 13]),   # Using prime values
    "2": np.array([5, 7, 11, 13, 17]),  # Using prime values
    "3": np.array([7, 11, 13, 17, 19]), # Using prime values
    "+": np.array([2, 3, 5, 7, 11]),    # Using prime values
}

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 6)) # Add one more dimension for the unique prime
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        # Multiply embeddings by (position_index + 1) and add a unique prime value
        embedding_with_position = embeddings[token] * (position_index + 1)
        embedding_with_position[-1] = position_index + 2  # Use a unique prime for the last dimension
        encoding_matrix[token_index, position_index, :] = embedding_with_position

# Print the encoding matrix
for matrix in encoding_matrix:
    print(matrix)
# ```

# In this script, I've modified the `encoding_matrix` to have one more dimension to accommodate the unique prime value for the positional information. The last value in each vector is set to `position_index + 2` to ensure uniqueness for the position.

# Feel free to adjust the prime value used for the last dimension according to your preference. This should result in positional vectors that are distinct from the token embeddings and from each other.
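
# Note: as written, this version raises a ValueError, because
# embeddings[token] * (position_index + 1) has shape (5,) and cannot be
# assigned into the (6,)-wide slice of encoding_matrix; embed6.py below fixes
# the shape. Also, position_index + 2 is not always prime (e.g. 4); embed7.py
# replaces it with an explicit prime list.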
37 changes: 37 additions & 0 deletions embed6.py
@@ -0,0 +1,37 @@
# I apologize for the oversight. It seems that there was an issue with the shapes of the arrays. To resolve this, we need to adjust the dimensions of the `embedding_with_position` array to match the shape of the `encoding_matrix`. Here's the corrected version of the script:

# ```python
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 5, 7, 11, 13]),   # Using prime values
    "2": np.array([5, 7, 11, 13, 17]),  # Using prime values
    "3": np.array([7, 11, 13, 17, 19]), # Using prime values
    "+": np.array([2, 3, 5, 7, 11]),    # Using prime values
}

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 6)) # Add one more dimension for the unique prime
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        # Multiply embeddings by (position_index + 1) and add a unique prime value
        embedding_with_position = np.zeros(6)  # Create an array with shape (6,)
        embedding_with_position[:5] = embeddings[token] * (position_index + 1)
        embedding_with_position[-1] = position_index + 2  # Use a unique prime for the last dimension
        encoding_matrix[token_index, position_index, :] = embedding_with_position

# Print the encoding matrix
for matrix in encoding_matrix:
    print(matrix)
# ```

# In this corrected version of the script, I've explicitly initialized the `embedding_with_position` array with shape (6) and assigned values accordingly. This should ensure that the dimensions match and the assignment works correctly.
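
# Worked check of the fixed shapes: for token "1" at position_index 0 the
# stored row is [3, 5, 7, 11, 13, 2] (base primes times 1, then 0 + 2 in the
# last slot); at position_index 1 it is [6, 10, 14, 22, 26, 3].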
49 changes: 49 additions & 0 deletions embed7.py
@@ -0,0 +1,49 @@
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
#vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}
vocabulary = {"1": 0, "+": 1, "2": 2, "3": 3} # Use correct indices for each token

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 5, 7, 11, 13]),   # Using prime values
    "2": np.array([5, 7, 11, 13, 17]),  # Using prime values
    "3": np.array([7, 11, 13, 17, 19]), # Using prime values
    "+": np.array([2, 3, 5, 7, 11]),    # Using prime values
}

# Prime numbers for the positional dimension
prime_numbers = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 6)) # Add one more dimension for the unique prime
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        # Multiply embeddings by (position_index + 1) and append a prime positional value
        embedding_with_position = np.zeros(6)  # Create an array with shape (6,)
        embedding_with_position[:5] = embeddings[token] * (position_index + 1)
        embedding_with_position[-1] = prime_numbers[position_index]  # Use a prime for the last dimension
        encoding_matrix[token_index, position_index, :] = embedding_with_position

# Encode the addition table using the embeddings
for i in range(1, 3):
    for j in range(1, 3):
        expression = ["1", "+", "2"]  # Use 1 and 2 as placeholders
        expression[0] = str(i)  # Replace the placeholders with actual numbers
        expression[2] = str(j)
        encoded_expression = []
        for token in expression:
            token_idx = vocabulary[token]
            # print(dict(token=token, expression=expression,
            #            vocab=vocabulary,
            #            token_idx=token_idx, x=i - 1,
            #            shape_encoding_matrix=encoding_matrix.shape))
            encoded_expression.append(encoding_matrix[token_idx, i - 1, :])  # i - 1 converts the 1-based i to a 0-based position
        result_vector = np.sum(encoded_expression, axis=0)
        print(f"Expression: {i}+{j}, Encoded Vector: {result_vector}")

51 changes: 51 additions & 0 deletions examples/generator/__init__.py
@@ -0,0 +1,51 @@
# Got it! It looks like you're using a GraphQL query to retrieve the discussion information from a specific repository. Here's how you can integrate the GraphQL query with the Python script, using the `requests` library to make the API request:

#```python
import requests

# Define the GraphQL query
query = """
query {
repository(owner: "meta-introspector", name: "meta-meme") {
discussion(number: 21) {
title
body
comments(first: 10) {
nodes {
author {
login
}
body
}
}
}
}
}
"""

# API endpoint
url = "https://api.github.com/graphql"

# Your GitHub personal access token
headers = {
    "Authorization": "Bearer YOUR_PERSONAL_ACCESS_TOKEN"
}

# Make the API request
response = requests.post(url, json={"query": query}, headers=headers)
data = response.json()

# Extract and display the information
discussion_data = data["data"]["repository"]["discussion"]
discussion_title = discussion_data["title"]
print("Discussion Title:", discussion_title)

comments = discussion_data["comments"]["nodes"]
for comment in comments:
    author = comment["author"]["login"]
    body = comment["body"]
    print("Author:", author)
    print("Comment:", body)
    print("-" * 20)

# Replace `YOUR_PERSONAL_ACCESS_TOKEN` with your actual GitHub personal access token. Make sure you have the `requests` library installed (`pip install requests`) before running the script. This script will send the GraphQL query to the GitHub API, retrieve the discussion data, and display the discussion title along with the author and comment body for each comment.
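
A likely follow-up, sketched here as an assumption rather than part of the original script: a discussion can hold more than ten comments, and GitHub's GraphQL API paginates connections via pageInfo cursors, so the query can take an `after` argument and loop until hasNextPage is false. This sketch reuses the `url` and `headers` defined above.

# Hedged sketch: paginate past the first 10 comments using GraphQL cursors.
paged_query = """
query($cursor: String) {
  repository(owner: "meta-introspector", name: "meta-meme") {
    discussion(number: 21) {
      comments(first: 10, after: $cursor) {
        pageInfo { hasNextPage endCursor }
        nodes { author { login } body }
      }
    }
  }
}
"""

cursor = None
while True:
    resp = requests.post(
        url, json={"query": paged_query, "variables": {"cursor": cursor}}, headers=headers
    )
    page = resp.json()["data"]["repository"]["discussion"]["comments"]
    for comment in page["nodes"]:
        print(comment["author"]["login"], ":", comment["body"])
    if not page["pageInfo"]["hasNextPage"]:
        break
    cursor = page["pageInfo"]["endCursor"]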
1 change: 1 addition & 0 deletions examples/generator/export_discussions.sh
@@ -0,0 +1 @@
python ./pull_git_comments.py > ../../data/meta-introspector/metameme/dicussions/21/discussions.json