Add files via upload #89

Open. Wants to merge 20 commits into base: main.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
*~
27 changes: 27 additions & 0 deletions bing.py
@@ -0,0 +1,27 @@
import os

import requests

# Define the API endpoint
url = "https://bing.com/chat/api"

# Define the API parameters
params = {
    "user_id": "YOUR_USER_ID",  # Replace with your user ID
    "chat_id": "YOUR_CHAT_ID",  # Replace with your chat ID
    "mode": "Balanced"          # Replace with your chat mode
}

# Make the API request and fail fast on an HTTP error
response = requests.get(url, params=params)
response.raise_for_status()
data = response.json()

# Extract the chat history
chat_history = data["chat_history"]

# Define the file name and location
file_name = "chat_history.txt"
file_location = "YOUR_FILE_LOCATION"  # Replace with your file location

# Save the chat history to a file; os.path.join avoids a missing path separator
with open(os.path.join(file_location, file_name), "w") as file:
    for message in chat_history:
        file.write(str(message) + "\n")
40 changes: 40 additions & 0 deletions bingsearch.py
@@ -0,0 +1,40 @@
#Copyright (c) Microsoft Corporation. All rights reserved.
#Licensed under the MIT License.

# -*- coding: utf-8 -*-

import json
import os
from pprint import pprint
import requests

'''
This sample makes a call to the Bing Web Search API with a query and returns the relevant web search results.
Documentation: https://docs.microsoft.com/en-us/bing/search-apis/bing-web-search/overview
'''

# Add your Bing Search V7 subscription key and endpoint to your environment variables.
subscription_key = os.environ['BING_SEARCH_V7_SUBSCRIPTION_KEY']
endpoint = os.environ['BING_SEARCH_V7_ENDPOINT'] + "/bing/v7.0/search"

# Query term(s) to search for.
query = "Meta-Introspector meta-meme github"

# Construct a request
mkt = 'en-US'
params = { 'q': query, 'mkt': mkt }
headers = { 'Ocp-Apim-Subscription-Key': subscription_key }

# Call the API
try:
    response = requests.get(endpoint, headers=headers, params=params)
    response.raise_for_status()

    print("Headers:")
    print(response.headers)

    print("JSON Response:")
    pprint(response.json())
except Exception:
    # Re-raise with the original traceback; `raise ex` was redundant
    raise
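
If the call succeeds, the JSON body of a web query normally carries a webPages.value list of results. A minimal sketch of pulling out the result titles and URLs, assuming the response contains web page results:

# Hedged sketch: list result titles and URLs from a Bing Web Search v7 response.
# Guard with .get() in case the query returned no web results.
results = response.json().get("webPages", {}).get("value", [])
for item in results:
    print(item["name"], "->", item["url"])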

11 changes: 11 additions & 0 deletions collect_terms.sh
@@ -0,0 +1,11 @@
pushd meta-meme/
git log --all --patch > patch.txt
popd
pushd meta-meme.wiki
git log --patch --all > patch.txt
popd
cp ./meta-meme.wiki/patch.txt meta-meme/wiki-patch.txt
pushd meta-meme/
cat wiki-patch.txt patch.txt > total.txt
grep -P -o -e '([a-zA-Z0-9]+)' total.txt | sort | uniq -c | sort -n > tokens.txt
popd
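
For readers who prefer Python, here is a minimal sketch of the same token-frequency count, assuming total.txt already exists; collections.Counter stands in for the grep/sort/uniq pipeline:

import re
from collections import Counter

# Count alphanumeric tokens in total.txt, mirroring the shell pipeline above
with open("meta-meme/total.txt") as f:
    counts = Counter(re.findall(r"[a-zA-Z0-9]+", f.read()))

# Write "count token" lines in ascending frequency, like `sort -n`
with open("meta-meme/tokens.txt", "w") as out:
    for token, count in sorted(counts.items(), key=lambda kv: kv[1]):
        out.write(f"{count:7d} {token}\n")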
25 changes: 25 additions & 0 deletions data/meta-introspector/metameme/dicussions/21/discussions.json

Large diffs are not rendered by default.

79 changes: 79 additions & 0 deletions data/meta-introspector/metameme/dicussions/21/issues.json

Large diffs are not rendered by default.

37 changes: 37 additions & 0 deletions data/meta-introspector/metameme/dicussions/21/threads.json

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions embed.py
@@ -0,0 +1,29 @@
import numpy as np

# Define the vocabulary and primes
vocabulary = {"0": 3, "1": 5, "2": 7, "3": 11, "4": 13, "+": 2}

# Initialize embeddings (simplified 2D vectors for illustration)
embeddings = {
    "0": np.array([0.2, 0.3]),
    "1": np.array([0.4, 0.5]),
    "2": np.array([0.6, 0.7]),
    "3": np.array([0.8, 0.9]),
    "4": np.array([1.0, 1.1]),
    "+": np.array([1.2, 1.3]),
}

# Define a function to create a "partial_add" embedding
def partial_add_embed(operand):
    return operand + embeddings["+"]

# Example sentence: "2+3"
operands = ["2", "3"]
partial_add_embed_2 = partial_add_embed(embeddings[operands[0]])

# Apply left-to-right outer-to-inner order
result = partial_add_embed_2 + embeddings[operands[1]]

print(f"Sentence: {operands[0]}+{operands[1]}")
print(f"Partial Addition Embedding: {partial_add_embed_2}")
print(f"Resulting Embedding: {result}")
27 changes: 27 additions & 0 deletions embed2.py
@@ -0,0 +1,27 @@
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 3, 3, 3, 3]),  # Using prime value 3
    "2": np.array([5, 5, 5, 5, 5]),  # Using prime value 5
    "3": np.array([7, 7, 7, 7, 7]),  # Using prime value 7
    "+": np.array([2, 2, 2, 2, 2]),  # Using prime value 2
}

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 5))
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        encoding_matrix[token_index, position_index, :] = embeddings[token]

# Print the encoding matrix
for matrix in encoding_matrix:
    print(matrix)
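# Note on the output: encoding_matrix has shape (4, 10, 5), and because the
# same vector is written at every position, all ten rows for a given token are
# identical. The embed3.py through embed7.py files below iterate on this.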
37 changes: 37 additions & 0 deletions embed3.py
@@ -0,0 +1,37 @@
# I apologize for the confusion. It seems that I misunderstood your request. If you're seeing the same value (3) repeated in each dimension of the embedding vectors, then there might be an error in the way I adjusted the embeddings.

# Let's ensure we're using distinct prime values or multiples of prime values for each dimension of the embedding vectors:

# ```python
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 5, 7, 11, 13]),   # Using prime values
    "2": np.array([5, 7, 11, 13, 17]),  # Using prime values
    "3": np.array([7, 11, 13, 17, 19]), # Using prime values
    "+": np.array([2, 3, 5, 7, 11]),    # Using prime values
}

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 5))
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        encoding_matrix[token_index, position_index, :] = embeddings[token]

# Print the encoding matrix
for matrix in encoding_matrix:
    print(matrix)
# ```

# This script ensures that each dimension of the embedding vectors uses a distinct prime value or a multiple of a prime value.

# Again, I apologize for any confusion earlier. Please give this script a try to ensure that the embedding values align with your intention to use prime values or multiples of prime values.
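
# Worked check against embed2.py: with these vectors, token "1" now prints
# [3, 5, 7, 11, 13] rather than a repeated 3. All ten position rows are still
# identical, though; embed4.py below makes positions distinct.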
35 changes: 35 additions & 0 deletions embed4.py
@@ -0,0 +1,35 @@
# I apologize for the continued confusion. It seems that I provided you with an incorrect script again. Thank you for your patience. Here's the corrected version of the script that generates distinct embeddings for each token and position:

# ```python
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 5, 7, 11, 13]),   # Using prime values
    "2": np.array([5, 7, 11, 13, 17]),  # Using prime values
    "3": np.array([7, 11, 13, 17, 19]), # Using prime values
    "+": np.array([2, 3, 5, 7, 11]),    # Using prime values
}

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 5))
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        encoding_matrix[token_index, position_index, :] = embeddings[token] * (position_index + 1)

# Print the encoding matrix
for matrix in encoding_matrix:
    print(matrix)
# ```

# In this corrected version of the script, I've multiplied the embeddings by `(position_index + 1)` to ensure that each position has a distinct vector. This should provide you with distinct embeddings for each token and position.

# I apologize for any confusion caused by my previous responses. Please give this script a try, and it should give you the expected distinct embeddings.
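
# Worked check: for token "1" at position_index 2 the stored row is
# [3, 5, 7, 11, 13] * 3 = [9, 15, 21, 33, 39], so each position now holds a
# distinct multiple of the token's base prime vector.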
40 changes: 40 additions & 0 deletions embed5.py
@@ -0,0 +1,40 @@
# Sure, you can certainly add a unique prime number to the end of the vector for the positional information. This can help make the positional vectors more distinct and separate them from the embeddings of the tokens.

# Here's how you can modify the script to include a unique prime value for the positional information:

# ```python
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 5, 7, 11, 13]),   # Using prime values
    "2": np.array([5, 7, 11, 13, 17]),  # Using prime values
    "3": np.array([7, 11, 13, 17, 19]), # Using prime values
    "+": np.array([2, 3, 5, 7, 11]),    # Using prime values
}

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 6)) # Add one more dimension for the unique prime
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        # Multiply embeddings by (position_index + 1) and add a unique prime value
        embedding_with_position = embeddings[token] * (position_index + 1)
        embedding_with_position[-1] = position_index + 2  # Use a unique prime for the last dimension
        encoding_matrix[token_index, position_index, :] = embedding_with_position

# Print the encoding matrix
for matrix in encoding_matrix:
    print(matrix)
# ```

# In this script, I've modified the `encoding_matrix` to have one more dimension to accommodate the unique prime value for the positional information. The last value in each vector is set to `position_index + 2` to ensure uniqueness for the position.

# Feel free to adjust the prime value used for the last dimension according to your preference. This should result in positional vectors that are distinct from the token embeddings and from each other.
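
# Note: as written, this version raises a ValueError, because
# embeddings[token] * (position_index + 1) has shape (5,) and cannot be
# assigned into the (6,)-wide slice of encoding_matrix; embed6.py below fixes
# the shape. Also, position_index + 2 is not always prime (e.g. 4); embed7.py
# replaces it with an explicit prime list.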
37 changes: 37 additions & 0 deletions embed6.py
@@ -0,0 +1,37 @@
# I apologize for the oversight. It seems that there was an issue with the shapes of the arrays. To resolve this, we need to adjust the dimensions of the `embedding_with_position` array to match the shape of the `encoding_matrix`. Here's the corrected version of the script:

# ```python
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 5, 7, 11, 13]),   # Using prime values
    "2": np.array([5, 7, 11, 13, 17]),  # Using prime values
    "3": np.array([7, 11, 13, 17, 19]), # Using prime values
    "+": np.array([2, 3, 5, 7, 11]),    # Using prime values
}

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 6)) # Add one more dimension for the unique prime
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        # Multiply embeddings by (position_index + 1) and add a unique prime value
        embedding_with_position = np.zeros(6)  # Create an array with shape (6,)
        embedding_with_position[:5] = embeddings[token] * (position_index + 1)
        embedding_with_position[-1] = position_index + 2  # Use a unique prime for the last dimension
        encoding_matrix[token_index, position_index, :] = embedding_with_position

# Print the encoding matrix
for matrix in encoding_matrix:
    print(matrix)
# ```

# In this corrected version of the script, I've explicitly initialized the `embedding_with_position` array with shape (6) and assigned values accordingly. This should ensure that the dimensions match and the assignment works correctly.
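
# Worked check of the fixed shapes: for token "1" at position_index 0 the
# stored row is [3, 5, 7, 11, 13, 2] (base primes times 1, then 0 + 2 in the
# last slot); at position_index 1 it is [6, 10, 14, 22, 26, 3].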
49 changes: 49 additions & 0 deletions embed7.py
@@ -0,0 +1,49 @@
import numpy as np
import matplotlib.pyplot as plt

# Define the vocabulary and primes
#vocabulary = {"1": 3, "2": 5, "3": 7, "+": 2}
vocabulary = {"1": 0, "+": 1, "2": 2, "3": 3} # Use correct indices for each token

# Initialize embeddings (using prime values or multiples of prime values)
embeddings = {
    "1": np.array([3, 5, 7, 11, 13]),   # Using prime values
    "2": np.array([5, 7, 11, 13, 17]),  # Using prime values
    "3": np.array([7, 11, 13, 17, 19]), # Using prime values
    "+": np.array([2, 3, 5, 7, 11]),    # Using prime values
}

# Prime numbers for the positional dimension
prime_numbers = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]

# Initialize the matrix of vectors for encoding
num_tokens = len(vocabulary)
num_positions = 10

# Create the matrix by stacking embeddings for each token and position
encoding_matrix = np.zeros((num_tokens, num_positions, 6)) # Add one more dimension for the unique prime
for token_index, token in enumerate(vocabulary):
    for position_index in range(num_positions):
        # Multiply embeddings by (position_index + 1) and append a prime positional value
        embedding_with_position = np.zeros(6)  # Create an array with shape (6,)
        embedding_with_position[:5] = embeddings[token] * (position_index + 1)
        embedding_with_position[-1] = prime_numbers[position_index]  # Use a prime for the last dimension
        encoding_matrix[token_index, position_index, :] = embedding_with_position

# Encode the addition table using the embeddings
for i in range(1, 3):
    for j in range(1, 3):
        expression = ["1", "+", "2"]  # Use 1 and 2 as placeholders
        expression[0] = str(i)  # Replace the placeholders with actual numbers
        expression[2] = str(j)
        encoded_expression = []
        for token in expression:
            token_idx = vocabulary[token]
            # print(dict(token=token, expression=expression,
            #            vocab=vocabulary,
            #            token_idx=token_idx, x=i - 1,
            #            shape_encoding_matrix=encoding_matrix.shape))
            encoded_expression.append(encoding_matrix[token_idx, i - 1, :])  # i - 1 converts the 1-based i to a 0-based position
        result_vector = np.sum(encoded_expression, axis=0)
        print(f"Expression: {i}+{j}, Encoded Vector: {result_vector}")

51 changes: 51 additions & 0 deletions examples/generator/__init__.py
@@ -0,0 +1,51 @@
# Got it! It looks like you're using a GraphQL query to retrieve the discussion information from a specific repository. Here's how you can integrate the GraphQL query with the Python script, using the `requests` library to make the API request:

#```python
import requests

# Define the GraphQL query
query = """
query {
repository(owner: "meta-introspector", name: "meta-meme") {
discussion(number: 21) {
title
body
comments(first: 10) {
nodes {
author {
login
}
body
}
}
}
}
}
"""

# API endpoint
url = "https://api.github.com/graphql"

# Your GitHub personal access token
headers = {
    "Authorization": "Bearer YOUR_PERSONAL_ACCESS_TOKEN"
}

# Make the API request
response = requests.post(url, json={"query": query}, headers=headers)
data = response.json()

# Extract and display the information
discussion_data = data["data"]["repository"]["discussion"]
discussion_title = discussion_data["title"]
print("Discussion Title:", discussion_title)

comments = discussion_data["comments"]["nodes"]
for comment in comments:
    author = comment["author"]["login"]
    body = comment["body"]
    print("Author:", author)
    print("Comment:", body)
    print("-" * 20)

# Replace `YOUR_PERSONAL_ACCESS_TOKEN` with your actual GitHub personal access token. Make sure you have the `requests` library installed (`pip install requests`) before running the script. This script will send the GraphQL query to the GitHub API, retrieve the discussion data, and display the discussion title along with the author and comment body for each comment.
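
A likely follow-up, sketched here as an assumption rather than part of the original script: a discussion can hold more than ten comments, and GitHub's GraphQL API paginates connections via pageInfo cursors, so the query can take an `after` argument and loop until hasNextPage is false. This sketch reuses the `url` and `headers` defined above.

# Hedged sketch: paginate past the first 10 comments using GraphQL cursors.
paged_query = """
query($cursor: String) {
  repository(owner: "meta-introspector", name: "meta-meme") {
    discussion(number: 21) {
      comments(first: 10, after: $cursor) {
        pageInfo { hasNextPage endCursor }
        nodes { author { login } body }
      }
    }
  }
}
"""

cursor = None
while True:
    resp = requests.post(
        url, json={"query": paged_query, "variables": {"cursor": cursor}}, headers=headers
    )
    page = resp.json()["data"]["repository"]["discussion"]["comments"]
    for comment in page["nodes"]:
        print(comment["author"]["login"], ":", comment["body"])
    if not page["pageInfo"]["hasNextPage"]:
        break
    cursor = page["pageInfo"]["endCursor"]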
1 change: 1 addition & 0 deletions examples/generator/export_discussions.sh
@@ -0,0 +1 @@
python ./pull_git_comments.py > ../../data/meta-introspector/metameme/dicussions/21/discussions.json