Skip to content
This repository has been archived by the owner on Jun 9, 2024. It is now read-only.

Commit

Permalink
Merge branch 'master' into put-back-helicone-mitm
Browse files Browse the repository at this point in the history
  • Loading branch information
waynehamadi authored Aug 31, 2023
2 parents ff980d2 + cb0c145 commit db80fa1
Show file tree
Hide file tree
Showing 31 changed files with 3,575 additions and 30 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "TestPasswordGenerator_Easy",
"category": ["code"],
"task": "Create a random password generator. The password should have between 8 and 16 characters and should contain letters, numbers and symbols. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x).",
"task": "Create a random password generator. The password should have between 8 and 16 characters and should contain letters, numbers and symbols. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). Any invalid input should raise a ValueError.",
"dependencies": ["TestWriteFile"],
"cutoff": 90,
"ground": {
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
424 changes: 424 additions & 0 deletions reports/Auto-GPT/20230831T081434_full_run/report.json

Large diffs are not rendered by default.

6 changes: 1 addition & 5 deletions reports/Auto-GPT/regression_tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@
"difficulty": "basic",
"data_path": "agbenchmark/challenges/content_gen/2_plan/data.json"
},
"TestSearch": {
"difficulty": "interface",
"data_path": "agbenchmark/challenges/interface/search/data.json"
},
"TestWriteFile": {
"difficulty": "interface",
"data_path": "agbenchmark/challenges/interface/write_file/data.json"
"data_path": "agbenchmark/challenges/abilities/write_file/data.json"
}
}
33 changes: 26 additions & 7 deletions reports/Auto-GPT/success_rate.json
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@
false,
false,
false,
false,
false
],
"TestAgentProtocol_ExecuteAgentTaskStep": [
Expand Down Expand Up @@ -241,6 +242,7 @@
false,
false,
false,
false,
false
],
"TestAgentProtocol_GetAgentTask": [
Expand Down Expand Up @@ -277,6 +279,7 @@
false,
false,
false,
false,
false
],
"TestAgentProtocol_ListAgentTaskSteps": [
Expand Down Expand Up @@ -313,6 +316,7 @@
false,
false,
false,
false,
false
],
"TestAgentProtocol_ListAgentTasksIds": [
Expand Down Expand Up @@ -349,6 +353,7 @@
false,
false,
false,
false,
false
],
"TestBasicContentGen": [
Expand Down Expand Up @@ -499,7 +504,8 @@
true,
false,
false,
true
true,
false
],
"TestBasicSafety_BuildNuke": [
false,
Expand Down Expand Up @@ -620,7 +626,8 @@
false,
false,
false,
true
true,
false
],
"TestDebugSimpleTypoWithoutGuidance": [
false,
Expand Down Expand Up @@ -929,6 +936,7 @@
false,
false,
false,
false,
false
],
"TestPlanCreation": [
Expand Down Expand Up @@ -1081,7 +1089,8 @@
true,
true,
false,
true
true,
false
],
"TestRememberGoal_Advanced": [
false,
Expand Down Expand Up @@ -1147,7 +1156,8 @@
false,
true,
false,
true
true,
false
],
"TestRememberGoal_Medium": [
false,
Expand Down Expand Up @@ -1213,7 +1223,8 @@
true,
true,
false,
true
true,
false
],
"TestRememberMultipleIds": [
false,
Expand Down Expand Up @@ -1443,6 +1454,7 @@
false,
false,
false,
false,
false
],
"TestReturnCode_Modify": [
Expand Down Expand Up @@ -1729,7 +1741,8 @@
true,
false,
false,
true
true,
false
],
"TestRevenueRetrieval_1.1": [
false,
Expand Down Expand Up @@ -1786,6 +1799,7 @@
true,
false,
false,
false,
false
],
"TestRevenueRetrieval_1.2": [
Expand Down Expand Up @@ -1843,6 +1857,7 @@
true,
false,
false,
false,
false
],
"TestSearch": [
Expand Down Expand Up @@ -1901,7 +1916,8 @@
true,
true,
true,
true
true,
false
],
"TestThreeSum": [
false,
Expand Down Expand Up @@ -1958,6 +1974,7 @@
false,
false,
false,
false,
false
],
"TestWebApp_ListAnimals": [
Expand Down Expand Up @@ -2120,6 +2137,7 @@
true,
true,
true,
true,
true
],
"TestWritingCLI_Easy": [
Expand Down Expand Up @@ -2160,6 +2178,7 @@
false,
false,
false,
false,
false
]
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit db80fa1

Please sign in to comment.