Skip to content

Commit

Permalink
Merge branch '2.17-Beta'
Browse files Browse the repository at this point in the history
  • Loading branch information
ThioJoe committed Mar 19, 2023
2 parents 6aedbbd + e0106da commit 18cbe0c
Show file tree
Hide file tree
Showing 18 changed files with 682 additions and 276 deletions.
12 changes: 4 additions & 8 deletions Scripts/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import Scripts.validation as validation

# Google Authentication Modules
from googleapiclient.errors import HttpError
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.oauth2.credentials import Credentials
Expand All @@ -13,9 +12,6 @@

TOKEN_FILE_NAME = 'token.pickle'

global CURRENTUSER
global YOUTUBE

YOUTUBE = None
CURRENTUSER = None

Expand Down Expand Up @@ -87,7 +83,7 @@ def first_authentication():
except JSONDecodeError as jx:
print(f"{F.WHITE}{B.RED} [!!!] Error: {S.R}" + str(jx))
print(f"\nDid you make the client_secrets.json file yourself by {F.LIGHTRED_EX}copying and pasting into it{S.R}, instead of {F.LIGHTGREEN_EX}downloading it{S.R}?")
print(f"You need to {F.YELLOW}download the json file directly from the google cloud dashboard{S.R} as shown in the instructions.")
print(f"You need to {F.YELLOW}download the json file directly from the Google Cloud dashboard{S.R} as shown in the instructions.")
print("If you think this is a bug, you may report it on this project's GitHub page: https://github.com/ThioJoe/YT-Spammer-Purge/issues")
input("Press Enter to Exit...")
sys.exit()
Expand All @@ -102,7 +98,7 @@ def first_authentication():
print("----------------")
print(f"{F.RED}[!!!] Error: {S.R}" + str(e))
print("If you think this is a bug, you may report it on this project's GitHub page: https://github.com/ThioJoe/YT-Spammer-Purge/issues")
input(f"\nError Code A-1: {F.RED}Something went wrong during authentication.{S.R} {F.YELLOW}Try deleting the token.pickle file.{S.R} \nPress Enter to exit...")
input(f"\nError Code A-1: {F.RED}Something went wrong during authentication.{S.R} {F.YELLOW}Try deleting the token.pickle file.{S.R} \nPress Enter to Exit...")
sys.exit()
return YOUTUBE

Expand Down Expand Up @@ -131,7 +127,7 @@ def fetch_user():
print(f"{F.YELLOW}Error Getting Current User{S.R}: The YouTube API responded, but did not provide a Channel ID.")
print(f"{F.CYAN}Known Possible Causes:{S.R}")
print("> The client_secrets file does not match user authorized with token.pickle file.")
print("> You are logging in with a Google account that does not have a YouTube channel created yet.")
print("> You are logging in with a Google Account that does not have a YouTube channel created yet.")
print("> When choosing the account to log into, you selected the option showing the Google Account's email address, which might not have a channel attached to it.")
input("\nPress Enter to try logging in again...")
os.remove(TOKEN_FILE_NAME)
Expand Down Expand Up @@ -185,4 +181,4 @@ def fetch_user():


def remove_token():
os.remove(TOKEN_FILE_NAME)
os.remove(TOKEN_FILE_NAME)
54 changes: 54 additions & 0 deletions Scripts/benchmark_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
Benchmark rapidfuzz and python-Levenshtein time
Author:
Pushpam Punjabi
Machine Learning Engineer
"""

import random
from datetime import datetime
from datetime import timedelta
from time import perf_counter

import numpy as np
from Levenshtein import ratio
from rapidfuzz import fuzz

# Benchmark script body: builds NUM_PAIRS pairs of random ASCII-letter strings,
# then times python-Levenshtein's ratio() against rapidfuzz's fuzz.ratio()
# over the same pairs and prints the elapsed time of each.
print("\nGenerating experiment...")

# Alphabet the random sentences are drawn from.
CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Candidate sentence lengths: 10 through 999 characters.
lengths = np.arange(10, 1000)
NUM_PAIRS = 10000

# Initialize string lists
x = []
y = []

# Generate random strings
for _ in range(NUM_PAIRS):
    # int() turns NumPy's integer scalar into a plain int before it is
    # handed to random.choices as the sample count.
    x_len = int(random.choice(lengths))
    y_len = int(random.choice(lengths))
    x.append("".join(random.choices(CHARS, k=x_len)))
    y.append("".join(random.choices(CHARS, k=y_len)))

print("Generated experiment.\n\nRunning benchmark...")

# Benchmark time for python-Levenshtein.
# perf_counter() is a monotonic, high-resolution clock, so the measurement is
# not disturbed by system clock adjustments the way datetime.now() can be.
start = perf_counter()
for sen_x, sen_y in zip(x, y):
    value = ratio(sen_x, sen_y)
end = perf_counter()
# timedelta keeps the printed H:MM:SS.ffffff format.
print(f"\npython-Levenshtein time: {timedelta(seconds=end - start)}")

# Benchmark time for rapidfuzz.
start = perf_counter()
for sen_x, sen_y in zip(x, y):
    # fuzz.ratio() is divided by 100 so it matches ratio()'s 0-1 scale; the
    # division is deliberately kept inside the timed loop.
    value = fuzz.ratio(sen_x, sen_y) / 100
end = perf_counter()
print(f"rapidfuzz time: {timedelta(seconds=end - start)}\n")
3 changes: 0 additions & 3 deletions Scripts/community_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@
from __future__ import print_function
from Scripts.shared_imports import *

import argparse
import io
import json
import os
import sys
import time

Expand Down
7 changes: 6 additions & 1 deletion Scripts/confusablesCustom/Custom Confusable String Maker.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,14 @@ def make_string(realLetter, letterInSpam):
realPhrase = realPhrase.replace(" ", "").lower()

makeStringList = []
alreadyCheckedCharList = []

for i in range(len(spammifiedPhrase)):
check_char(realPhrase[i], spammifiedPhrase[i])
if spammifiedPhrase[i] in alreadyCheckedCharList:
continue
else:
alreadyCheckedCharList.append(spammifiedPhrase[i])
check_char(realPhrase[i], spammifiedPhrase[i])


if makeStringList:
Expand Down
2 changes: 1 addition & 1 deletion Scripts/confusablesCustom/assets/confusable_mapping.json

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions Scripts/confusablesCustom/assets/confusables.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# confusables.txt
# Date: 2021-05-29, 22:09:29 GMT
# © 2021 Unicode®, Inc.
# confusables.txt
# Date: 2022-08-26, 16:49:08 GMT
# © 2022 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Security Mechanisms for UTS #39
# Version: 14.0.0
# Version: 15.0.0
#
# For documentation and usage, see http://www.unicode.org/reports/tr39
# For documentation and usage, see https://www.unicode.org/reports/tr39
#
05AD ; 0596 ; MA # ( ֭ → ֖ ) HEBREW ACCENT DEHI → HEBREW ACCENT TIPEHA #

Expand Down Expand Up @@ -2761,11 +2761,11 @@ FE87 ; 006C 0655 ; MA # ( ‎ﺇ‎ → lٕ ) ARABIC LETTER ALEF WITH HAMZA BELO

02AB ; 006C 007A ; MA # ( ʫ → lz ) LATIN SMALL LETTER LZ DIGRAPH → LATIN SMALL LETTER L, LATIN SMALL LETTER Z #

0675 ; 006C 0674 ; MA # ( ‎ٵ‎ → ‎lٴ‎ ) ARABIC LETTER HIGH HAMZA ALEF → LATIN SMALL LETTER L, ARABIC LETTER HIGH HAMZA # →‎اٴ‎→
0623 ; 006C 0674 ; MA # ( ‎أ‎ → ‎lٴ‎ ) ARABIC LETTER ALEF WITH HAMZA ABOVE → LATIN SMALL LETTER L, ARABIC LETTER HIGH HAMZA # →‎ٵ‎→→‎اٴ‎→
FE84 ; 006C 0674 ; MA # ( ‎ﺄ‎ → ‎lٴ‎ ) ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM → LATIN SMALL LETTER L, ARABIC LETTER HIGH HAMZA # →‎أ‎→→‎ٵ‎→→‎اٴ‎→
FE83 ; 006C 0674 ; MA # ( ‎ﺃ‎ → ‎lٴ‎ ) ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM → LATIN SMALL LETTER L, ARABIC LETTER HIGH HAMZA # →‎ٵ‎→→‎اٴ‎→
0672 ; 006C 0674 ; MA # ( ‎ٲ‎ → ‎lٴ‎ ) ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE → LATIN SMALL LETTER L, ARABIC LETTER HIGH HAMZA # →‎أ‎→→‎ٵ‎→→‎اٴ‎→
0675 ; 006C 0674 ; MA # ( ‎ٵ‎ → ‎lٴ‎ ) ARABIC LETTER HIGH HAMZA ALEF → LATIN SMALL LETTER L, ARABIC LETTER HIGH HAMZA # →‎اٴ‎→

FDF3 ; 006C 0643 0628 0631 ; MA # ( ‎ﷳ‎ → ‎lكبر‎ ) ARABIC LIGATURE AKBAR ISOLATED FORM → LATIN SMALL LETTER L, ARABIC LETTER KAF, ARABIC LETTER BEH, ARABIC LETTER REH # →‎اكبر‎→

Expand Down Expand Up @@ -5351,10 +5351,10 @@ FBE2 ; 0648 0302 ; MA # ( ‎ﯢ‎ → ‎و̂‎ ) ARABIC LETTER KIRGHIZ YU IS
FBDC ; 0648 0670 ; MA # ( ‎ﯜ‎ → ‎وٰ‎ ) ARABIC LETTER YU FINAL FORM → ARABIC LETTER WAW, ARABIC LETTER SUPERSCRIPT ALEF # →‎ۈ‎→
FBDB ; 0648 0670 ; MA # ( ‎ﯛ‎ → ‎وٰ‎ ) ARABIC LETTER YU ISOLATED FORM → ARABIC LETTER WAW, ARABIC LETTER SUPERSCRIPT ALEF # →‎ۈ‎→

0676 ; 0648 0674 ; MA # ( ‎ٶ‎ → ‎وٴ‎ ) ARABIC LETTER HIGH HAMZA WAW → ARABIC LETTER WAW, ARABIC LETTER HIGH HAMZA #
0624 ; 0648 0674 ; MA # ( ‎ؤ‎ → ‎وٴ‎ ) ARABIC LETTER WAW WITH HAMZA ABOVE → ARABIC LETTER WAW, ARABIC LETTER HIGH HAMZA # →‎ٶ‎→
FE86 ; 0648 0674 ; MA # ( ‎ﺆ‎ → ‎وٴ‎ ) ARABIC LETTER WAW WITH HAMZA ABOVE FINAL FORM → ARABIC LETTER WAW, ARABIC LETTER HIGH HAMZA # →‎ٶ‎→
FE85 ; 0648 0674 ; MA # ( ‎ﺅ‎ → ‎وٴ‎ ) ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM → ARABIC LETTER WAW, ARABIC LETTER HIGH HAMZA # →‎ٶ‎→
0676 ; 0648 0674 ; MA # ( ‎ٶ‎ → ‎وٴ‎ ) ARABIC LETTER HIGH HAMZA WAW → ARABIC LETTER WAW, ARABIC LETTER HIGH HAMZA #

0677 ; 0648 0313 0674 ; MA # ( ‎ٷ‎ → ‎و̓ٴ‎ ) ARABIC LETTER U WITH HAMZA ABOVE → ARABIC LETTER WAW, COMBINING COMMA ABOVE, ARABIC LETTER HIGH HAMZA # →‎ۇٴ‎→
FBDD ; 0648 0313 0674 ; MA # ( ‎ﯝ‎ → ‎و̓ٴ‎ ) ARABIC LETTER U WITH HAMZA ABOVE ISOLATED FORM → ARABIC LETTER WAW, COMBINING COMMA ABOVE, ARABIC LETTER HIGH HAMZA # →‎ۇٴ‎→
Expand Down Expand Up @@ -5446,12 +5446,12 @@ FCF1 ; 0649 006F ; MA # ( ‎ﳱ‎ → ‎ىo‎ ) ARABIC LIGATURE YEH WITH HEH

FCE6 ; 0649 06DB 006F ; MA # ( ‎ﳦ‎ → ‎ىۛo‎ ) ARABIC LIGATURE THEH WITH HEH MEDIAL FORM → ARABIC LETTER ALEF MAKSURA, ARABIC SMALL HIGH THREE DOTS, LATIN SMALL LETTER O # →‎ثه‎→

0678 ; 0649 0674 ; MA # ( ‎ٸ‎ → ‎ىٴ‎ ) ARABIC LETTER HIGH HAMZA YEH → ARABIC LETTER ALEF MAKSURA, ARABIC LETTER HIGH HAMZA # →‎يٴ‎→
0626 ; 0649 0674 ; MA # ( ‎ئ‎ → ‎ىٴ‎ ) ARABIC LETTER YEH WITH HAMZA ABOVE → ARABIC LETTER ALEF MAKSURA, ARABIC LETTER HIGH HAMZA # →‎ٸ‎→→‎يٴ‎→
FE8B ; 0649 0674 ; MA # ( ‎ﺋ‎ → ‎ىٴ‎ ) ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM → ARABIC LETTER ALEF MAKSURA, ARABIC LETTER HIGH HAMZA # →‎ئ‎→→‎ٸ‎→→‎يٴ‎→
FE8C ; 0649 0674 ; MA # ( ‎ﺌ‎ → ‎ىٴ‎ ) ARABIC LETTER YEH WITH HAMZA ABOVE MEDIAL FORM → ARABIC LETTER ALEF MAKSURA, ARABIC LETTER HIGH HAMZA # →‎ئ‎→→‎ٸ‎→→‎يٴ‎→
FE8A ; 0649 0674 ; MA # ( ‎ﺊ‎ → ‎ىٴ‎ ) ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM → ARABIC LETTER ALEF MAKSURA, ARABIC LETTER HIGH HAMZA # →‎ئ‎→→‎ٸ‎→→‎يٴ‎→
FE89 ; 0649 0674 ; MA # ( ‎ﺉ‎ → ‎ىٴ‎ ) ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM → ARABIC LETTER ALEF MAKSURA, ARABIC LETTER HIGH HAMZA # →‎ٸ‎→→‎يٴ‎→
0678 ; 0649 0674 ; MA # ( ‎ٸ‎ → ‎ىٴ‎ ) ARABIC LETTER HIGH HAMZA YEH → ARABIC LETTER ALEF MAKSURA, ARABIC LETTER HIGH HAMZA # →‎يٴ‎→

FBEB ; 0649 0674 006C ; MA # ( ‎ﯫ‎ → ‎ىٴl‎ ) ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH ALEF FINAL FORM → ARABIC LETTER ALEF MAKSURA, ARABIC LETTER HIGH HAMZA, LATIN SMALL LETTER L # →‎ئا‎→
FBEA ; 0649 0674 006C ; MA # ( ‎ﯪ‎ → ‎ىٴl‎ ) ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH ALEF ISOLATED FORM → ARABIC LETTER ALEF MAKSURA, ARABIC LETTER HIGH HAMZA, LATIN SMALL LETTER L # →‎ئا‎→
Expand Down Expand Up @@ -7535,10 +7535,10 @@ FA7E ; 5944 ; MA # ( 奄 → 奄 ) CJK COMPATIBILITY IDEOGRAPH-FA7E → CJK UNIF

F90C ; 5948 ; MA # ( 奈 → 奈 ) CJK COMPATIBILITY IDEOGRAPH-F90C → CJK UNIFIED IDEOGRAPH-5948 #

F909 ; 5951 ; MA # ( 契 → 契 ) CJK COMPATIBILITY IDEOGRAPH-F909 → CJK UNIFIED IDEOGRAPH-5951 #

FA7F ; 5954 ; MA # ( 奔 → 奔 ) CJK COMPATIBILITY IDEOGRAPH-FA7F → CJK UNIFIED IDEOGRAPH-5954 #

F909 ; 5951 ; MA # ( 契 → 契 ) CJK COMPATIBILITY IDEOGRAPH-F909 → CJK UNIFIED IDEOGRAPH-5951 #

2F85F ; 5962 ; MA # ( 奢 → 奢 ) CJK COMPATIBILITY IDEOGRAPH-2F85F → CJK UNIFIED IDEOGRAPH-5962 #

F981 ; 5973 ; MA # ( 女 → 女 ) CJK COMPATIBILITY IDEOGRAPH-F981 → CJK UNIFIED IDEOGRAPH-5973 #
Expand Down
22 changes: 20 additions & 2 deletions Scripts/confusablesCustom/assets/custom_confusables.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
0052 ; 027E ; # R → ɾ
0066 ; FF26 ; # f → F
0046 ; FF26 ; # F → F
0072 ; FF32 ; # r → R
0052 ; FF32 ; # R → R
0072 ; FF32 ; # r → R
0052 ; FF32 ; # R → R
0075 ; FF35 ; # u → U
0055 ; FF35 ; # U → U
0066 ; 0493 ; # f → ғ
Expand Down Expand Up @@ -58,3 +56,23 @@
0041 ; 1D43 ; # A → ᵃ
006D ; 1D50 ; # m → ᵐ
004D ; 1D50 ; # M → ᵐ
0068 ; 02B0 ; # h → ʰ
0048 ; 02B0 ; # H → ʰ
006A ; 02B2 ; # j → ʲ
004A ; 02B2 ; # J → ʲ
0077 ; 02B7 ; # w → ʷ
0057 ; 02B7 ; # W → ʷ
0079 ; 02B8 ; # y → ʸ
0059 ; 02B8 ; # Y → ʸ
0073 ; 02E2 ; # s → ˢ
0046 ; 1DA0 ; # F → ᶠ
0069 ; 1DA6 ; # i → ᶦ
0049 ; 1DA6 ; # I → ᶦ
006C ; 1DAB ; # l → ᶫ
004C ; 1DAB ; # L → ᶫ
006E ; 1DB0 ; # n → ᶰ
004E ; 1DB0 ; # N → ᶰ
0075 ; 1DB8 ; # u → ᶸ
0055 ; 1DB8 ; # U → ᶸ
007A ; 1DBB ; # z → ᶻ
005A ; 1DBB ; # Z → ᶻ
Loading

0 comments on commit 18cbe0c

Please sign in to comment.