Skip to content

Commit

Permalink
Upgrade to txtai 6.0, closes #70
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmezzetti committed Sep 18, 2023
1 parent e9a3320 commit e4d6b81
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 16 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"regex>=2020.5.14",
"rich>=12.0.1",
"text2digits>=0.1.0",
"txtai[api,similarity]>=4.3.1",
"txtai[api,similarity]>=6.0.0",
"txtmarker>=1.0.0",
],
extras_require=extras,
Expand Down
2 changes: 1 addition & 1 deletion src/python/paperai/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def embeddings(dbfile, vectors, maxsize):

# Read config and create Embeddings instance
embeddings = Embeddings(Index.config(vectors))
-        scoring = embeddings.scoring
+        scoring = embeddings.isweighted()

# Build scoring index if scoring method provided
if scoring:
Expand Down
2 changes: 1 addition & 1 deletion src/python/paperai/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def search(embeddings, cur, query, topn, threshold):
]

# Tokenize search query, if necessary
-        query = Tokenizer.tokenize(query) if embeddings.scoring else query
+        query = Tokenizer.tokenize(query) if embeddings.isweighted() else query

# Retrieve topn * 5 to account for duplicate matches
for result in embeddings.search(query, topn * 5):
Expand Down
3 changes: 1 addition & 2 deletions src/python/paperai/report/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def __init__(self, embeddings, db, options):

# Store references to embeddings index and open database cursor
self.embeddings = embeddings
-        self.scoring = self.embeddings.scoring

self.cur = db.cursor()

Expand Down Expand Up @@ -349,7 +348,7 @@ def sections(self, uid):
sections = []
for sid, name, text in self.cur.fetchall():
if (
-                not self.scoring
+                not self.embeddings.isweighted()
or not name
or not re.search(Index.SECTION_FILTER, name.lower())
or self.options.get("allsections")
Expand Down
22 changes: 11 additions & 11 deletions test/python/testreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ def testReport1(self):
Execute.run(Utils.PATH + "/report1.yml", 10, "md", Utils.PATH, None)

hashes = [
("Age.csv", "6a0c4326e5c3136ad1c46606e04e9f62"),
("Heart Disease.csv", "96b144fc1566e2c0aa774d098e203922"),
("Heart Failure.csv", "2e8da24e09b46f71870af9ce146ade76"),
("Report1.md", "3dadaf334251d8d9b8354cd7142c5ec9"),
("Age.csv", "237c2d024f758139833d681c0f7828aa"),
("Heart Disease.csv", "542003f5677b35b73e0b3fff07398789"),
("Heart Failure.csv", "a29c59d63d73c225102d74977efeac34"),
("Report1.md", "6b143c03c154cef1c6d38e0ac1a2eee9"),
]

# Check file hashes
Expand All @@ -45,11 +45,11 @@ def testReport2(self):
Execute.run(Utils.PATH + "/report2.yml", 10, "md", Utils.PATH, None)

hashes = [
("Match.csv", "c48e3205aac18e94bac915c9dbaacc89"),
("MatchSurround.csv", "df53a94064c138907d9dee9c3029c542"),
("Section.csv", "e3745af5f23041f5e68bdffa8abb2e56"),
("Surround.csv", "96814dd27a62c28e226cc7505fee9d80"),
("Report2.md", "45c37f77cdfe8e4f213d8b3b18726cee"),
("Match.csv", "4b186a90a7bc9aa7e9f65608acac3235"),
("MatchSurround.csv", "4eae0e75b75f7b7b158094de217362a5"),
("Section.csv", "32e6c53fc7c87595846e00db4fe4b1b8"),
("Surround.csv", "aa2add9eaf4a2e1b2b48753c2172772c"),
("Report2.md", "6133aa0ebcfa5a363e0e187daba8c1de"),
]

# Check file hashes
Expand All @@ -67,9 +67,9 @@ def testReport3(self):
Execute.run(Utils.PATH + "/report3.yml", 1, "ant", Utils.PATH, None, Utils.PATH)

hashes = [
("AI.csv", "94f0bead413eb71835c3f27881b29c91"),
("AI.csv", "858bfb2c0026de725cb06417e639237a"),
("All.csv", "3bca7a39a541fa68b3ef457625fb0120"),
("Report3.md", "0fc53703dace57e3403294fb8ea7e9d1"),
("Report3.md", "c4cf822444643effbb502baaf44d993c"),
]

# Check file hashes
Expand Down

0 comments on commit e4d6b81

Please sign in to comment.