Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for ujson (if available) and added /pagedjson page #53

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
============
Recoll WebUI
============
============================
Recoll WebUI for Recolldroid
============================

**This is a modified version for use with the Recolldroid Android app. It adds a feature to export only one "page" of results to JSON
rather than all results at once.**

**Recoll WebUI** is a Python-based web interface for **Recoll** text search
tool for Unix/Linux.
Expand Down
77 changes: 76 additions & 1 deletion webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,20 @@
import datetime
import glob
import hashlib
import json
import csv
import StringIO
import ConfigParser
import string
import shlex
import urllib

# use ujson if available (faster than the built-in json module)
try:
import ujson as json
except ImportError:
import json
print("ujson module not found! Consider installing it for improved Json generating speed!")

# import recoll and rclextract
try:
from recoll import recoll
Expand Down Expand Up @@ -150,6 +157,16 @@ def get_query():
'page': int(select([bottle.request.query.get('page'), 0])),
}
return query

#}}}
#{{{ get_pages
def get_pages():
    """Read the paging parameters of the current request.

    Returns a dict with two integer entries:
      'page'  -- value of the ``page`` query parameter (fallback: 1)
      'items' -- value of the ``items`` query parameter (fallback: 50)

    Both values are converted with int(), the same way get_query() treats
    its own 'page' entry, so callers always receive integers instead of a
    mix of query-string text and integer defaults.

    Raises ValueError when a supplied parameter is not a valid integer.
    """
    # NOTE(review): select() is defined elsewhere in this file; it is
    # presumably what picks the fallback when the parameter is missing.
    pages = {
        'page': int(select([bottle.request.query.get('page'), 1])),
        'items': int(select([bottle.request.query.get('items'), 50])),
    }
    return pages

#}}}
#{{{ query_to_recoll_string
def query_to_recoll_string(q):
Expand Down Expand Up @@ -309,6 +326,48 @@ def edit(resnum):
if pathismine:
os.unlink(path)
return f


#}}}
#{{{ recoll_search_page
def recoll_search_page(q, item_per_page=50, page=1, dosnippets=True):
    """Run the search described by *q* and return one page of results.

    Parameters:
      q             -- query dict, handed unchanged to recoll_initsearch()
      item_per_page -- maximum number of results on the page (anything
                       int() accepts)
      page          -- 1-based page number (anything int() accepts);
                       values below 1 are treated as the first page
      dosnippets    -- when true, every result gets a 'snippet' entry
                       built with query.makedocabstract()

    Returns a tuple (results, nres, elapsed):
      results -- list of dicts, one per document, holding every name in
                 FIELDS plus 'label', 'sha', 'time' and, optionally,
                 'snippet'
      nres    -- query.rowcount as reported after the search was started
      elapsed -- datetime.timedelta spent inside this function

    Raises ValueError when item_per_page or page cannot be converted.
    """
    config = get_config()
    tstart = datetime.datetime.now()
    results = []
    query = recoll_initsearch(q)
    nres = query.rowcount

    item_per_page = int(item_per_page)
    # A page number below 1 would produce a negative offset.
    page = max(int(page), 1)
    offset = (page - 1) * item_per_page

    try:
        query.scroll(offset, mode='absolute')
    except Exception:
        # The cursor could not be moved to the requested offset (for
        # example because the page lies past the last result).  Reading on
        # would return rows from wherever the cursor currently is and
        # present them as the requested page, so answer with an empty page.
        # For offset 0 we keep the previous best-effort behaviour and read
        # from the current position.
        if offset > 0:
            return results, nres, datetime.datetime.now() - tstart

    highlighter = HlMeths()
    for _ in range(item_per_page):
        try:
            doc = query.fetchone()
        except Exception:
            # Any fetch failure is treated as the end of the result list.
            break
        if doc is None:
            # Guard for cursors that signal exhaustion with None instead of
            # raising; getattr() on None below would otherwise fail.
            break
        d = {}
        for f in FIELDS:
            v = getattr(doc, f)
            d[f] = v.encode('utf-8') if v is not None else ''
        d['label'] = select([d['title'], d['filename'], '?'], [None, ''])
        d['sha'] = hashlib.sha1(d['url']+d['ipath']).hexdigest()
        d['time'] = timestr(d['mtime'], config['timefmt'])
        if dosnippets:
            d['snippet'] = query.makedocabstract(doc, highlighter).encode('utf-8')
        results.append(d)
    tend = datetime.datetime.now()
    return results, nres, tend - tstart

#}}}
#{{{ json
@bottle.route('/json')
Expand All @@ -321,6 +380,22 @@ def get_json():
res, nres, timer = recoll_search(query)

return json.dumps({ 'query': query, 'results': res })


#}}}
#{{{ pagedjson
@bottle.route('/pagedjson')
def get_page_json():
    """Handle /pagedjson: return one page of search results as JSON.

    The query comes from get_query() and the paging parameters from
    get_pages().  The response is marked as a JSON attachment whose file
    name is derived from the recoll query string.  The body is a JSON
    object with the keys 'query' and 'results'.
    """
    query = get_query()
    paging = get_pages()
    # The 'page' entry of the query is replaced by the paging value, so the
    # query echoed back in the response carries the page from get_pages().
    query['page'] = paging['page']

    recoll_string = query_to_recoll_string(query)
    attachment_name = normalise_filename(recoll_string)
    bottle.response.headers['Content-Type'] = 'application/json'
    bottle.response.headers['Content-Disposition'] = (
        'attachment; filename=recoll-%s.json' % attachment_name
    )

    # Only the result list is used here; count and timing are discarded.
    hits, _total, _elapsed = recoll_search_page(
        query, paging['items'], paging['page']
    )
    payload = {'query': query, 'results': hits}
    return json.dumps(payload)

#}}}
#{{{ csv
@bottle.route('/csv')
Expand Down