From de39576c075c4402b2ef0fb2ebc1ec08be629d68 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Wed, 3 Oct 2018 22:05:40 -0400 Subject: [PATCH 01/21] done with skeleton for asciimatics --- topicexplorer/prep.py | 708 +++++++++++++++++++++++++++++++----------- 1 file changed, 525 insertions(+), 183 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index 26947c9a..c3430990 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -130,6 +130,13 @@ import topicexplorer.config from topicexplorer.lib.util import isint, is_valid_configfile, bool_prompt +from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \ + Button, TextBox, Widget, Label +from asciimatics.scene import Scene +from asciimatics.screen import Screen +from asciimatics.exceptions import ResizeScreenError, NextScene, StopApplication +from copy import deepcopy + # NLTK Langauges langs = dict(da='danish', nl='dutch', en='english', fi='finnish', fr='french', de='german', hu='hungarian', it='italian', no='norwegian', @@ -282,14 +289,14 @@ def get_closest_bin(c, thresh, reverse=False, counts=None): return counts[min(np.searchsorted(cumsum, thresh), len(counts)-1)] -def get_high_filter(c, words=None, items=None, counts=None): +def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): import numpy as np header = "FILTER HIGH FREQUENCY WORDS" stars = old_div((80 - len(header) - 2), 2) - print("\n\n{0} {1} {0}".format('*' * stars, header)) - print(" This will remove all words occurring N or more times.") - print(" The histogram below shows how many words will be removed") - print(" by selecting each maximum frequency threshold.\n") + # print("\n\n{0} {1} {0}".format('*' * stars, header)) + # print(" This will remove all words occurring N or more times.") + # print(" The histogram below shows how many words will be removed") + # print(" by selecting each maximum frequency threshold.\n") # Get frequency bins if items is None or counts is None: @@ -299,79 +306,118 @@ def get_high_filter(c, words=None, items=None, counts=None): bins = sorted(set(bins)) bins.append(max(counts)) - high_filter = False - while not high_filter: - bin_counts, bins = np.histogram(counts, bins=bins) - print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', - "# words", "Rate")) - last_row = 0 - for bin, count in zip(bins[-2::-1], np.cumsum(bin_counts[::-1])): - filtered_counts = counts[get_mask(c, words)] - if (filtered_counts >= bin).sum() > last_row: - percentage = 1. - (old_div(counts[counts < bin].sum(), float(c.original_length))) - print("{0:>5.0f}x".format(bin).rjust(8), end=' ') - print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ') - print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ') - print(" {0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(14), end=' ') - print(">= {0:>5.0f}x".format(bin).ljust(8)) - - last_row = (filtered_counts >= bin).sum() - - print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ') - print('{} words total'.format(get_mask(c, words).sum()).rjust(20)) - print('') + try: + num = int(num) + except: + # TODO: show invalid num screen + num = "str" + + ret = "" + + # do input validation here + high_filter = False + # while not high_filter: + bin_counts, bins = np.histogram(counts, bins=bins) + # print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', + # "# words", "Rate")) + ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', "# words", "Rate") + "\n" + last_row = 0 + for bin, count in zip(bins[-2::-1], np.cumsum(bin_counts[::-1])): + filtered_counts = counts[get_mask(c, words)] + if (filtered_counts >= bin).sum() > last_row: + percentage = 1. - (old_div(counts[counts < bin].sum(), float(c.original_length))) + # print("{0:>5.0f}x".format(bin).rjust(8), end=' ') + # print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ') + # print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ') + # print(" {0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(14), end=' ') + # print(">= {0:>5.0f}x".format(bin).ljust(8)) + ret += "{0:>5.0f}x".format(bin).rjust(8) + ret += '{0:2.1f}% '.format(percentage * 100).rjust(10) + ret += (u'\u2588' * int(percentage * 36)).ljust(36) + ret += "{0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(15) + ret += " >={0:>5.0f}x".format(bin).ljust(8) + "\n" + + last_row = (filtered_counts >= bin).sum() + + # return ret + # print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ') + # print('{} words total'.format(get_mask(c, words).sum()).rjust(20)) + # print('') + ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37) + ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n' + # ret += str(type(num)) + " " + str(num) + return ret + +def get_high_filter_stops(c, words=None, items=None, counts=None, num=None): + import numpy as np + try: + num = int(num) + except: + # TODO: show invalid num screen + num = "str" + return + input_filter = num + accept = None + # while not input_filter or input_filter <= 0: + try: + # if high_filter: + # input_filter = high_filter + # else: + # input_filter = int(input("Enter the maximum rate: ").replace('x', '')) + candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts) + places = np.in1d(c.words, candidates) + places = dict(zip(candidates, np.where(places)[0])) + candidates = sorted(candidates, key=lambda x: counts[places[x]], reverse=True) + filtered_counts = counts[get_mask(c, words)] + + # print("Filter will remove", filtered_counts[filtered_counts >= input_filter].sum(), end=' ') + # print("occurrences", "of these", len(filtered_counts[filtered_counts >= input_filter]), "words:") + # print(u' '.join(candidates)) + filtered = "" + filtered += "Filter will remove " + str(filtered_counts[filtered_counts >= input_filter].sum()) + filtered += " occurrences " + "of these " + str(len(filtered_counts[filtered_counts >= input_filter])) + " words: " + filtered += u' '.join(candidates) + + # print("\nFilter will remove", filtered_counts[filtered_counts >= input_filter].sum(), end=' ') + # print("occurrences", "of these", len(filtered_counts[filtered_counts >= input_filter]), "words.", end=' ') + + # filtered += "\nFilter will remove " + str(filtered_counts[filtered_counts >= input_filter].sum()) + # filtered += " occurrences " + " of these " + str(len(filtered_counts[filtered_counts >= input_filter])) + " words." + + if len(candidates) == len(c.words): + # print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.") + # print("Please choose a different filter.") + filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus." + filtered += "Please choose a different filter." + # high_filter = 0 + # input_filter = 0 + # else: + # accept = None + # while accept not in ['y', 'n']: + # accept = input("\nAccept filter? [y/n/[different max number]] ") + # if isint(accept): + # high_filter = int(accept) + # input_filter = 0 + # accept = 'n' + # elif accept == 'y': + # high_filter = input_filter + # elif accept == 'n': + # high_filter = 0 + + except ValueError: input_filter = 0 - accept = None - while not input_filter or input_filter <= 0: - try: - if high_filter: - input_filter = high_filter - else: - input_filter = int(input("Enter the maximum rate: ").replace('x', '')) - candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts) - places = np.in1d(c.words, candidates) - places = dict(zip(candidates, np.where(places)[0])) - candidates = sorted(candidates, key=lambda x: counts[places[x]], reverse=True) - filtered_counts = counts[get_mask(c, words)] - - print("Filter will remove", filtered_counts[filtered_counts >= input_filter].sum(), end=' ') - print("occurrences", "of these", len(filtered_counts[filtered_counts >= input_filter]), "words:") - print(u' '.join(candidates)) - - print("\nFilter will remove", filtered_counts[filtered_counts >= input_filter].sum(), end=' ') - print("occurrences", "of these", len(filtered_counts[filtered_counts >= input_filter]), "words.", end=' ') - if len(candidates) == len(c.words): - print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.") - print("Please choose a different filter.") - high_filter = 0 - input_filter = 0 - else: - accept = None - while accept not in ['y', 'n']: - accept = input("\nAccept filter? [y/n/[different max number]] ") - if isint(accept): - high_filter = int(accept) - input_filter = 0 - accept = 'n' - elif accept == 'y': - high_filter = input_filter - elif accept == 'n': - high_filter = 0 - - except ValueError: - input_filter = 0 - return (high_filter, candidates) - - -def get_low_filter(c, words=None, items=None, counts=None): + return (candidates, filtered) + + +def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): import numpy as np header = "FILTER LOW FREQUENCY WORDS" stars = old_div((80 - len(header) - 2), 2) - print("\n\n{0} {1} {0}".format('*' * stars, header)) - print(" This will remove all words occurring less than N times.") - print(" The histogram below shows how many words will be removed") - print(" by selecting each minimum frequency threshold.\n") + # print("\n\n{0} {1} {0}".format('*' * stars, header)) + # print(" This will remove all words occurring less than N times.") + # print(" The histogram below shows how many words will be removed") + # print(" by selecting each minimum frequency threshold.\n") # Get frequency bins if items is None or counts is None: @@ -381,78 +427,332 @@ def get_low_filter(c, words=None, items=None, counts=None): bins = sorted(set(bins)) bins.append(max(counts)) - low_filter = False - while low_filter is False: - bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins) - # print "{0:>10s} {1:>10s}".format("# Tokens", "# Words") - print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', - "# words", "Rate")) - - last_row = 0 - for bin, count in zip(bins, np.cumsum(bin_counts)): - filtered_counts = counts[get_mask(c, words)] - if last_row < (filtered_counts < bin).sum() <= len(filtered_counts): - percentage = (old_div(counts[counts <= bin].sum(), float(c.original_length))) - print("{0:>5.0f}x".format(bin).rjust(8), end=' ') - print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ') - print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ') - print(" {0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(14), end=' ') - print("<= {0:>5.0f}x".format(bin).ljust(8)) - if (filtered_counts < bin).sum() == len(filtered_counts): - break - last_row = (filtered_counts >= bin).sum() - - - print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ') - print('{} words total'.format(get_mask(c, words).sum()).rjust(20)) - print('') + try: + num = int(num) + except: + # TODO: show invalid num screen + num = "str" + + ret = "" + low_filter = False + # while low_filter is False: + bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins) + # print "{0:>10s} {1:>10s}".format("# Tokens", "# Words") + print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', + "# words", "Rate")) + ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n" + last_row = 0 + for bin, count in zip(bins, np.cumsum(bin_counts)): + filtered_counts = counts[get_mask(c, words)] + if last_row < (filtered_counts < bin).sum() <= len(filtered_counts): + percentage = (old_div(counts[counts <= bin].sum(), float(c.original_length))) + # print("{0:>5.0f}x".format(bin).rjust(8), end=' ') + # print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ') + # print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ') + # print(" {0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(14), end=' ') + # print("<= {0:>5.0f}x".format(bin).ljust(8)) + ret += "{0:>5.0f}x".format(bin).rjust(8) + ret += '{0:2.1f}%'.format(percentage * 100).rjust(9) + ret += " " + (u'\u2588' * int(percentage * 36)).ljust(36) + ret += "{0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(15) + ret += " <={0:>5.0f}x".format(bin).ljust(8) + "\n" + if (filtered_counts < bin).sum() == len(filtered_counts): + break + last_row = (filtered_counts >= bin).sum() + + + # print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ') + # print('{} words total'.format(get_mask(c, words).sum()).rjust(20)) + # print('') + ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37) + ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n' + return ret + +def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): + import numpy as np + try: + num = int(num) + except: + # TODO: show invalid num screen + num = "str" + return + input_filter = num + accept = None + # while not input_filter or input_filter <= 0: + try: + # if low_filter: + # input_filter = low_filter + # else: + # input_filter = int(input("Enter the minimum rate: ").replace('x', '')) + + candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts) + places = np.in1d(c.words, candidates) + places = dict(zip(candidates, np.where(places)[0])) + candidates = sorted(candidates, key=lambda x: counts[places[x]]) + filtered_counts = counts[get_mask(c, words)] + + # print("Filter will remove", filtered_counts[filtered_counts <= input_filter].sum(), "tokens", end=' ') + # print("of these", len(filtered_counts[filtered_counts <= input_filter]), "words:") + # print(u' '.join(candidates)) + filtered = "" + filtered += "Filter will remove " + str(filtered_counts[filtered_counts <= input_filter].sum()) + " tokens" + filtered += "of these " + str(len(filtered_counts[filtered_counts <= input_filter])) + " words: " + filtered += u' '.join(candidates) + + # print("\nFilter will remove", filtered_counts[filtered_counts <= input_filter].sum(), "tokens", end=' ') + # print("of these", len(filtered_counts[filtered_counts <= input_filter]), "words.", end=' ') + + if len(candidates) == len(c.words): + # print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.") + # print("Please choose a different filter.") + filtered += "\n\nChoice of" + input_filter + "will remove ALL words from the corpus." + filtered += "Please choose a different filter." + # low_filter = 0 + # input_filter = 0 + # else: + # accept = None + # while accept not in ['y', 'n']: + # accept = input("\nAccept filter? [y/n/[different min. number] ") + # if isint(accept): + # low_filter = int(accept) + # input_filter = 0 + # accept = 'n' + # elif accept == 'y': + # low_filter = input_filter + # elif accept == 'n': + # low_filter = False + + except ValueError: input_filter = 0 - accept = None - while not input_filter or input_filter <= 0: - try: - if low_filter: - input_filter = low_filter - else: - input_filter = int(input("Enter the minimum rate: ").replace('x', '')) - - candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts) - places = np.in1d(c.words, candidates) - places = dict(zip(candidates, np.where(places)[0])) - candidates = sorted(candidates, key=lambda x: counts[places[x]]) - filtered_counts = counts[get_mask(c, words)] - - print("Filter will remove", filtered_counts[filtered_counts <= input_filter].sum(), "tokens", end=' ') - print("of these", len(filtered_counts[filtered_counts <= input_filter]), "words:") - print(u' '.join(candidates)) - - print("\nFilter will remove", filtered_counts[filtered_counts <= input_filter].sum(), "tokens", end=' ') - print("of these", len(filtered_counts[filtered_counts <= input_filter]), "words.", end=' ') - - if len(candidates) == len(c.words): - print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.") - print("Please choose a different filter.") - low_filter = 0 - input_filter = 0 - else: - accept = None - while accept not in ['y', 'n']: - accept = input("\nAccept filter? [y/n/[different min. number] ") - if isint(accept): - low_filter = int(accept) - input_filter = 0 - accept = 'n' - elif accept == 'y': - low_filter = input_filter - elif accept == 'n': - low_filter = False - - except ValueError: - input_filter = 0 - - return (low_filter, candidates) + + return (candidates, filtered) + +class PrepData(Frame): + def __init__(self): + # super(PrepData, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, + # title="null", reduce_cpu=True) + self.label = Label("change this") + self.lang = Label("haha") + self.summaryHigh = Text("High frequency word filter (#):", "summaryHighFreq") + self.high = Text("High frequency word filter (#):", "highFreq") + self.highLabel = Label("high label", height=35) + self.highFiltered = Label("filtered", height = 10) + self.highCandidates = [] + self.summaryLow = Text("Low frequency word filter (#)", "summaryLowFreq") + self.low = Text("Low frequency word filter (#)", "lowFreq") + self.lowLabel = Label("low label", height=35) + self.lowCandidates = [] + self.counter = 0 + # self.high.value("hello") + + def update_lang(self, l): + self.lang = l + + def setHigh(self): + # use this to change values of other elements + self.high._value = "hello" + # self._data["highFreq"] = val + +class Summary(Frame): + def __init__(self, screen): + super(Summary, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, + title="Summary", reduce_cpu=True) + + global data + + # f = open("prep.txt", "a") + # f.write("Summary init") + layout = Layout([100], fill_frame=True) + self.add_layout(layout) + # layout.add_widget(Text("High frequency word filter (%):", "highFreq")) + layout.add_widget(data.summaryHigh) + # layout.add_widget(Text("Low frequency word filter (%): ", "lowFreq")) + layout.add_widget(data.summaryLow) + layout.add_widget(Text("Language-specific stopwords: ", "lang")) + layout.add_widget(Text("Minimum word length: ", "length")) + layout.add_widget(Label("need to add original corpus size")) + layout.add_widget(Label("need to add prepped corpus size")) + layout2 = Layout([1, 1, 1, 1, 1]) + self.add_layout(layout2) + layout2.add_widget(Button("prep", self._prep), 0) + layout2.add_widget(Button("high", self._high), 1) + layout2.add_widget(Button("low", self._low), 2) + layout2.add_widget(Button("lang", self._lang), 3) + layout2.add_widget(Button("exit", self._exit), 4) + self.fix() + + # proceeds to scene with chart that displays with current settings + def _prep(self): + self.save() + raise StopApplication("Quitting") + + def _high(self): + self.save() + global data + data.high._value = data.summaryHigh.value + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=data.summaryHigh.value) + # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c, + # words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value) + + # TODO dont stoplist yet (do it at the end) + temp = deepcopy(data.stoplist) + temp.update(data.highCandidates) + temp.update(data.lowCandidates) # should I do this? + data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=data.summaryHigh.value) + data.highLabel.text += filtered + # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts, + # num=data.summaryHigh.value) + # data.highLabel.text += str(data.counter) + # data.counter = data.counter + 1 + raise NextScene("High Freq") + + def _low(self): + self.save() + global data + data.low._value = data.summaryLow.value + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=data.summaryLow.value) + + # TODO dont stoplist yet (do it at the end) + temp = deepcopy(data.stoplist) + temp.update(data.highCandidates) + temp.update(data.lowCandidates) + data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=data.summaryLow.value) + data.lowLabel.text += filtered + # data.lowLabel.text += str(data.counter) + # data.counter = data.counter + 1 + raise NextScene("Low Freq") + + def _lang(self): + self.save() + raise NextScene("Lang") + + # exits without prepping + # @staticmethod + def _exit(self): + # self._screen.close() + raise StopApplication("Quitting") + +class HighFreq(Frame): + def __init__(self, screen): + super(HighFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, + title="High Frequency Word Filter", reduce_cpu=True) + + # self._data = data + global data + + layout = Layout([100], fill_frame=True) + self.add_layout(layout) + layout.add_widget(data.highLabel) + # layout.add_widget(Text("High Freq Filter (%)", "highFreq")) + layout.add_widget(data.high) + layout2 = Layout([1, 1]) + self.add_layout(layout2) + layout2.add_widget(Button("Ok", self._ok), 0) + layout2.add_widget(Button("Update", self._change), 1) + self.fix() + + def _ok(self): + self.save() + global data + data.summaryHigh._value = data.high.value + + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=data.high.value) + # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c, + # words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value) + + # TODO dont stoplist yet (do it at the end) + temp = deepcopy(data.stoplist) + temp.update(data.highCandidates) + temp.update(data.lowCandidates) # should I do this? + data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=data.high.value) + data.highLabel.text += filtered + # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts, + # num=data.summaryHigh.value) + # data.highLabel.text += str(data.counter) + # data.counter = data.counter + 1 + raise NextScene("Summary") + + def _change(self): + self.save() + global data + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=data.high.value) + # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c, + # words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value) + + # TODO dont stoplist yet (do it at the end) + temp = deepcopy(data.stoplist) + temp.update(data.highCandidates) + temp.update(data.lowCandidates) # should I do this? + data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=data.high.value) + data.highLabel.text += filtered + # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts, + # num=data.summaryHigh.value) + # data.highLabel.text += str(data.counter) + # data.counter = data.counter + 1 + +class LowFreq(Frame): + def __init__(self, screen): + super(LowFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, + title="Low Frequency Word Filter", reduce_cpu=True) + + # self._data = data + global data + + layout = Layout([100], fill_frame=True) + self.add_layout(layout) + layout.add_widget(data.lowLabel) + # layout.add_widget(Text("Low Freq Filter (%)", "lowFreq")) + layout.add_widget(data.low) + layout2 = Layout([1, 1]) + self.add_layout(layout2) + layout2.add_widget(Button("Ok", self._ok), 0) + layout2.add_widget(Button("Update", self._change), 1) + self.fix() + + def _ok(self): + self.save() + raise NextScene("Summary") + + def _change(self): + self.save() + global data + data.summaryLow._value = data.low + +class Lang(Frame): + def __init__(self, screen): + super(Lang, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, + title="Lang", reduce_cpu=True) + + # f = open("prep.txt", "a") + # f.write("Lang init") + layout = Layout([100], fill_frame=True) + self.add_layout(layout) + layout.add_widget(Text(label="Language-specific stopwords: ", name="lang")) + layout.add_widget(Label("hello")) + layout2 = Layout([1, 1, 1]) + self.add_layout(layout2) + layout2.add_widget(Button("Ok", self._ok), 0) + self.fix() + + # proceeds to scene with chart that displays with current settings + def _ok(self): + self.save() + raise NextScene("Summary") def main(args): + global data + data = PrepData() + print("IN MAINNNNNNNNNNNNNNNNN") + config = topicexplorer.config.read(args.config_file) if config.getboolean("main", "sentences"): @@ -464,9 +764,9 @@ def main(args): args.lang = [] args.corpus_path = config.get("main", "corpus_file") - c = Corpus.load(args.corpus_path) + data.c = Corpus.load(args.corpus_path) - if c.original_length != len(c.corpus): + if data.c.original_length != len(data.c.corpus): print("Corpus has already been prepared. Proceed to training or") print("re-init the corpus to apply a different set of stopwords.") print("\nTIP: Train the LDA models with:") @@ -480,6 +780,8 @@ def main(args): args.lang.extend(new_langs) """ + # NEXT 2 IF AND THE FOR ARE FOR LANG (PUT THEM IN LANG SCENE) + # add default locale if no other languages are specified # do not add if in quiet mode -- make everything explicit if not args.lang and not args.quiet: @@ -489,19 +791,21 @@ def main(args): args.lang.append(locale) # check for any new candidates - args.lang = [lang for lang in args.lang if stop_language(c, langs[lang])] + args.lang = [lang for lang in args.lang if stop_language(data.c, langs[lang])] if args.lang and not args.quiet: args.lang = lang_prompt(args.lang) - stoplist = set() + data.stoplist = set() # Apply stop words print(" ") for lang in args.lang: print("Applying", langs[lang], "stopwords") - candidates = stop_language(c, langs[lang]) + candidates = stop_language(data.c, langs[lang]) if len(candidates): - stoplist.update(candidates) + data.stoplist.update(candidates) + # DO THIS AUTOMATICALLY, NOT NEED FOR SCENE, MAYBE HAVE SOME SORT OF INFO SCENE TO DISPLAY THIS INFO IN + # Apply custom stopwords file if args.stopword_file: with open(args.stopword_file, encoding='utf8') as swf: @@ -511,76 +815,112 @@ def main(args): if len(candidates): print("Applying custom stopword file to remove {} word{}.".format( len(candidates), 's' if len(candidates) > 1 else '')) - stoplist.update(candidates) + data.stoplist.update(candidates) + # DO THIS AUTOMATICALLY BASED OFF ARGS + if args.min_word_len: - candidates = get_small_words(c, args.min_word_len) + candidates = get_small_words(data.c, args.min_word_len) if len(candidates): print("Filtering {} small word{} with less than {} characters.".format( len(candidates), 's' if len(candidates) > 1 else '', args.min_word_len)) - stoplist.update(candidates) + data.stoplist.update(candidates) + # DO THIS AUTOMATICALLY BASED OFF ARGS, NOT THE FIRST IF + # TODO TEST USUAL BEHAVIOR # cache item counts - items, counts = get_corpus_counts(c) - if args.high_filter is None and args.high_percent is None and not args.quiet: - args.high_filter, candidates = get_high_filter(c, words=stoplist, items=items, counts=counts) - if len(candidates): - print("Filtering {} high frequency word{}.".format(len(candidates), - 's' if len(candidates) > 1 else '')) - stoplist.update(candidates) - elif args.high_filter is None and args.high_percent is None and args.quiet: + data.items, data.counts = get_corpus_counts(data.c) + # if args.high_filter is None and args.high_percent is None and not args.quiet: + # args.high_filter, candidates = get_high_filter(c, words=stoplist, items=items, counts=counts) + # if len(candidates): + # print("Filtering {} high frequency word{}.".format(len(candidates), + # 's' if len(candidates) > 1 else '')) + # stoplist.update(candidates) + # elif args.high_filter is None and args.high_percent is None and args.quiet: + if args.high_filter is None and args.high_percent is None and args.quiet: pass elif args.high_filter: - candidates = get_candidate_words(c, args.high_filter, sort=False, items=items, counts=counts) + candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): print("Filtering {} high frequency word{}.".format(len(candidates), 's' if len(candidates) > 1 else '')) - stoplist.update(candidates) + data.stoplist.update(candidates) elif args.high_percent: - args.high_filter = get_closest_bin(c, 1 - (args.high_percent / 100.), counts=counts) + args.high_filter = get_closest_bin(data.c, 1 - (args.high_percent / 100.), counts=data.counts) print(args.high_filter) - candidates = get_candidate_words(c, args.high_filter, sort=False, items=items, counts=counts) + candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): print("Filtering {} high frequency word{}.".format(len(candidates), 's' if len(candidates) > 1 else '')) - stoplist.update(candidates) + data.stoplist.update(candidates) - if args.low_filter is None and args.low_percent is None and not args.quiet: - args.low_filter, candidates = get_low_filter(c, words=stoplist, items=items, counts=counts) - if len(candidates): - print("Filtering {} low frequency word{}.".format(len(candidates), - 's' if len(candidates) > 1 else '')) - stoplist.update(candidates) - elif args.low_filter is None and args.low_percent is None and args.quiet: + # DO THIS AUTOMATICALLY BASE OFF ARGS, NOT THE FIRST IF + # TODO TEST USUAL BEHAVIOR + + # if args.low_filter is None and args.low_percent is None and not args.quiet: + # args.low_filter, candidates = get_low_filter(c, words=stoplist, items=items, counts=counts) + # if len(candidates): + # print("Filtering {} low frequency word{}.".format(len(candidates), + # 's' if len(candidates) > 1 else '')) + # stoplist.update(candidates) + # elif args.low_filter is None and args.low_percent is None and args.quiet: + if args.low_filter is None and args.low_percent is None and args.quiet: pass elif args.low_filter: - candidates = get_candidate_words(c, -1 * args.low_filter, sort=False, items=items, counts=counts) + candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): print("Filtering {} low frequency words.".format(len(candidates))) - stoplist.update(candidates) + data.stoplist.update(candidates) elif args.low_percent: - args.low_filter = get_closest_bin(c, 1 - (args.low_percent / 100.), reverse=True, counts=counts) + args.low_filter = get_closest_bin(data.c, 1 - (args.low_percent / 100.), reverse=True, counts=data.counts) print(args.low_filter) - candidates = get_candidate_words(c, -1 * args.low_filter, sort=False, items=items, counts=counts) + candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): print("Filtering {} low frequency word{}.".format(len(candidates), 's' if len(candidates) > 1 else '')) - stoplist.update(candidates) - - - - if not stoplist: + data.stoplist.update(candidates) + + def gui(screen, scene): + scenes = [ + Scene([Summary(screen)], -1, name="Summary"), + Scene([HighFreq(screen)], -1, name="High Freq"), + Scene([LowFreq(screen)], -1, name="Low Freq"), + Scene([Lang(screen)], -1, name="Lang") + ] + screen.play(scenes, stop_on_resize=True, start_scene=scene) + + last_scene = None + # global data = PrepData() + while True: + try: + Screen.wrapper(gui, catch_interrupt=True, arguments=[last_scene]) + break + # sys.exit(0) + except ResizeScreenError as e: + last_scene = e.scene + + # DO THIS WHEN PREPPING MAYBE? THE EXIT PORTION + # TODO TEST WHEN THIS HAPPENS, PUT IN SCREEN AFTER PREP + + print("out of the loop") + + data.stoplist.update(data.highCandidates) + data.stoplist.update(data.lowCandidates) + + if not data.stoplist: print("No stopwords applied.\n\n") sys.exit(0) else: - print("\n\nApplying {} stopword{}".format(len(stoplist), - 's' if len(stoplist) > 1 else '')) - c.in_place_stoplist(stoplist) + print("\n\nApplying {} stopword{}".format(len(data.stoplist), + 's' if len(data.stoplist) > 1 else '')) + data.c.in_place_stoplist(data.stoplist) print("\n") + # LEAVE THE REST, TILL THE END OF THIS METHOD AS IS + def name_corpus(dirname, languages, lowfreq=None, highfreq=None): corpus_name = [dirname] @@ -602,7 +942,7 @@ def name_corpus(dirname, languages, lowfreq=None, highfreq=None): model_path = os.path.dirname(args.corpus_path) args.corpus_path = os.path.join(model_path, corpus_name) - c.save(args.corpus_path) + data.c.save(args.corpus_path) config.set("main", "corpus_file", args.corpus_path) config.remove_option("main", "model_pattern") @@ -653,3 +993,5 @@ def populate_parser(parser): args = parser.parse_args() main(args) + +data = "" \ No newline at end of file From 79f7db48919f652115f76a8c2875c462ec1a86d9 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Mon, 12 Nov 2018 13:13:50 -0500 Subject: [PATCH 02/21] copy before removing comments --- topicexplorer/prep.py | 751 +++++++++++++++++++++++++++++++++++------- 1 file changed, 630 insertions(+), 121 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index c3430990..9606bb7f 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -131,7 +131,7 @@ from topicexplorer.lib.util import isint, is_valid_configfile, bool_prompt from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \ - Button, TextBox, Widget, Label + Button, TextBox, Widget, Label, PopUpDialog, PopupMenu, CheckBox from asciimatics.scene import Scene from asciimatics.screen import Screen from asciimatics.exceptions import ResizeScreenError, NextScene, StopApplication @@ -277,7 +277,7 @@ def get_closest_bin(c, thresh, reverse=False, counts=None): if thresh == 0 and reverse: return max(counts) + 1 elif thresh == 0 and not reverse: - return 0 + return 1 else: # sort counts counts = counts[counts.argsort()] @@ -306,11 +306,11 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): bins = sorted(set(bins)) bins.append(max(counts)) - try: - num = int(num) - except: - # TODO: show invalid num screen - num = "str" + # try: + # num = int(num) + # except: + # # TODO: show invalid num screen + # num = "str" ret = "" @@ -351,12 +351,12 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): def get_high_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np - try: - num = int(num) - except: - # TODO: show invalid num screen - num = "str" - return + # try: + # num = int(num) + # except: + # # TODO: show invalid num screen + # num = "str" + # return input_filter = num accept = None # while not input_filter or input_filter <= 0: @@ -427,11 +427,11 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): bins = sorted(set(bins)) bins.append(max(counts)) - try: - num = int(num) - except: - # TODO: show invalid num screen - num = "str" + # try: + # num = int(num) + # except: + # # TODO: show invalid num screen + # num = "str" ret = "" @@ -439,8 +439,8 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): # while low_filter is False: bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins) # print "{0:>10s} {1:>10s}".format("# Tokens", "# Words") - print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', - "# words", "Rate")) + # print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', + # "# words", "Rate")) ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n" last_row = 0 for bin, count in zip(bins, np.cumsum(bin_counts)): @@ -471,12 +471,12 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np - try: - num = int(num) - except: - # TODO: show invalid num screen - num = "str" - return + # try: + # num = int(num) + # except: + # # TODO: show invalid num screen + # num = "str" + # return input_filter = num accept = None # while not input_filter or input_filter <= 0: @@ -506,7 +506,7 @@ def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): if len(candidates) == len(c.words): # print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.") # print("Please choose a different filter.") - filtered += "\n\nChoice of" + input_filter + "will remove ALL words from the corpus." + filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus." filtered += "Please choose a different filter." # low_filter = 0 # input_filter = 0 @@ -530,29 +530,99 @@ def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): class PrepData(Frame): def __init__(self): + self.stoplist = set() # super(PrepData, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, # title="null", reduce_cpu=True) self.label = Label("change this") - self.lang = Label("haha") - self.summaryHigh = Text("High frequency word filter (#):", "summaryHighFreq") - self.high = Text("High frequency word filter (#):", "highFreq") + # self.lang = Label("haha") + # self.summaryHigh = Text("High frequency word filter (#):", "summaryHighFreq") + # self.summaryHighPercent = Text("High frequency word filter (%):", "summaryHighPercent") + self.summaryHigh = Text(label="Number of word frequency:", name="summaryHighFreq", on_change=self.summaryHighNumFocus) + self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent", on_change=self.summaryHighPercentFocus) + self.summaryHighFocus = False + self.high = Text("High frequency word filter (#):", "highFreq", on_change=self.highNumFocus) + self.highPercent = Text("High ferquency word filter (%):", "highPercent", on_change=self.highPercentFocus) self.highLabel = Label("high label", height=35) - self.highFiltered = Label("filtered", height = 10) + self.highFocus = False self.highCandidates = [] - self.summaryLow = Text("Low frequency word filter (#)", "summaryLowFreq") - self.low = Text("Low frequency word filter (#)", "lowFreq") + # self.highFiltered = Label("filtered", height = 10) + # self.summaryLow = Text("Low frequency word filter (#):", "summaryLowFreq") + # self.summaryLowPercent = Text("Low frequency word filter (%):", "summaryLowPercent") + self.summaryLow = Text("Number of word frequency:", "summaryLowFreq", on_change=self.summaryLowNumFocus) + self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent", on_change=self.summaryLowPercentFocus) + self.summaryLowFocus = False + self.low = Text("Low frequency word filter (#):", "lowFreq", on_change=self.lowNumFocus) + self.lowPercent = Text("Low frequency word filter (%):", "lowPercent", on_change=self.lowPercentFocus) self.lowLabel = Label("low label", height=35) + self.lowFocus = False self.lowCandidates = [] + self.minWord = Text("Minimum word length: ", "length") self.counter = 0 + self.error = Label("Error message") + self.switch = 0 + self.stopCandidates = [] + self.english = CheckBox("Yes", label="Apply English stopwords") + self.englishCandidates = [] + self.prepSize = Label("need to update length", align="^") # self.high.value("hello") - def update_lang(self, l): - self.lang = l - - def setHigh(self): - # use this to change values of other elements - self.high._value = "hello" - # self._data["highFreq"] = val + # def update_lang(self, l): + # self.lang = l + + def summaryHighPercentFocus(self): + if self.summaryHighFocus: + self.summaryHighFocus = False + self.summaryHigh.blur() + if self.summaryLowFocus: + self.summaryLowFocus = False + self.summaryLow.blur() + self.summaryLowPercent.blur() + + def summaryHighNumFocus(self): + if self.summaryHighFocus: + self.summaryHighFocus = False + self.summaryHighPercent.blur() + if self.summaryLowFocus: + self.summaryLowFocus = False + self.summaryLow.blur() + self.summaryLowPercent.blur() + + def highPercentFocus(self): + if self.highFocus: + self.highFocus = False + self.high.blur() + + def highNumFocus(self): + if self.highFocus: + self.highFocus = False + self.highPercent.blur() + + def summaryLowPercentFocus(self): + if self.summaryLowFocus: + self.summaryLowFocus = False + self.summaryLow.blur() + if self.summaryHighFocus: + self.summaryHighFocus = False + self.summaryHigh.blur() + self.summaryHighPercent.blur() + + def summaryLowNumFocus(self): + if self.summaryLowFocus: + self.summaryLowFocus = False + self.summaryLowPercent.blur() + if self.summaryHighFocus: + self.summaryHigh.blur() + self.summaryHighPercent.blur() + + def lowPercentFocus(self): + if self.lowFocus: + self.lowFocus = False + self.low.blur() + + def lowNumFocus(self): + if self.lowFocus: + self.lowFocus = False + self.highPercent.blur() class Summary(Frame): def __init__(self, screen): @@ -563,78 +633,273 @@ def __init__(self, screen): # f = open("prep.txt", "a") # f.write("Summary init") + highTitle = Layout([100]) + highOptions = Layout([1, 1]) + self.add_layout(highTitle) + self.add_layout(highOptions) + lowTitle = Layout([100]) + lowOptions = Layout([1, 1]) + self.add_layout(lowTitle) + self.add_layout(lowOptions) layout = Layout([100], fill_frame=True) self.add_layout(layout) # layout.add_widget(Text("High frequency word filter (%):", "highFreq")) - layout.add_widget(data.summaryHigh) + + highTitle.add_widget(Divider(height=1, line_char=" ")) + highTitle.add_widget(Label("High Frequency Word Filter", align="^")) + # layout.add_widget(data.summaryHigh) + # layout.add_widget(data.summaryHighPercent) + highOptions.add_widget(data.summaryHigh, 0) + highOptions.add_widget(data.summaryHighPercent, 1) + highOptions.add_widget(Divider(height=1, line_char="-"), 0) + highOptions.add_widget(Divider(height=1, line_char="-"), 1) + # layout.add_widget(Text("Low frequency word filter (%): ", "lowFreq")) - layout.add_widget(data.summaryLow) - layout.add_widget(Text("Language-specific stopwords: ", "lang")) - layout.add_widget(Text("Minimum word length: ", "length")) - layout.add_widget(Label("need to add original corpus size")) - layout.add_widget(Label("need to add prepped corpus size")) - layout2 = Layout([1, 1, 1, 1, 1]) + lowTitle.add_widget(Label("Low Frequency Word Filter", align="^")) + # layout.add_widget(data.summaryLow) + # layout.add_widget(data.summaryLowPercent) + # layout.add_widget(Text("Language-specific stopwords: ", "lang")) + lowOptions.add_widget(data.summaryLow, 0) + lowOptions.add_widget(data.summaryLowPercent, 1) + lowOptions.add_widget(Divider(height=1, line_char="-"), 0) + lowOptions.add_widget(Divider(height=1, line_char="-"), 1) + lowOptions.add_widget(Divider(height=1, line_char=" "), 0) + lowOptions.add_widget(Divider(height=1, line_char=" "), 1) + + layout.add_widget(data.english) + # layout.add_widget(Text("Minimum word length: ", "length")) + layout.add_widget(data.minWord) + layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^")) + layout.add_widget(data.prepSize) + layout2 = Layout([1, 1, 1, 1]) self.add_layout(layout2) layout2.add_widget(Button("prep", self._prep), 0) layout2.add_widget(Button("high", self._high), 1) layout2.add_widget(Button("low", self._low), 2) - layout2.add_widget(Button("lang", self._lang), 3) - layout2.add_widget(Button("exit", self._exit), 4) + # layout2.add_widget(Button("lang", self._lang), 3) + layout2.add_widget(Button("exit", self._exit), 3) self.fix() # proceeds to scene with chart that displays with current settings def _prep(self): self.save() + global data + # try: + # high = int(data.summaryHigh.value) + # except: + # # switch to error screen for high + # data.error._value = "Please enter a valid high value" + # data.switch = "Summary" + # raise NextScene("Error") + # try: + # low = int(data.summaryLow.value) + # except: + # # switch to error screen for low + # data.error._value = "Please enter a valid low value" + # data.switch = "Summary" + # raise NextScene("Error") + # data.highCandidates, data.highFiltered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + # num=high) + # data.lowCandidates, data.lowFiltered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + # num=low) + minNum = 3 + try: + high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) + except Exception as e: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._prepHigh)) + return + try: + low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) + except Exception as e: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._prepLow)) + return + if data.minWord.value != "": + try: + minNum = int(data.minWord.value) + except Exception as e: + self._scene.add_effect(PopUpDialog(self._screen, "Please enter a valid value for Minimum Word Length", ["OK"])) + return + if data.english.value: + data.englishCandidates = stop_language(data.c, "english") + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=high) + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=low) + data.stopCandidates = get_small_words(data.c, minNum) raise StopApplication("Quitting") + @staticmethod + def _fix(selection): + global data + data.summaryHighPercent.blur() + data.summaryHigh.blur() + + @staticmethod + def _prepHigh(selection): + global data + if str(selection) == "0": + data.summaryHighPercent._value = "30.0" + elif str(selection) == "1": + data.summaryHighPercent._value = "0.0" + else: + data.summaryHighPercent.focus() + data.summaryHigh.focus() + data.summaryHighFocus = True + confirm() + + @staticmethod + def _prepLow(selection): + global data + if str(selection) == "0": + data.summaryLowPercent._value = "20.0" + elif str(selection) == "1": + data.summaryLowPercent._value = "0.0" + else: + data.summaryLowPercent.focus() + data.summaryLow.focus() + data.summaryLowFocus = True + confirm() + def _high(self): self.save() global data - data.high._value = data.summaryHigh.value + # if data.summaryHigh.value is None and data.summaryHighPercent.value is None: + # data.error.text = "Please enter a value for either the number of occurrences or percent" + # data.switch = "Summary" + # raise NextScene("Error") + # if data.summaryHigh.value is not None and data.summaryHighPercent.value is not None: + # data.error.text = "Please enter a value for only one field" + # data.switch = "Summary" + # raise NextScene("Error") + # try: + # if data.summaryHigh.value is not None: + # data.error.text = "Please enter a valid high value (int)" + # high = int(data.summaryHigh.value) + # if data.summaryHighPercent.value is not None: + # data.error.text = "Please enter a valid high percent value (float or int)" + # high = float(data.summaryHighPercent.value) + # except Exception as e: + # data.error.text = e.__str__() + # data.switch = "Summary" + # raise NextScene("Error") + # self._scene.add_effect(PopUpDialog(self._screen, "hellldoafaisdjfa", ["OK"])) + # return + try: + high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) + except Exception as e: + if e.args[2]: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupHigh)) + else: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) + return + # data.high._value = str(high) TODO: do this in test() or here? data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=data.summaryHigh.value) + num=high) # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c, # words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value) # TODO dont stoplist yet (do it at the end) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - temp.update(data.lowCandidates) # should I do this? + # temp.update(data.lowCandidates) # should I do this? data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, - num=data.summaryHigh.value) + num=high) data.highLabel.text += filtered # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts, # num=data.summaryHigh.value) # data.highLabel.text += str(data.counter) # data.counter = data.counter + 1 raise NextScene("High Freq") + + @staticmethod + def _popupHigh(selection): + global data + if str(selection) == "0": + data.summaryHighPercent._value = "30.0" + elif str(selection) == "1": + data.summaryHighPercent._value = "0.0" + else: + data.summaryHighPercent.focus() + data.summaryHigh.focus() + data.summaryHighFocus = True + confirm() + return + high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=high) + temp = deepcopy(data.stoplist) + temp.update(data.highCandidates) + data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=high) + data.highLabel.text += filtered + raise NextScene("High Freq") def _low(self): self.save() global data - data.low._value = data.summaryLow.value + try: + low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) + except Exception as e: + if e.args[2]: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupLow)) + else: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) + return + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=data.summaryLow.value) + num=low) # TODO dont stoplist yet (do it at the end) temp = deepcopy(data.stoplist) - temp.update(data.highCandidates) + # temp.update(data.highCandidates) temp.update(data.lowCandidates) data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, - num=data.summaryLow.value) + num=low) data.lowLabel.text += filtered # data.lowLabel.text += str(data.counter) # data.counter = data.counter + 1 raise NextScene("Low Freq") + + @staticmethod + def _popupLow(selection): + global data + if str(selection) == "0": + data.summaryLowPercent._value = "20.0" + elif str(selection) == "1": + data.summaryLowPercent._value = "0.0" + else: + data.summaryLowPercent.focus() + data.summaryLow.focus() + data.summaryLowFocus = True + confirm() + return + low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=low) + temp = deepcopy(data.stoplist) + temp.update(data.lowCandidates) + data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=low) + data.lowLabel.text += filtered + raise NextScene("Low Freq") - def _lang(self): - self.save() - raise NextScene("Lang") + # def _lang(self): + # self.save() + # options = [("Danish", self._updateLang), ("Dutch", self._updateLang), ("English", self._updateLang), ("Finnish", self._updateLang), + # ("French", self._updateLang), ("German", self._updateLang), ("Hungarian", self._updateLang), ("Italian", self._updateLang), + # ("Norwegian", self._updateLang), ("Portuguese", self._updateLang), ("Russian", self._updateLang), ("Spanish", self._updateLang), + # ("Swedish", self._updateLang), ("Turkish", self._updateLang)] + # self._scene.add_effect(PopupMenu(self.screen, options, 0, 0)) + + # def _updateLang(self): + # self.save() + # raise NextScene("Lang") # exits without prepping - # @staticmethod - def _exit(self): + @staticmethod + def _exit(): # self._screen.close() + sys.exit(0) raise StopApplication("Quitting") class HighFreq(Frame): @@ -648,8 +913,8 @@ def __init__(self, screen): layout = Layout([100], fill_frame=True) self.add_layout(layout) layout.add_widget(data.highLabel) - # layout.add_widget(Text("High Freq Filter (%)", "highFreq")) layout.add_widget(data.high) + layout.add_widget(data.highPercent) layout2 = Layout([1, 1]) self.add_layout(layout2) layout2.add_widget(Button("Ok", self._ok), 0) @@ -659,45 +924,110 @@ def __init__(self, screen): def _ok(self): self.save() global data - data.summaryHigh._value = data.high.value + + try: + high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) + except Exception as e: + if e.args[2]: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popup)) + else: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) + return data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=data.high.value) + num=high) # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c, # words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value) # TODO dont stoplist yet (do it at the end) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - temp.update(data.lowCandidates) # should I do this? + # temp.update(data.lowCandidates) # should I do this? data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, - num=data.high.value) + num=high) data.highLabel.text += filtered # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts, # num=data.summaryHigh.value) # data.highLabel.text += str(data.counter) # data.counter = data.counter + 1 + updatePreppedLength() + raise NextScene("Summary") + + @staticmethod + def _popup(selection): + if str(selection) == "0": + data.highPercent._value = "30.0" + elif str(selection) == "1": + data.highPercent._value = "0.0" + else: + data.highPercent.focus() + data.high.focus() + data.highFocus = True + confirm() + return + high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=high) + temp = deepcopy(data.stoplist) + temp.update(data.highCandidates) + # temp.update(data.lowCandidates) # should I do this? + data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=high) + data.highLabel.text += filtered + raise NextScene("Summary") def _change(self): self.save() global data + + try: + high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) + except Exception as e: + if e.args[2]: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupChange)) + else: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) + return + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=data.high.value) + num=high) # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c, # words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value) # TODO dont stoplist yet (do it at the end) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - temp.update(data.lowCandidates) # should I do this? + # temp.update(data.lowCandidates) # should I do this? data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, - num=data.high.value) + num=high) data.highLabel.text += filtered # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts, # num=data.summaryHigh.value) # data.highLabel.text += str(data.counter) # data.counter = data.counter + 1 + + @staticmethod + def _popupChange(selection): + if str(selection) == "0": + data.highPercent._value = "30.0" + elif str(selection) == "1": + data.highPercent._value = "0.0" + else: + data.highPercent.focus() + data.high.focus() + data.highFocus = True + confirm() + return + high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=high) + temp = deepcopy(data.stoplist) + temp.update(data.highCandidates) + # temp.update(data.lowCandidates) # should I do this? + data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=high) + data.highLabel.text += filtered class LowFreq(Frame): def __init__(self, screen): @@ -712,6 +1042,7 @@ def __init__(self, screen): layout.add_widget(data.lowLabel) # layout.add_widget(Text("Low Freq Filter (%)", "lowFreq")) layout.add_widget(data.low) + layout.add_widget(data.lowPercent) layout2 = Layout([1, 1]) self.add_layout(layout2) layout2.add_widget(Button("Ok", self._ok), 0) @@ -720,38 +1051,201 @@ def __init__(self, screen): def _ok(self): self.save() + global data + try: + low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) + except Exception as e: + if e.args[2]: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popup)) + else: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) + return + + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=low) + + # TODO dont stoplist yet (do it at the end) + temp = deepcopy(data.stoplist) + # temp.update(data.highCandidates) + temp.update(data.lowCandidates) + data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=low) + data.lowLabel.text += filtered + # data.lowLabel.text += str(data.counter) + # data.counter = data.counter + 1 + updatePreppedLength() + raise NextScene("Summary") + + @staticmethod + def _popup(selection): + if str(selection) == "0": + data.lowPercent._value = "20.0" + elif str(selection) == "1": + data.lowPercent._value = "0.0" + else: + data.lowPercent.focus() + data.low.focus() + data.lowFocus = True + confirm() + return + low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) + + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=low) + temp = deepcopy(data.stoplist) + # temp.update(data.highCandidates) + temp.update(data.lowCandidates) + data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=low) + data.lowLabel.text += filtered + raise NextScene("Summary") def _change(self): self.save() global data - data.summaryLow._value = data.low + try: + low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) + except Exception as e: + if e.args[2]: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupChange)) + else: + self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) + return + + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=low) -class Lang(Frame): - def __init__(self, screen): - super(Lang, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, - title="Lang", reduce_cpu=True) + # TODO dont stoplist yet (do it at the end) + temp = deepcopy(data.stoplist) + # temp.update(data.highCandidates) + temp.update(data.lowCandidates) + data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=low) + data.lowLabel.text += filtered + # data.lowLabel.text += str(data.counter) + # data.counter = data.counter + 1 - # f = open("prep.txt", "a") - # f.write("Lang init") - layout = Layout([100], fill_frame=True) - self.add_layout(layout) - layout.add_widget(Text(label="Language-specific stopwords: ", name="lang")) - layout.add_widget(Label("hello")) - layout2 = Layout([1, 1, 1]) - self.add_layout(layout2) - layout2.add_widget(Button("Ok", self._ok), 0) - self.fix() + @staticmethod + def _popupChange(selection): + if str(selection) == "0": + data.lowPercent._value = "20.0" + elif str(selection) == "1": + data.lowPercent._value = "0.0" + else: + data.lowPercent.focus() + data.low.focus() + data.lowFocus = True + confirm() + return + low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=low) + temp = deepcopy(data.stoplist) + temp.update(data.lowCandidates) + # temp.update(data.lowCandidates) # should I do this? + data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + num=low) + data.lowLabel.text += filtered + +# class Lang(Frame): +# def __init__(self, screen): +# super(Lang, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, +# title="Lang", reduce_cpu=True) + +# # f = open("prep.txt", "a") +# # f.write("Lang init") +# layout = Layout([100], fill_frame=True) +# self.add_layout(layout) +# layout.add_widget(Text(label="Language-specific stopwords: ", name="lang")) +# layout.add_widget(Label("hello")) +# layout2 = Layout([1, 1, 1]) +# self.add_layout(layout2) +# layout2.add_widget(Button("Ok", self._ok), 0) +# self.fix() - # proceeds to scene with chart that displays with current settings - def _ok(self): - self.save() - raise NextScene("Summary") +# # proceeds to scene with chart that displays with current settings +# def _ok(self): +# self.save() +# raise NextScene("Summary") + +# class Error(Frame): +# def __init__(self, screen): +# super(Error, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 //3, hover_focus=True, +# title="Error", reduce_cpu=True) + +# global data + +# layout = Layout([100], fill_frame=True) +# self.add_layout(layout) +# layout.add_widget(data.error) +# layout2 = Layout([1]) +# self.add_layout(layout2) +# layout2.add_widget(Button("Ok", self._ok), 0) +# self.fix() + +# def _ok(self): +# self.save() +# global data +# raise NextScene(data.switch) + +def test(num, percent, numPair, percentPair, iden, rev): + defaults = {"high": "30%", "low": "20%"} + if num.value == "" and percent.value == "": + # raise Exception("Please enter a value for either the number of occurrences or percent") + raise Exception("Apply default of " + str(defaults[iden]) + " for the " + iden + " frequency, don't stop list, or edit value?", ["Yes", "Don't stop list", "Edit value"], True) + if num.value != "" and percent.value != "": + raise Exception("Pleae enter a value for only one " + iden + " field", ["Ok"], False) + try: + msg = "error" + if num.value != "": + msg = "Please enter a valid " + iden + " value (int)" + ret = int(num.value) + numPair._value = num.value + percentPair._value = "" + if percent.value != "": + msg = "Please enter a valid " + iden + " percent value (float or int)" + ret = float(percent.value) + percentPair._value = percent.value + numPair._value = "" + ret = get_closest_bin(data.c, 1 - (ret / 100.), reverse=rev, counts=data.counts) + except: + raise Exception(msg, ["Ok"], False) + return ret + +def updatePreppedLength(): + global data + temp = deepcopy(data.stoplist) + tempC = deepcopy(data.c) + if data.english.value: + data.englishCandidates = stop_language(tempC, "english") + temp.update(data.englishCandidates) + temp.update(data.lowCandidates) + temp.update(data.highCandidates) + tempC.in_place_stoplist(temp) + data.prepSize.text = str("Prepared corpus unique words: " + str(len(tempC))) + +def confirm(): + global data + tempScreen = data.wholeScreen.current_scene._effects[0]._screen + tempScene = data.wholeScreen.current_scene + tempScene.add_effect(PopUpDialog(tempScreen, "Please input a value in one of the highlighted fields", ["OK"], on_close=reset)) + +def reset(selection): + global data + data.summaryHigh.blur() + data.summaryHighPercent.blur() + data.summaryLow.blur() + data.summaryLowPercent.blur() + data.high.blur() + data.highPercent.blur() + data.low.blur() + data.lowPercent.blur() def main(args): global data data = PrepData() - print("IN MAINNNNNNNNNNNNNNNNN") + # print("IN MAINNNNNNNNNNNNNNNNN") config = topicexplorer.config.read(args.config_file) @@ -780,29 +1274,29 @@ def main(args): args.lang.extend(new_langs) """ - # NEXT 2 IF AND THE FOR ARE FOR LANG (PUT THEM IN LANG SCENE) - - # add default locale if no other languages are specified - # do not add if in quiet mode -- make everything explicit - if not args.lang and not args.quiet: - import locale - locale = locale.getdefaultlocale()[0].split('_')[0].lower() - if locale in langs.keys(): - args.lang.append(locale) - - # check for any new candidates - args.lang = [lang for lang in args.lang if stop_language(data.c, langs[lang])] - if args.lang and not args.quiet: - args.lang = lang_prompt(args.lang) - - data.stoplist = set() - # Apply stop words - print(" ") - for lang in args.lang: - print("Applying", langs[lang], "stopwords") - candidates = stop_language(data.c, langs[lang]) - if len(candidates): - data.stoplist.update(candidates) + # # Language information, not sure if I should remove it + + # # add default locale if no other languages are specified + # # do not add if in quiet mode -- make everything explicit + # if not args.lang and not args.quiet: + # import locale + # locale = locale.getdefaultlocale()[0].split('_')[0].lower() + # if locale in langs.keys(): + # args.lang.append(locale) + + # # check for any new candidates + # args.lang = [lang for lang in args.lang if stop_language(data.c, langs[lang])] + # if args.lang and not args.quiet: + # args.lang = lang_prompt(args.lang) + + # data.stoplist = set() + # # Apply stop words + # print(" ") + # for lang in args.lang: + # print("Applying", langs[lang], "stopwords") + # candidates = stop_language(data.c, langs[lang]) + # if len(candidates): + # data.stoplist.update(candidates) # DO THIS AUTOMATICALLY, NOT NEED FOR SCENE, MAYBE HAVE SOME SORT OF INFO SCENE TO DISPLAY THIS INFO IN @@ -843,9 +1337,11 @@ def main(args): elif args.high_filter: candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): - print("Filtering {} high frequency word{}.".format(len(candidates), - 's' if len(candidates) > 1 else '')) - data.stoplist.update(candidates) + # print("Filtering {} high frequency word{}.".format(len(candidates), + # 's' if len(candidates) > 1 else '')) + data.highCandidates = candidates + data.highLabel._value = args.high_filter + # data.stoplist.update(candidates) elif args.high_percent: args.high_filter = get_closest_bin(data.c, 1 - (args.high_percent / 100.), counts=data.counts) print(args.high_filter) @@ -870,9 +1366,10 @@ def main(args): elif args.low_filter: candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): - print("Filtering {} low frequency words.".format(len(candidates))) - data.stoplist.update(candidates) - + # print("Filtering {} low frequency words.".format(len(candidates))) + data.lowCandidates = candidates + data.lowLabel._value = args.low_filter + # data.stoplist.update(candidates) elif args.low_percent: args.low_filter = get_closest_bin(data.c, 1 - (args.low_percent / 100.), reverse=True, counts=data.counts) print(args.low_filter) @@ -886,11 +1383,16 @@ def gui(screen, scene): scenes = [ Scene([Summary(screen)], -1, name="Summary"), Scene([HighFreq(screen)], -1, name="High Freq"), - Scene([LowFreq(screen)], -1, name="Low Freq"), - Scene([Lang(screen)], -1, name="Lang") + Scene([LowFreq(screen)], -1, name="Low Freq") + # Scene([Lang(screen)], -1, name="Lang") ] + global data + data.wholeScreen = screen screen.play(scenes, stop_on_resize=True, start_scene=scene) + # global data + data.prepSize.text = str("Prepared corpus unique words: " + str(len(data.c))) + last_scene = None # global data = PrepData() while True: @@ -908,6 +1410,12 @@ def gui(screen, scene): data.stoplist.update(data.highCandidates) data.stoplist.update(data.lowCandidates) + data.stoplist.update(data.stopCandidates) + print(data.highCandidates) + # print(data.highFiltered) + print(data.lowCandidates) + # print(data.lowFiltered) + print(data.stopCandidates) if not data.stoplist: print("No stopwords applied.\n\n") @@ -917,6 +1425,7 @@ def gui(screen, scene): print("\n\nApplying {} stopword{}".format(len(data.stoplist), 's' if len(data.stoplist) > 1 else '')) data.c.in_place_stoplist(data.stoplist) + print(len(data.c)) print("\n") # LEAVE THE REST, TILL THE END OF THIS METHOD AS IS From 562df9373fab191a74470a48ef5b1b5f84b17e57 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Mon, 12 Nov 2018 13:26:32 -0500 Subject: [PATCH 03/21] removed comments --- topicexplorer/prep.py | 328 +----------------------------------------- 1 file changed, 2 insertions(+), 326 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index 9606bb7f..4afffc8b 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -293,10 +293,6 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): import numpy as np header = "FILTER HIGH FREQUENCY WORDS" stars = old_div((80 - len(header) - 2), 2) - # print("\n\n{0} {1} {0}".format('*' * stars, header)) - # print(" This will remove all words occurring N or more times.") - # print(" The histogram below shows how many words will be removed") - # print(" by selecting each maximum frequency threshold.\n") # Get frequency bins if items is None or counts is None: @@ -306,32 +302,16 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): bins = sorted(set(bins)) bins.append(max(counts)) - # try: - # num = int(num) - # except: - # # TODO: show invalid num screen - # num = "str" - ret = "" - # do input validation here - high_filter = False - # while not high_filter: bin_counts, bins = np.histogram(counts, bins=bins) - # print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', - # "# words", "Rate")) ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', "# words", "Rate") + "\n" last_row = 0 for bin, count in zip(bins[-2::-1], np.cumsum(bin_counts[::-1])): filtered_counts = counts[get_mask(c, words)] if (filtered_counts >= bin).sum() > last_row: percentage = 1. - (old_div(counts[counts < bin].sum(), float(c.original_length))) - # print("{0:>5.0f}x".format(bin).rjust(8), end=' ') - # print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ') - # print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ') - # print(" {0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(14), end=' ') - # print(">= {0:>5.0f}x".format(bin).ljust(8)) ret += "{0:>5.0f}x".format(bin).rjust(8) ret += '{0:2.1f}% '.format(percentage * 100).rjust(10) ret += (u'\u2588' * int(percentage * 36)).ljust(36) @@ -340,70 +320,29 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): last_row = (filtered_counts >= bin).sum() - # return ret - # print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ') - # print('{} words total'.format(get_mask(c, words).sum()).rjust(20)) - # print('') ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37) ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n' - # ret += str(type(num)) + " " + str(num) return ret def get_high_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np - # try: - # num = int(num) - # except: - # # TODO: show invalid num screen - # num = "str" - # return input_filter = num accept = None - # while not input_filter or input_filter <= 0: try: - # if high_filter: - # input_filter = high_filter - # else: - # input_filter = int(input("Enter the maximum rate: ").replace('x', '')) candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts) places = np.in1d(c.words, candidates) places = dict(zip(candidates, np.where(places)[0])) candidates = sorted(candidates, key=lambda x: counts[places[x]], reverse=True) filtered_counts = counts[get_mask(c, words)] - # print("Filter will remove", filtered_counts[filtered_counts >= input_filter].sum(), end=' ') - # print("occurrences", "of these", len(filtered_counts[filtered_counts >= input_filter]), "words:") - # print(u' '.join(candidates)) filtered = "" filtered += "Filter will remove " + str(filtered_counts[filtered_counts >= input_filter].sum()) filtered += " occurrences " + "of these " + str(len(filtered_counts[filtered_counts >= input_filter])) + " words: " filtered += u' '.join(candidates) - # print("\nFilter will remove", filtered_counts[filtered_counts >= input_filter].sum(), end=' ') - # print("occurrences", "of these", len(filtered_counts[filtered_counts >= input_filter]), "words.", end=' ') - - # filtered += "\nFilter will remove " + str(filtered_counts[filtered_counts >= input_filter].sum()) - # filtered += " occurrences " + " of these " + str(len(filtered_counts[filtered_counts >= input_filter])) + " words." - if len(candidates) == len(c.words): - # print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.") - # print("Please choose a different filter.") filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus." filtered += "Please choose a different filter." - # high_filter = 0 - # input_filter = 0 - # else: - # accept = None - # while accept not in ['y', 'n']: - # accept = input("\nAccept filter? [y/n/[different max number]] ") - # if isint(accept): - # high_filter = int(accept) - # input_filter = 0 - # accept = 'n' - # elif accept == 'y': - # high_filter = input_filter - # elif accept == 'n': - # high_filter = 0 except ValueError: input_filter = 0 @@ -414,10 +353,6 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): import numpy as np header = "FILTER LOW FREQUENCY WORDS" stars = old_div((80 - len(header) - 2), 2) - # print("\n\n{0} {1} {0}".format('*' * stars, header)) - # print(" This will remove all words occurring less than N times.") - # print(" The histogram below shows how many words will be removed") - # print(" by selecting each minimum frequency threshold.\n") # Get frequency bins if items is None or counts is None: @@ -427,31 +362,16 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): bins = sorted(set(bins)) bins.append(max(counts)) - # try: - # num = int(num) - # except: - # # TODO: show invalid num screen - # num = "str" - ret = "" low_filter = False - # while low_filter is False: bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins) - # print "{0:>10s} {1:>10s}".format("# Tokens", "# Words") - # print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', - # "# words", "Rate")) ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n" last_row = 0 for bin, count in zip(bins, np.cumsum(bin_counts)): filtered_counts = counts[get_mask(c, words)] if last_row < (filtered_counts < bin).sum() <= len(filtered_counts): percentage = (old_div(counts[counts <= bin].sum(), float(c.original_length))) - # print("{0:>5.0f}x".format(bin).rjust(8), end=' ') - # print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ') - # print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ') - # print(" {0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(14), end=' ') - # print("<= {0:>5.0f}x".format(bin).ljust(8)) ret += "{0:>5.0f}x".format(bin).rjust(8) ret += '{0:2.1f}%'.format(percentage * 100).rjust(9) ret += " " + (u'\u2588' * int(percentage * 36)).ljust(36) @@ -461,30 +381,15 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): break last_row = (filtered_counts >= bin).sum() - - # print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ') - # print('{} words total'.format(get_mask(c, words).sum()).rjust(20)) - # print('') ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37) ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n' return ret def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np - # try: - # num = int(num) - # except: - # # TODO: show invalid num screen - # num = "str" - # return input_filter = num accept = None - # while not input_filter or input_filter <= 0: try: - # if low_filter: - # input_filter = low_filter - # else: - # input_filter = int(input("Enter the minimum rate: ").replace('x', '')) candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts) places = np.in1d(c.words, candidates) @@ -492,51 +397,26 @@ def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): candidates = sorted(candidates, key=lambda x: counts[places[x]]) filtered_counts = counts[get_mask(c, words)] - # print("Filter will remove", filtered_counts[filtered_counts <= input_filter].sum(), "tokens", end=' ') - # print("of these", len(filtered_counts[filtered_counts <= input_filter]), "words:") - # print(u' '.join(candidates)) filtered = "" filtered += "Filter will remove " + str(filtered_counts[filtered_counts <= input_filter].sum()) + " tokens" filtered += "of these " + str(len(filtered_counts[filtered_counts <= input_filter])) + " words: " filtered += u' '.join(candidates) - # print("\nFilter will remove", filtered_counts[filtered_counts <= input_filter].sum(), "tokens", end=' ') - # print("of these", len(filtered_counts[filtered_counts <= input_filter]), "words.", end=' ') if len(candidates) == len(c.words): - # print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.") - # print("Please choose a different filter.") filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus." filtered += "Please choose a different filter." - # low_filter = 0 - # input_filter = 0 - # else: - # accept = None - # while accept not in ['y', 'n']: - # accept = input("\nAccept filter? [y/n/[different min. number] ") - # if isint(accept): - # low_filter = int(accept) - # input_filter = 0 - # accept = 'n' - # elif accept == 'y': - # low_filter = input_filter - # elif accept == 'n': - # low_filter = False except ValueError: input_filter = 0 return (candidates, filtered) +# Stores all of the variables for the labels class PrepData(Frame): def __init__(self): self.stoplist = set() - # super(PrepData, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, - # title="null", reduce_cpu=True) self.label = Label("change this") - # self.lang = Label("haha") - # self.summaryHigh = Text("High frequency word filter (#):", "summaryHighFreq") - # self.summaryHighPercent = Text("High frequency word filter (%):", "summaryHighPercent") self.summaryHigh = Text(label="Number of word frequency:", name="summaryHighFreq", on_change=self.summaryHighNumFocus) self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent", on_change=self.summaryHighPercentFocus) self.summaryHighFocus = False @@ -545,9 +425,6 @@ def __init__(self): self.highLabel = Label("high label", height=35) self.highFocus = False self.highCandidates = [] - # self.highFiltered = Label("filtered", height = 10) - # self.summaryLow = Text("Low frequency word filter (#):", "summaryLowFreq") - # self.summaryLowPercent = Text("Low frequency word filter (%):", "summaryLowPercent") self.summaryLow = Text("Number of word frequency:", "summaryLowFreq", on_change=self.summaryLowNumFocus) self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent", on_change=self.summaryLowPercentFocus) self.summaryLowFocus = False @@ -564,10 +441,6 @@ def __init__(self): self.english = CheckBox("Yes", label="Apply English stopwords") self.englishCandidates = [] self.prepSize = Label("need to update length", align="^") - # self.high.value("hello") - - # def update_lang(self, l): - # self.lang = l def summaryHighPercentFocus(self): if self.summaryHighFocus: @@ -631,8 +504,6 @@ def __init__(self, screen): global data - # f = open("prep.txt", "a") - # f.write("Summary init") highTitle = Layout([100]) highOptions = Layout([1, 1]) self.add_layout(highTitle) @@ -643,22 +514,15 @@ def __init__(self, screen): self.add_layout(lowOptions) layout = Layout([100], fill_frame=True) self.add_layout(layout) - # layout.add_widget(Text("High frequency word filter (%):", "highFreq")) highTitle.add_widget(Divider(height=1, line_char=" ")) highTitle.add_widget(Label("High Frequency Word Filter", align="^")) - # layout.add_widget(data.summaryHigh) - # layout.add_widget(data.summaryHighPercent) highOptions.add_widget(data.summaryHigh, 0) highOptions.add_widget(data.summaryHighPercent, 1) highOptions.add_widget(Divider(height=1, line_char="-"), 0) highOptions.add_widget(Divider(height=1, line_char="-"), 1) - # layout.add_widget(Text("Low frequency word filter (%): ", "lowFreq")) lowTitle.add_widget(Label("Low Frequency Word Filter", align="^")) - # layout.add_widget(data.summaryLow) - # layout.add_widget(data.summaryLowPercent) - # layout.add_widget(Text("Language-specific stopwords: ", "lang")) lowOptions.add_widget(data.summaryLow, 0) lowOptions.add_widget(data.summaryLowPercent, 1) lowOptions.add_widget(Divider(height=1, line_char="-"), 0) @@ -667,7 +531,6 @@ def __init__(self, screen): lowOptions.add_widget(Divider(height=1, line_char=" "), 1) layout.add_widget(data.english) - # layout.add_widget(Text("Minimum word length: ", "length")) layout.add_widget(data.minWord) layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^")) layout.add_widget(data.prepSize) @@ -676,7 +539,6 @@ def __init__(self, screen): layout2.add_widget(Button("prep", self._prep), 0) layout2.add_widget(Button("high", self._high), 1) layout2.add_widget(Button("low", self._low), 2) - # layout2.add_widget(Button("lang", self._lang), 3) layout2.add_widget(Button("exit", self._exit), 3) self.fix() @@ -684,24 +546,6 @@ def __init__(self, screen): def _prep(self): self.save() global data - # try: - # high = int(data.summaryHigh.value) - # except: - # # switch to error screen for high - # data.error._value = "Please enter a valid high value" - # data.switch = "Summary" - # raise NextScene("Error") - # try: - # low = int(data.summaryLow.value) - # except: - # # switch to error screen for low - # data.error._value = "Please enter a valid low value" - # data.switch = "Summary" - # raise NextScene("Error") - # data.highCandidates, data.highFiltered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - # num=high) - # data.lowCandidates, data.lowFiltered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - # num=low) minNum = 3 try: high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) @@ -763,27 +607,6 @@ def _prepLow(selection): def _high(self): self.save() global data - # if data.summaryHigh.value is None and data.summaryHighPercent.value is None: - # data.error.text = "Please enter a value for either the number of occurrences or percent" - # data.switch = "Summary" - # raise NextScene("Error") - # if data.summaryHigh.value is not None and data.summaryHighPercent.value is not None: - # data.error.text = "Please enter a value for only one field" - # data.switch = "Summary" - # raise NextScene("Error") - # try: - # if data.summaryHigh.value is not None: - # data.error.text = "Please enter a valid high value (int)" - # high = int(data.summaryHigh.value) - # if data.summaryHighPercent.value is not None: - # data.error.text = "Please enter a valid high percent value (float or int)" - # high = float(data.summaryHighPercent.value) - # except Exception as e: - # data.error.text = e.__str__() - # data.switch = "Summary" - # raise NextScene("Error") - # self._scene.add_effect(PopUpDialog(self._screen, "hellldoafaisdjfa", ["OK"])) - # return try: high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) except Exception as e: @@ -792,23 +615,14 @@ def _high(self): else: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - # data.high._value = str(high) TODO: do this in test() or here? data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) - # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c, - # words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value) - # TODO dont stoplist yet (do it at the end) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - # temp.update(data.lowCandidates) # should I do this? data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) data.highLabel.text += filtered - # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts, - # num=data.summaryHigh.value) - # data.highLabel.text += str(data.counter) - # data.counter = data.counter + 1 raise NextScene("High Freq") @staticmethod @@ -849,15 +663,11 @@ def _low(self): data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) - # TODO dont stoplist yet (do it at the end) temp = deepcopy(data.stoplist) - # temp.update(data.highCandidates) temp.update(data.lowCandidates) data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) data.lowLabel.text += filtered - # data.lowLabel.text += str(data.counter) - # data.counter = data.counter + 1 raise NextScene("Low Freq") @staticmethod @@ -883,22 +693,9 @@ def _popupLow(selection): data.lowLabel.text += filtered raise NextScene("Low Freq") - # def _lang(self): - # self.save() - # options = [("Danish", self._updateLang), ("Dutch", self._updateLang), ("English", self._updateLang), ("Finnish", self._updateLang), - # ("French", self._updateLang), ("German", self._updateLang), ("Hungarian", self._updateLang), ("Italian", self._updateLang), - # ("Norwegian", self._updateLang), ("Portuguese", self._updateLang), ("Russian", self._updateLang), ("Spanish", self._updateLang), - # ("Swedish", self._updateLang), ("Turkish", self._updateLang)] - # self._scene.add_effect(PopupMenu(self.screen, options, 0, 0)) - - # def _updateLang(self): - # self.save() - # raise NextScene("Lang") - # exits without prepping @staticmethod def _exit(): - # self._screen.close() sys.exit(0) raise StopApplication("Quitting") @@ -907,7 +704,6 @@ def __init__(self, screen): super(HighFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, title="High Frequency Word Filter", reduce_cpu=True) - # self._data = data global data layout = Layout([100], fill_frame=True) @@ -936,20 +732,12 @@ def _ok(self): data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) - # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c, - # words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value) - # TODO dont stoplist yet (do it at the end) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - # temp.update(data.lowCandidates) # should I do this? data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) data.highLabel.text += filtered - # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts, - # num=data.summaryHigh.value) - # data.highLabel.text += str(data.counter) - # data.counter = data.counter + 1 updatePreppedLength() raise NextScene("Summary") @@ -970,7 +758,6 @@ def _popup(selection): num=high) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - # temp.update(data.lowCandidates) # should I do this? data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) data.highLabel.text += filtered @@ -992,20 +779,12 @@ def _change(self): data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) - # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c, - # words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value) - # TODO dont stoplist yet (do it at the end) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - # temp.update(data.lowCandidates) # should I do this? data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) data.highLabel.text += filtered - # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts, - # num=data.summaryHigh.value) - # data.highLabel.text += str(data.counter) - # data.counter = data.counter + 1 @staticmethod def _popupChange(selection): @@ -1024,7 +803,6 @@ def _popupChange(selection): num=high) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - # temp.update(data.lowCandidates) # should I do this? data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) data.highLabel.text += filtered @@ -1034,13 +812,11 @@ def __init__(self, screen): super(LowFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, title="Low Frequency Word Filter", reduce_cpu=True) - # self._data = data global data layout = Layout([100], fill_frame=True) self.add_layout(layout) layout.add_widget(data.lowLabel) - # layout.add_widget(Text("Low Freq Filter (%)", "lowFreq")) layout.add_widget(data.low) layout.add_widget(data.lowPercent) layout2 = Layout([1, 1]) @@ -1064,15 +840,11 @@ def _ok(self): data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) - # TODO dont stoplist yet (do it at the end) temp = deepcopy(data.stoplist) - # temp.update(data.highCandidates) temp.update(data.lowCandidates) data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) data.lowLabel.text += filtered - # data.lowLabel.text += str(data.counter) - # data.counter = data.counter + 1 updatePreppedLength() raise NextScene("Summary") @@ -1093,7 +865,6 @@ def _popup(selection): data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) temp = deepcopy(data.stoplist) - # temp.update(data.highCandidates) temp.update(data.lowCandidates) data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) @@ -1116,15 +887,11 @@ def _change(self): data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) - # TODO dont stoplist yet (do it at the end) temp = deepcopy(data.stoplist) - # temp.update(data.highCandidates) temp.update(data.lowCandidates) data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) data.lowLabel.text += filtered - # data.lowLabel.text += str(data.counter) - # data.counter = data.counter + 1 @staticmethod def _popupChange(selection): @@ -1143,56 +910,13 @@ def _popupChange(selection): num=low) temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) - # temp.update(data.lowCandidates) # should I do this? data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) data.lowLabel.text += filtered -# class Lang(Frame): -# def __init__(self, screen): -# super(Lang, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, -# title="Lang", reduce_cpu=True) - -# # f = open("prep.txt", "a") -# # f.write("Lang init") -# layout = Layout([100], fill_frame=True) -# self.add_layout(layout) -# layout.add_widget(Text(label="Language-specific stopwords: ", name="lang")) -# layout.add_widget(Label("hello")) -# layout2 = Layout([1, 1, 1]) -# self.add_layout(layout2) -# layout2.add_widget(Button("Ok", self._ok), 0) -# self.fix() - -# # proceeds to scene with chart that displays with current settings -# def _ok(self): -# self.save() -# raise NextScene("Summary") - -# class Error(Frame): -# def __init__(self, screen): -# super(Error, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 //3, hover_focus=True, -# title="Error", reduce_cpu=True) - -# global data - -# layout = Layout([100], fill_frame=True) -# self.add_layout(layout) -# layout.add_widget(data.error) -# layout2 = Layout([1]) -# self.add_layout(layout2) -# layout2.add_widget(Button("Ok", self._ok), 0) -# self.fix() - -# def _ok(self): -# self.save() -# global data -# raise NextScene(data.switch) - def test(num, percent, numPair, percentPair, iden, rev): defaults = {"high": "30%", "low": "20%"} if num.value == "" and percent.value == "": - # raise Exception("Please enter a value for either the number of occurrences or percent") raise Exception("Apply default of " + str(defaults[iden]) + " for the " + iden + " frequency, don't stop list, or edit value?", ["Yes", "Don't stop list", "Edit value"], True) if num.value != "" and percent.value != "": raise Exception("Pleae enter a value for only one " + iden + " field", ["Ok"], False) @@ -1245,7 +969,6 @@ def reset(selection): def main(args): global data data = PrepData() - # print("IN MAINNNNNNNNNNNNNNNNN") config = topicexplorer.config.read(args.config_file) @@ -1297,8 +1020,6 @@ def main(args): # candidates = stop_language(data.c, langs[lang]) # if len(candidates): # data.stoplist.update(candidates) - - # DO THIS AUTOMATICALLY, NOT NEED FOR SCENE, MAYBE HAVE SOME SORT OF INFO SCENE TO DISPLAY THIS INFO IN # Apply custom stopwords file if args.stopword_file: @@ -1310,8 +1031,6 @@ def main(args): print("Applying custom stopword file to remove {} word{}.".format( len(candidates), 's' if len(candidates) > 1 else '')) data.stoplist.update(candidates) - - # DO THIS AUTOMATICALLY BASED OFF ARGS if args.min_word_len: candidates = get_small_words(data.c, args.min_word_len) @@ -1320,63 +1039,35 @@ def main(args): len(candidates), 's' if len(candidates) > 1 else '', args.min_word_len)) data.stoplist.update(candidates) - # DO THIS AUTOMATICALLY BASED OFF ARGS, NOT THE FIRST IF - # TODO TEST USUAL BEHAVIOR - # cache item counts data.items, data.counts = get_corpus_counts(data.c) - # if args.high_filter is None and args.high_percent is None and not args.quiet: - # args.high_filter, candidates = get_high_filter(c, words=stoplist, items=items, counts=counts) - # if len(candidates): - # print("Filtering {} high frequency word{}.".format(len(candidates), - # 's' if len(candidates) > 1 else '')) - # stoplist.update(candidates) - # elif args.high_filter is None and args.high_percent is None and args.quiet: if args.high_filter is None and args.high_percent is None and args.quiet: pass elif args.high_filter: candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): - # print("Filtering {} high frequency word{}.".format(len(candidates), - # 's' if len(candidates) > 1 else '')) data.highCandidates = candidates data.highLabel._value = args.high_filter - # data.stoplist.update(candidates) elif args.high_percent: args.high_filter = get_closest_bin(data.c, 1 - (args.high_percent / 100.), counts=data.counts) print(args.high_filter) candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): - print("Filtering {} high frequency word{}.".format(len(candidates), - 's' if len(candidates) > 1 else '')) data.stoplist.update(candidates) - - # DO THIS AUTOMATICALLY BASE OFF ARGS, NOT THE FIRST IF - # TODO TEST USUAL BEHAVIOR - # if args.low_filter is None and args.low_percent is None and not args.quiet: - # args.low_filter, candidates = get_low_filter(c, words=stoplist, items=items, counts=counts) - # if len(candidates): - # print("Filtering {} low frequency word{}.".format(len(candidates), - # 's' if len(candidates) > 1 else '')) - # stoplist.update(candidates) - # elif args.low_filter is None and args.low_percent is None and args.quiet: + if args.low_filter is None and args.low_percent is None and args.quiet: pass elif args.low_filter: candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): - # print("Filtering {} low frequency words.".format(len(candidates))) data.lowCandidates = candidates data.lowLabel._value = args.low_filter - # data.stoplist.update(candidates) elif args.low_percent: args.low_filter = get_closest_bin(data.c, 1 - (args.low_percent / 100.), reverse=True, counts=data.counts) print(args.low_filter) candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): - print("Filtering {} low frequency word{}.".format(len(candidates), - 's' if len(candidates) > 1 else '')) data.stoplist.update(candidates) def gui(screen, scene): @@ -1384,17 +1075,14 @@ def gui(screen, scene): Scene([Summary(screen)], -1, name="Summary"), Scene([HighFreq(screen)], -1, name="High Freq"), Scene([LowFreq(screen)], -1, name="Low Freq") - # Scene([Lang(screen)], -1, name="Lang") ] global data data.wholeScreen = screen screen.play(scenes, stop_on_resize=True, start_scene=scene) - # global data data.prepSize.text = str("Prepared corpus unique words: " + str(len(data.c))) last_scene = None - # global data = PrepData() while True: try: Screen.wrapper(gui, catch_interrupt=True, arguments=[last_scene]) @@ -1403,19 +1091,9 @@ def gui(screen, scene): except ResizeScreenError as e: last_scene = e.scene - # DO THIS WHEN PREPPING MAYBE? THE EXIT PORTION - # TODO TEST WHEN THIS HAPPENS, PUT IN SCREEN AFTER PREP - - print("out of the loop") - data.stoplist.update(data.highCandidates) data.stoplist.update(data.lowCandidates) data.stoplist.update(data.stopCandidates) - print(data.highCandidates) - # print(data.highFiltered) - print(data.lowCandidates) - # print(data.lowFiltered) - print(data.stopCandidates) if not data.stoplist: print("No stopwords applied.\n\n") @@ -1428,8 +1106,6 @@ def gui(screen, scene): print(len(data.c)) print("\n") - # LEAVE THE REST, TILL THE END OF THIS METHOD AS IS - def name_corpus(dirname, languages, lowfreq=None, highfreq=None): corpus_name = [dirname] From 4f8b7995ad28c020d5bfc5eb2ca9cd53822578b8 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Mon, 12 Nov 2018 13:30:08 -0500 Subject: [PATCH 04/21] removing booleans meant for highlighting fields --- topicexplorer/prep.py | 83 +++++-------------------------------------- 1 file changed, 8 insertions(+), 75 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index 4afffc8b..07be621a 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -417,21 +417,17 @@ class PrepData(Frame): def __init__(self): self.stoplist = set() self.label = Label("change this") - self.summaryHigh = Text(label="Number of word frequency:", name="summaryHighFreq", on_change=self.summaryHighNumFocus) - self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent", on_change=self.summaryHighPercentFocus) - self.summaryHighFocus = False - self.high = Text("High frequency word filter (#):", "highFreq", on_change=self.highNumFocus) - self.highPercent = Text("High ferquency word filter (%):", "highPercent", on_change=self.highPercentFocus) + self.summaryHigh = Text(label="Number of word frequency:", name="summaryHighFreq") + self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent") + self.high = Text("High frequency word filter (#):", "highFreq") + self.highPercent = Text("High ferquency word filter (%):", "highPercent") self.highLabel = Label("high label", height=35) - self.highFocus = False self.highCandidates = [] - self.summaryLow = Text("Number of word frequency:", "summaryLowFreq", on_change=self.summaryLowNumFocus) - self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent", on_change=self.summaryLowPercentFocus) - self.summaryLowFocus = False - self.low = Text("Low frequency word filter (#):", "lowFreq", on_change=self.lowNumFocus) - self.lowPercent = Text("Low frequency word filter (%):", "lowPercent", on_change=self.lowPercentFocus) + self.summaryLow = Text("Number of word frequency:", "summaryLowFreq") + self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent") + self.low = Text("Low frequency word filter (#):", "lowFreq") + self.lowPercent = Text("Low frequency word filter (%):", "lowPercent") self.lowLabel = Label("low label", height=35) - self.lowFocus = False self.lowCandidates = [] self.minWord = Text("Minimum word length: ", "length") self.counter = 0 @@ -442,61 +438,6 @@ def __init__(self): self.englishCandidates = [] self.prepSize = Label("need to update length", align="^") - def summaryHighPercentFocus(self): - if self.summaryHighFocus: - self.summaryHighFocus = False - self.summaryHigh.blur() - if self.summaryLowFocus: - self.summaryLowFocus = False - self.summaryLow.blur() - self.summaryLowPercent.blur() - - def summaryHighNumFocus(self): - if self.summaryHighFocus: - self.summaryHighFocus = False - self.summaryHighPercent.blur() - if self.summaryLowFocus: - self.summaryLowFocus = False - self.summaryLow.blur() - self.summaryLowPercent.blur() - - def highPercentFocus(self): - if self.highFocus: - self.highFocus = False - self.high.blur() - - def highNumFocus(self): - if self.highFocus: - self.highFocus = False - self.highPercent.blur() - - def summaryLowPercentFocus(self): - if self.summaryLowFocus: - self.summaryLowFocus = False - self.summaryLow.blur() - if self.summaryHighFocus: - self.summaryHighFocus = False - self.summaryHigh.blur() - self.summaryHighPercent.blur() - - def summaryLowNumFocus(self): - if self.summaryLowFocus: - self.summaryLowFocus = False - self.summaryLowPercent.blur() - if self.summaryHighFocus: - self.summaryHigh.blur() - self.summaryHighPercent.blur() - - def lowPercentFocus(self): - if self.lowFocus: - self.lowFocus = False - self.low.blur() - - def lowNumFocus(self): - if self.lowFocus: - self.lowFocus = False - self.highPercent.blur() - class Summary(Frame): def __init__(self, screen): super(Summary, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, @@ -588,7 +529,6 @@ def _prepHigh(selection): else: data.summaryHighPercent.focus() data.summaryHigh.focus() - data.summaryHighFocus = True confirm() @staticmethod @@ -601,7 +541,6 @@ def _prepLow(selection): else: data.summaryLowPercent.focus() data.summaryLow.focus() - data.summaryLowFocus = True confirm() def _high(self): @@ -635,7 +574,6 @@ def _popupHigh(selection): else: data.summaryHighPercent.focus() data.summaryHigh.focus() - data.summaryHighFocus = True confirm() return high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) @@ -680,7 +618,6 @@ def _popupLow(selection): else: data.summaryLowPercent.focus() data.summaryLow.focus() - data.summaryLowFocus = True confirm() return low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) @@ -750,7 +687,6 @@ def _popup(selection): else: data.highPercent.focus() data.high.focus() - data.highFocus = True confirm() return high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) @@ -795,7 +731,6 @@ def _popupChange(selection): else: data.highPercent.focus() data.high.focus() - data.highFocus = True confirm() return high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) @@ -857,7 +792,6 @@ def _popup(selection): else: data.lowPercent.focus() data.low.focus() - data.lowFocus = True confirm() return low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) @@ -902,7 +836,6 @@ def _popupChange(selection): else: data.lowPercent.focus() data.low.focus() - data.lowFocus = True confirm() return low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) From 07c39d38124114c9e18abe55293a7ca8116c18ea Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Mon, 12 Nov 2018 13:48:53 -0500 Subject: [PATCH 05/21] added comments --- topicexplorer/prep.py | 77 +++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 17 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index 07be621a..e700f78f 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -438,7 +438,9 @@ def __init__(self): self.englishCandidates = [] self.prepSize = Label("need to update length", align="^") +# Initial landing scene class Summary(Frame): + # Makes the layout of the scene def __init__(self, screen): super(Summary, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, title="Summary", reduce_cpu=True) @@ -483,27 +485,31 @@ def __init__(self, screen): layout2.add_widget(Button("exit", self._exit), 3) self.fix() - # proceeds to scene with chart that displays with current settings + # Preps the corpus def _prep(self): self.save() global data minNum = 3 + # Ensure that there is a valid value for one of the high fields try: - high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) + high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) except Exception as e: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._prepHigh)) return + # Ensure that there is a valid value for one of the low fields try: - low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) + low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) except Exception as e: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._prepLow)) return + # Ensure there is a valid calue for the min word field if data.minWord.value != "": try: minNum = int(data.minWord.value) except Exception as e: self._scene.add_effect(PopUpDialog(self._screen, "Please enter a valid value for Minimum Word Length", ["OK"])) return + # Apply English stopwords if the checkbox is selected if data.english.value: data.englishCandidates = stop_language(data.c, "english") data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, @@ -513,12 +519,14 @@ def _prep(self): data.stopCandidates = get_small_words(data.c, minNum) raise StopApplication("Quitting") + # Reset highlighting of fields @staticmethod def _fix(selection): global data data.summaryHighPercent.blur() data.summaryHigh.blur() + # Handle button clicks for high value popup @staticmethod def _prepHigh(selection): global data @@ -531,6 +539,7 @@ def _prepHigh(selection): data.summaryHigh.focus() confirm() + # Handle button clicks for low value popup @staticmethod def _prepLow(selection): global data @@ -543,11 +552,13 @@ def _prepLow(selection): data.summaryLow.focus() confirm() + # Handle button click of the button on the Summary scene def _high(self): self.save() global data + # Determine if one of the high values are valid try: - high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) + high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) except Exception as e: if e.args[2]: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupHigh)) @@ -564,6 +575,7 @@ def _high(self): data.highLabel.text += filtered raise NextScene("High Freq") + # Handle button clicks for high popup @staticmethod def _popupHigh(selection): global data @@ -576,7 +588,7 @@ def _popupHigh(selection): data.summaryHigh.focus() confirm() return - high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) + high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) temp = deepcopy(data.stoplist) @@ -586,11 +598,13 @@ def _popupHigh(selection): data.highLabel.text += filtered raise NextScene("High Freq") + # Handle button click of the button on the Summary scene def _low(self): self.save() global data + # Determine if one of the low values are valid try: - low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) + low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) except Exception as e: if e.args[2]: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupLow)) @@ -608,6 +622,7 @@ def _low(self): data.lowLabel.text += filtered raise NextScene("Low Freq") + # Handle button clicks for low popup @staticmethod def _popupLow(selection): global data @@ -620,7 +635,7 @@ def _popupLow(selection): data.summaryLow.focus() confirm() return - low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) + low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) temp = deepcopy(data.stoplist) @@ -630,13 +645,15 @@ def _popupLow(selection): data.lowLabel.text += filtered raise NextScene("Low Freq") - # exits without prepping + # Exits without prepping @staticmethod def _exit(): sys.exit(0) raise StopApplication("Quitting") +# High frequency scene class HighFreq(Frame): + # Loads in the scene layout def __init__(self, screen): super(HighFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, title="High Frequency Word Filter", reduce_cpu=True) @@ -654,12 +671,14 @@ def __init__(self, screen): layout2.add_widget(Button("Update", self._change), 1) self.fix() + # Handle button click of Ok def _ok(self): self.save() global data + # Determines if one of the high values are valid try: - high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) + high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) except Exception as e: if e.args[2]: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popup)) @@ -678,8 +697,10 @@ def _ok(self): updatePreppedLength() raise NextScene("Summary") + # Handle button clicks for high popup @staticmethod def _popup(selection): + # Handle the selections if str(selection) == "0": data.highPercent._value = "30.0" elif str(selection) == "1": @@ -689,7 +710,7 @@ def _popup(selection): data.high.focus() confirm() return - high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) + high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) temp = deepcopy(data.stoplist) @@ -700,12 +721,14 @@ def _popup(selection): raise NextScene("Summary") + # Handle button click for Update def _change(self): self.save() global data + # Determine if one of the high values are valid try: - high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) + high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) except Exception as e: if e.args[2]: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupChange)) @@ -722,8 +745,10 @@ def _change(self): num=high) data.highLabel.text += filtered + # Handle button click for popup after clicking change @staticmethod def _popupChange(selection): + # Handle the selections if str(selection) == "0": data.highPercent._value = "30.0" elif str(selection) == "1": @@ -733,7 +758,7 @@ def _popupChange(selection): data.high.focus() confirm() return - high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) + high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) temp = deepcopy(data.stoplist) @@ -742,7 +767,9 @@ def _popupChange(selection): num=high) data.highLabel.text += filtered +# Low frequency scene class LowFreq(Frame): + # Loads in the scene layout def __init__(self, screen): super(LowFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, title="Low Frequency Word Filter", reduce_cpu=True) @@ -760,11 +787,14 @@ def __init__(self, screen): layout2.add_widget(Button("Update", self._change), 1) self.fix() + # Handle button click of Ok def _ok(self): self.save() global data + + # Determines if one of the low values are valid try: - low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) + low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) except Exception as e: if e.args[2]: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popup)) @@ -783,8 +813,10 @@ def _ok(self): updatePreppedLength() raise NextScene("Summary") + # Handle button clicks for low popup @staticmethod def _popup(selection): + # Handle the selections if str(selection) == "0": data.lowPercent._value = "20.0" elif str(selection) == "1": @@ -794,7 +826,7 @@ def _popup(selection): data.low.focus() confirm() return - low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) + low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) @@ -806,11 +838,14 @@ def _popup(selection): raise NextScene("Summary") + # Handle button click for Update def _change(self): self.save() global data + + # Determine if one of the low values are valid try: - low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) + low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) except Exception as e: if e.args[2]: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupChange)) @@ -827,8 +862,11 @@ def _change(self): num=low) data.lowLabel.text += filtered + + # Handle button click for popup after clicking change @staticmethod def _popupChange(selection): + # Handle the selections if str(selection) == "0": data.lowPercent._value = "20.0" elif str(selection) == "1": @@ -838,7 +876,7 @@ def _popupChange(selection): data.low.focus() confirm() return - low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) + low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) temp = deepcopy(data.stoplist) @@ -847,7 +885,8 @@ def _popupChange(selection): num=low) data.lowLabel.text += filtered -def test(num, percent, numPair, percentPair, iden, rev): +# Determin if the values for the num and percent fields are valid +def validate(num, percent, numPair, percentPair, iden, rev): defaults = {"high": "30%", "low": "20%"} if num.value == "" and percent.value == "": raise Exception("Apply default of " + str(defaults[iden]) + " for the " + iden + " frequency, don't stop list, or edit value?", ["Yes", "Don't stop list", "Edit value"], True) @@ -870,6 +909,8 @@ def test(num, percent, numPair, percentPair, iden, rev): raise Exception(msg, ["Ok"], False) return ret +# Update the prepped length by storing c and stoplist in temp varibles, +# then updaing the originals, and then restoring the originals def updatePreppedLength(): global data temp = deepcopy(data.stoplist) @@ -882,12 +923,14 @@ def updatePreppedLength(): tempC.in_place_stoplist(temp) data.prepSize.text = str("Prepared corpus unique words: " + str(len(tempC))) +# Highlight the necessary fields def confirm(): global data tempScreen = data.wholeScreen.current_scene._effects[0]._screen tempScene = data.wholeScreen.current_scene tempScene.add_effect(PopUpDialog(tempScreen, "Please input a value in one of the highlighted fields", ["OK"], on_close=reset)) +# Reset all highlighted fields def reset(selection): global data data.summaryHigh.blur() From 47891977472fff037ab626af7c240136148ac8f7 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Mon, 12 Nov 2018 13:51:57 -0500 Subject: [PATCH 06/21] removed most unused variables --- topicexplorer/prep.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index e700f78f..ac5dc809 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -291,8 +291,6 @@ def get_closest_bin(c, thresh, reverse=False, counts=None): def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): import numpy as np - header = "FILTER HIGH FREQUENCY WORDS" - stars = old_div((80 - len(header) - 2), 2) # Get frequency bins if items is None or counts is None: @@ -304,7 +302,6 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): ret = "" - high_filter = False bin_counts, bins = np.histogram(counts, bins=bins) ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', "# words", "Rate") + "\n" last_row = 0 @@ -327,7 +324,6 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): def get_high_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np input_filter = num - accept = None try: candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts) places = np.in1d(c.words, candidates) @@ -351,8 +347,6 @@ def get_high_filter_stops(c, words=None, items=None, counts=None, num=None): def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): import numpy as np - header = "FILTER LOW FREQUENCY WORDS" - stars = old_div((80 - len(header) - 2), 2) # Get frequency bins if items is None or counts is None: @@ -364,7 +358,6 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): ret = "" - low_filter = False bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins) ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n" last_row = 0 @@ -388,7 +381,6 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np input_filter = num - accept = None try: candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts) From 9299811f097c4dfff698dd787fbe4c8a93c17fc4 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Wed, 14 Nov 2018 17:30:53 -0500 Subject: [PATCH 07/21] making text fields on summary smaller --- topicexplorer/prep.py | 53 ++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index ac5dc809..1d371932 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -409,14 +409,22 @@ class PrepData(Frame): def __init__(self): self.stoplist = set() self.label = Label("change this") - self.summaryHigh = Text(label="Number of word frequency:", name="summaryHighFreq") - self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent") + # self.summaryHigh = Text(label="Words:", name="summaryHighFreq") + self.summaryHighText = Label("Words:", align=">") + self.summaryHigh = Text(label="") + # self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent") + self.summaryHighPercentText = Label("Percent:", align=">") + self.summaryHighPercent = Text(label="") self.high = Text("High frequency word filter (#):", "highFreq") self.highPercent = Text("High ferquency word filter (%):", "highPercent") self.highLabel = Label("high label", height=35) self.highCandidates = [] - self.summaryLow = Text("Number of word frequency:", "summaryLowFreq") - self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent") + # self.summaryLow = Text("Number of word frequency:", "summaryLowFreq") + self.summaryLowText = Label("Words:", align=">") + self.summaryLow = Text(label="") + # self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent") + self.summaryLowPercentText = Label("Percent:", align=">") + self.summaryLowPercent = Text(label="") self.low = Text("Low frequency word filter (#):", "lowFreq") self.lowPercent = Text("Low frequency word filter (%):", "lowPercent") self.lowLabel = Label("low label", height=35) @@ -440,35 +448,48 @@ def __init__(self, screen): global data highTitle = Layout([100]) - highOptions = Layout([1, 1]) self.add_layout(highTitle) - self.add_layout(highOptions) - lowTitle = Layout([100]) - lowOptions = Layout([1, 1]) - self.add_layout(lowTitle) - self.add_layout(lowOptions) - layout = Layout([100], fill_frame=True) - self.add_layout(layout) - highTitle.add_widget(Divider(height=1, line_char=" ")) highTitle.add_widget(Label("High Frequency Word Filter", align="^")) - highOptions.add_widget(data.summaryHigh, 0) + + highOptions = Layout([10, 1, 9]) + self.add_layout(highOptions) + highOptions.add_widget(data.summaryHighText, 0) + highOptions.add_widget(data.summaryHigh, 1) + highOptions.add_widget(Label(""), 2) + highOptions.add_widget(data.summaryHighPercentText, 0) highOptions.add_widget(data.summaryHighPercent, 1) + highOptions.add_widget(Label(""), 2) highOptions.add_widget(Divider(height=1, line_char="-"), 0) highOptions.add_widget(Divider(height=1, line_char="-"), 1) - + highOptions.add_widget(Divider(height=1, line_char="-"), 2) + + lowTitle = Layout([100]) + self.add_layout(lowTitle) lowTitle.add_widget(Label("Low Frequency Word Filter", align="^")) - lowOptions.add_widget(data.summaryLow, 0) + + lowOptions = Layout([10, 1, 9]) + self.add_layout(lowOptions) + lowOptions.add_widget(data.summaryLowText, 0) + lowOptions.add_widget(data.summaryLow, 1) + lowOptions.add_widget(Label(""), 2) + lowOptions.add_widget(data.summaryLowPercentText, 0) lowOptions.add_widget(data.summaryLowPercent, 1) + lowOptions.add_widget(Label(""), 2) lowOptions.add_widget(Divider(height=1, line_char="-"), 0) lowOptions.add_widget(Divider(height=1, line_char="-"), 1) + lowOptions.add_widget(Divider(height=1, line_char="-"), 2) lowOptions.add_widget(Divider(height=1, line_char=" "), 0) lowOptions.add_widget(Divider(height=1, line_char=" "), 1) + lowOptions.add_widget(Divider(height=1, line_char=" "), 2) + layout = Layout([100], fill_frame=True) + self.add_layout(layout) layout.add_widget(data.english) layout.add_widget(data.minWord) layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^")) layout.add_widget(data.prepSize) + layout2 = Layout([1, 1, 1, 1]) self.add_layout(layout2) layout2.add_widget(Button("prep", self._prep), 0) From 0cc848d3a9cb2741ac27f0eaeff6c04836d54739 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Tue, 20 Nov 2018 21:26:05 -0500 Subject: [PATCH 08/21] need to handle file selection in file browser --- topicexplorer/prep.py | 202 +++++++++++++++++++++++++++++++----------- 1 file changed, 149 insertions(+), 53 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index 1d371932..0372fb0d 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -131,7 +131,7 @@ from topicexplorer.lib.util import isint, is_valid_configfile, bool_prompt from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \ - Button, TextBox, Widget, Label, PopUpDialog, PopupMenu, CheckBox + Button, TextBox, Widget, Label, PopUpDialog, PopupMenu, CheckBox, FileBrowser, KeyboardEvent from asciimatics.scene import Scene from asciimatics.screen import Screen from asciimatics.exceptions import ResizeScreenError, NextScene, StopApplication @@ -409,27 +409,27 @@ class PrepData(Frame): def __init__(self): self.stoplist = set() self.label = Label("change this") - # self.summaryHigh = Text(label="Words:", name="summaryHighFreq") - self.summaryHighText = Label("Words:", align=">") - self.summaryHigh = Text(label="") - # self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent") - self.summaryHighPercentText = Label("Percent:", align=">") - self.summaryHighPercent = Text(label="") - self.high = Text("High frequency word filter (#):", "highFreq") - self.highPercent = Text("High ferquency word filter (%):", "highPercent") + self.summaryHigh = Text(label=" Words:", name="summaryHighFreq", max_length=5) + # self.summaryHighText = Label("Words:", align=">") + # self.summaryHigh = Text(label="") + self.summaryHighPercent = Text("Percent:", "summaryHighPercent", max_length=5) + # self.summaryHighPercentText = Label("Percent:", align=">") + # self.summaryHighPercent = Text(label="") + self.high = Text("High frequency word filter (#):", "highFreq", max_length=5) + self.highPercent = Text("High ferquency word filter (%):", "highPercent", max_length=5) self.highLabel = Label("high label", height=35) self.highCandidates = [] - # self.summaryLow = Text("Number of word frequency:", "summaryLowFreq") - self.summaryLowText = Label("Words:", align=">") - self.summaryLow = Text(label="") - # self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent") - self.summaryLowPercentText = Label("Percent:", align=">") - self.summaryLowPercent = Text(label="") - self.low = Text("Low frequency word filter (#):", "lowFreq") - self.lowPercent = Text("Low frequency word filter (%):", "lowPercent") + self.summaryLow = Text(" Words:", "summaryLowFreq", max_length=5) + # self.summaryLowText = Label("Words:", align=">") + # self.summaryLow = Text(label="") + self.summaryLowPercent = Text("Percent:", "summaryLowPercent", max_length=5) + # self.summaryLowPercentText = Label("Percent:", align=">") + # self.summaryLowPercent = Text(label="") + self.low = Text("Low frequency word filter (#):", "lowFreq", max_length=5) + self.lowPercent = Text("Low frequency word filter (%):", "lowPercent", max_length=5) self.lowLabel = Label("low label", height=35) self.lowCandidates = [] - self.minWord = Text("Minimum word length: ", "length") + self.minWord = Text("Minimum word length:", "length", max_length=5) self.counter = 0 self.error = Label("Error message") self.switch = 0 @@ -437,65 +437,91 @@ def __init__(self): self.english = CheckBox("Yes", label="Apply English stopwords") self.englishCandidates = [] self.prepSize = Label("need to update length", align="^") + self.stopwordFile = Label("Current stopworded file: ", align="^") # Initial landing scene class Summary(Frame): # Makes the layout of the scene def __init__(self, screen): - super(Summary, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, + super(Summary, self).__init__(screen, screen.height, screen.width, hover_focus=True, title="Summary", reduce_cpu=True) global data + # super().set_theme("green") + highTitle = Layout([100]) self.add_layout(highTitle) highTitle.add_widget(Divider(height=1, line_char=" ")) - highTitle.add_widget(Label("High Frequency Word Filter", align="^")) + highTitle.add_widget(Label("High Frequency Word Filter\n--------------------------", align="^", height=2)) - highOptions = Layout([10, 1, 9]) + highOptions = Layout([7, 2, 6]) self.add_layout(highOptions) - highOptions.add_widget(data.summaryHighText, 0) + # highOptions.add_widget(data.summaryHighText, 0) + # highOptions.add_widget(data.summaryHigh, 1) highOptions.add_widget(data.summaryHigh, 1) - highOptions.add_widget(Label(""), 2) - highOptions.add_widget(data.summaryHighPercentText, 0) + # highOptions.add_widget(Label(""), 2) + # highOptions.add_widget(data.summaryHighPercentText, 0) + # highOptions.add_widget(data.summaryHighPercent, 1) highOptions.add_widget(data.summaryHighPercent, 1) - highOptions.add_widget(Label(""), 2) - highOptions.add_widget(Divider(height=1, line_char="-"), 0) - highOptions.add_widget(Divider(height=1, line_char="-"), 1) - highOptions.add_widget(Divider(height=1, line_char="-"), 2) + # highOptions.add_widget(Label(""), 2) + + highButton = Layout([1]) + self.add_layout(highButton) + highButton.add_widget(Divider(height=1, line_char=" "), 0) + highButton.add_widget(Button("High frequency wizard", self._high), 0) + highButton.add_widget(Divider(height=2, line_char="-"), 0) + highButton.add_widget(Divider(height=1, line_char=" "), 0) lowTitle = Layout([100]) self.add_layout(lowTitle) - lowTitle.add_widget(Label("Low Frequency Word Filter", align="^")) + lowTitle.add_widget(Label("Low Frequency Word Filter\n-------------------------", align="^", height=2)) - lowOptions = Layout([10, 1, 9]) + lowOptions = Layout([7, 2, 6]) self.add_layout(lowOptions) - lowOptions.add_widget(data.summaryLowText, 0) + # lowOptions.add_widget(data.summaryLowText, 0) + # lowOptions.add_widget(data.summaryLow, 1) lowOptions.add_widget(data.summaryLow, 1) - lowOptions.add_widget(Label(""), 2) - lowOptions.add_widget(data.summaryLowPercentText, 0) + # lowOptions.add_widget(Label(""), 2) + # lowOptions.add_widget(data.summaryLowPercentText, 0) + # lowOptions.add_widget(data.summaryLowPercent, 1) lowOptions.add_widget(data.summaryLowPercent, 1) - lowOptions.add_widget(Label(""), 2) - lowOptions.add_widget(Divider(height=1, line_char="-"), 0) - lowOptions.add_widget(Divider(height=1, line_char="-"), 1) - lowOptions.add_widget(Divider(height=1, line_char="-"), 2) - lowOptions.add_widget(Divider(height=1, line_char=" "), 0) - lowOptions.add_widget(Divider(height=1, line_char=" "), 1) - lowOptions.add_widget(Divider(height=1, line_char=" "), 2) - - layout = Layout([100], fill_frame=True) - self.add_layout(layout) - layout.add_widget(data.english) - layout.add_widget(data.minWord) - layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^")) - layout.add_widget(data.prepSize) + # lowOptions.add_widget(Label(""), 2) + + lowButton = Layout([1]) + self.add_layout(lowButton) + lowButton.add_widget(Divider(height=1, line_char=" "), 0) + lowButton.add_widget(Button("Low frequency wizard", self._low), 0) + lowButton.add_widget(Divider(height=2, line_char="-"), 0) + lowButton.add_widget(Divider(height=1, line_char=" "), 0) + + stopwordHeader = Layout([1]) + self.add_layout(stopwordHeader) + stopwordHeader.add_widget(Label("Stopwords\n---------", align="^", height=2), 0) + + stopwords = Layout([8, 6, 4]) + self.add_layout(stopwords) + stopwords.add_widget(data.english, 1) + + stopMinWords = Layout([8, 5, 5]) + self.add_layout(stopMinWords) + stopMinWords.add_widget(data.minWord, 1) + + stopwordFileLayout = Layout([1]) + self.add_layout(stopwordFileLayout) + stopwordFileLayout.add_widget(data.stopwordFile, 0) + stopwordFileLayout.add_widget(Divider(height=1, line_char=" "), 0) + stopwordFileLayout.add_widget(Button("Select new file", self._chooseFile), 0) + stopwordFileLayout.add_widget(Divider(height=2, line_char=" "), 0) + # layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^")) + # layout.add_widget(data.prepSize) - layout2 = Layout([1, 1, 1, 1]) + layout2 = Layout([1, 1]) self.add_layout(layout2) layout2.add_widget(Button("prep", self._prep), 0) - layout2.add_widget(Button("high", self._high), 1) - layout2.add_widget(Button("low", self._low), 2) - layout2.add_widget(Button("exit", self._exit), 3) + # layout2.add_widget(Button("high", self._high), 1) + # layout2.add_widget(Button("low", self._low), 2) + layout2.add_widget(Button("exit", self._exit), 1) self.fix() # Preps the corpus @@ -658,6 +684,9 @@ def _popupLow(selection): data.lowLabel.text += filtered raise NextScene("Low Freq") + def _chooseFile(self): + raise NextScene("File Browser") + # Exits without prepping @staticmethod def _exit(): @@ -898,7 +927,73 @@ def _popupChange(selection): num=low) data.lowLabel.text += filtered -# Determin if the values for the num and percent fields are valid +# Taken from: https://github.com/peterbrittain/asciimatics/blob/master/samples/treeview.py +class Files(Frame): + def __init__(self, screen): + super(Files, self).__init__( + screen, screen.height, screen.width, has_border=False) + + # Create the (very simple) form layout... + layout = Layout([1], fill_frame=True) + self.add_layout(layout) + + # Now populate it with the widgets we want to use. + self._details = Text() + self._details.disabled = True + self._details.custom_colour = "field" + regex = "((?:\w+)(?:.)?(?:txt))|(\w+)$" + self._list = FileBrowser(Widget.FILL_FRAME, + os.path.abspath("."), + name="mc_list", + on_select=self.popup, + on_change=self.details, + file_filter=regex) + layout.add_widget(Label("Local disk browser sample")) + layout.add_widget(Divider()) + layout.add_widget(self._list) + layout.add_widget(Divider()) + layout.add_widget(self._details) + layout.add_widget(Label("Press Enter to select or `q` to quit.")) + + # Prepare the Frame for use. + self.fix() + + def popup(self): + # Just confirm whenever the user actually selects something. + if not self._list.value.endswith(".txt") and "." in self._list.value: + self._scene.add_effect(PopUpDialog(self._screen, "Please pick a valid file (a .txt file or a file with no extension)", ["OK"])) + else: + data.stopwordFile.text = "Current stopworded file: " + self._list.value + raise NextScene("Summary") + # self._scene.add_effect(PopUpDialog(self._screen, "You selected: {}".format(self._list.value), ["OK"])) + + def details(self): + # If python magic is installed, provide a little more detail of the current file. + if self._list.value: + if os.path.isdir(self._list.value): + self._details.value = "Directory" + elif os.path.isfile(self._list.value): + try: + self._details.value = magic.from_file(self._list.value) + except NameError: + self._details.value = "File (run 'pip install python-magic' for more details)" + else: + self._details.value = "--" + + def process_event(self, event): + # Do the key handling for this Frame. + global data + if isinstance(event, KeyboardEvent): + if event.key_code in [ord('q'), ord('Q'), Screen.ctrl("c")]: + raise NextScene("Summary") + elif event.key_code in [ord('c'), ord('C')]: + data.stopwordFile.text = "hello" + raise NextScene("Summary") + + # Now pass on to lower levels for normal handling of the event. + return super(Files, self).process_event(event) + +# Determine if the values for the num and percent fields are valid def validate(num, percent, numPair, percentPair, iden, rev): defaults = {"high": "30%", "low": "20%"} if num.value == "" and percent.value == "": @@ -1063,7 +1158,8 @@ def gui(screen, scene): scenes = [ Scene([Summary(screen)], -1, name="Summary"), Scene([HighFreq(screen)], -1, name="High Freq"), - Scene([LowFreq(screen)], -1, name="Low Freq") + Scene([LowFreq(screen)], -1, name="Low Freq"), + Scene([Files(screen)], -1, name="File Browser") ] global data data.wholeScreen = screen From fe80ac13bca133ebd834489d52da9bf13bbf2523 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Tue, 27 Nov 2018 15:16:18 -0500 Subject: [PATCH 09/21] handling invalid values for percentages --- topicexplorer/prep.py | 150 +++++++++++++++++++++++++++++++----------- 1 file changed, 113 insertions(+), 37 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index 0372fb0d..bcb6dedb 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -324,6 +324,7 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): def get_high_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np input_filter = num + valid = True try: candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts) places = np.in1d(c.words, candidates) @@ -337,12 +338,13 @@ def get_high_filter_stops(c, words=None, items=None, counts=None, num=None): filtered += u' '.join(candidates) if len(candidates) == len(c.words): - filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus." - filtered += "Please choose a different filter." + valid = False + # filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus." + # filtered += "Please choose a different filter." except ValueError: input_filter = 0 - return (candidates, filtered) + return (candidates, filtered, valid) def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): @@ -381,8 +383,8 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np input_filter = num + valid = True try: - candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts) places = np.in1d(c.words, candidates) places = dict(zip(candidates, np.where(places)[0])) @@ -396,13 +398,14 @@ def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): if len(candidates) == len(c.words): - filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus." - filtered += "Please choose a different filter." + valid = False + # filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus." + # filtered += "Please choose a different filter." except ValueError: input_filter = 0 - return (candidates, filtered) + return (candidates, filtered, valid) # Stores all of the variables for the labels class PrepData(Frame): @@ -417,7 +420,7 @@ def __init__(self): # self.summaryHighPercent = Text(label="") self.high = Text("High frequency word filter (#):", "highFreq", max_length=5) self.highPercent = Text("High ferquency word filter (%):", "highPercent", max_length=5) - self.highLabel = Label("high label", height=35) + self.highLabel = Label("high label", height=58) self.highCandidates = [] self.summaryLow = Text(" Words:", "summaryLowFreq", max_length=5) # self.summaryLowText = Label("Words:", align=">") @@ -427,7 +430,7 @@ def __init__(self): # self.summaryLowPercent = Text(label="") self.low = Text("Low frequency word filter (#):", "lowFreq", max_length=5) self.lowPercent = Text("Low frequency word filter (%):", "lowPercent", max_length=5) - self.lowLabel = Label("low label", height=35) + self.lowLabel = Label("low label", height=58) self.lowCandidates = [] self.minWord = Text("Minimum word length:", "length", max_length=5) self.counter = 0 @@ -437,7 +440,9 @@ def __init__(self): self.english = CheckBox("Yes", label="Apply English stopwords") self.englishCandidates = [] self.prepSize = Label("need to update length", align="^") + self.fileName = "" self.stopwordFile = Label("Current stopworded file: ", align="^") + self.fileCandidates = [] # Initial landing scene class Summary(Frame): @@ -512,16 +517,26 @@ def __init__(self, screen): stopwordFileLayout.add_widget(data.stopwordFile, 0) stopwordFileLayout.add_widget(Divider(height=1, line_char=" "), 0) stopwordFileLayout.add_widget(Button("Select new file", self._chooseFile), 0) - stopwordFileLayout.add_widget(Divider(height=2, line_char=" "), 0) + stopwordFileLayout.add_widget(Divider(height=2, line_char="-"), 0) + stopwordFileLayout.add_widget(Divider(height=1, line_char=" "), 0) # layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^")) # layout.add_widget(data.prepSize) + corpusLenLayout = Layout([1]) + self.add_layout(corpusLenLayout) + corpusLenLayout.add_widget(Label("Corpus Length\n-------------", align="^", height=2), 0) + corpusLenLayout.add_widget(Label("Original corpus unique works: " + str(data.c.original_length), align="^")) + corpusLenLayout.add_widget(data.prepSize) + corpusLenLayout.add_widget(Divider(height=1, line_char=" "), 0) + layout2 = Layout([1, 1]) self.add_layout(layout2) layout2.add_widget(Button("prep", self._prep), 0) + # layout2.add_widget(Divider(height=1, line_char="-"), 4) # layout2.add_widget(Button("high", self._high), 1) # layout2.add_widget(Button("low", self._low), 2) layout2.add_widget(Button("exit", self._exit), 1) + # layout2.add_widget(Divider(height=1, line_char="-"), 5) self.fix() # Preps the corpus @@ -551,10 +566,28 @@ def _prep(self): # Apply English stopwords if the checkbox is selected if data.english.value: data.englishCandidates = stop_language(data.c, "english") - data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + else: + data.englishCandidates = [] + # Get the stopwords from a file + if data.fileName != "": + with open(data.fileName, encoding='utf8') as swf: + data.fileCandidates = [word.strip() for word in swf] + + if len(data.fileCandidates): + print("Applying custom stopword file to remove {} word{}.".format( + len(data.fileCandidates), 's' if len(data.fileCandidates) > 1 else '')) + else: + data.fileCandidates = [] + data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) - data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + if not valid: + self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) + return + data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) + if not valid: + self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) + return data.stopCandidates = get_small_words(data.c, minNum) raise StopApplication("Quitting") @@ -595,6 +628,7 @@ def _prepLow(selection): def _high(self): self.save() global data + # Determine if one of the high values are valid try: high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) @@ -604,8 +638,12 @@ def _high(self): else: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + + data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) + if not valid: + self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) + return temp = deepcopy(data.stoplist) temp.update(data.highCandidates) @@ -628,7 +666,7 @@ def _popupHigh(selection): confirm() return high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) - data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) @@ -641,6 +679,7 @@ def _popupHigh(selection): def _low(self): self.save() global data + # Determine if one of the low values are valid try: low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) @@ -651,8 +690,11 @@ def _low(self): self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) + if not valid: + self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) + return temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) @@ -675,7 +717,7 @@ def _popupLow(selection): confirm() return low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) - data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) @@ -697,7 +739,7 @@ def _exit(): class HighFreq(Frame): # Loads in the scene layout def __init__(self, screen): - super(HighFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, + super(HighFreq, self).__init__(screen, screen.height, screen.width, hover_focus=True, title="High Frequency Word Filter", reduce_cpu=True) global data @@ -728,8 +770,11 @@ def _ok(self): self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) + if not valid: + self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) + return temp = deepcopy(data.stoplist) temp.update(data.highCandidates) @@ -753,7 +798,7 @@ def _popup(selection): confirm() return high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) - data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.highCandidates, filtered, value = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) @@ -778,8 +823,11 @@ def _change(self): self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) + if not valid: + self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) + return temp = deepcopy(data.stoplist) temp.update(data.highCandidates) @@ -801,7 +849,7 @@ def _popupChange(selection): confirm() return high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) - data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) @@ -813,7 +861,7 @@ def _popupChange(selection): class LowFreq(Frame): # Loads in the scene layout def __init__(self, screen): - super(LowFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True, + super(LowFreq, self).__init__(screen, screen.height, screen.width, hover_focus=True, title="Low Frequency Word Filter", reduce_cpu=True) global data @@ -844,8 +892,11 @@ def _ok(self): self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) + if not valid: + self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) + return temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) @@ -870,7 +921,7 @@ def _popup(selection): return low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) - data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) @@ -895,8 +946,11 @@ def _change(self): self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) + if not valid: + self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) + return temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) @@ -919,7 +973,7 @@ def _popupChange(selection): confirm() return low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) - data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) @@ -941,7 +995,7 @@ def __init__(self, screen): self._details = Text() self._details.disabled = True self._details.custom_colour = "field" - regex = "((?:\w+)(?:.)?(?:txt))|(\w+)$" + regex = "^([\w+\- ]*)(.txt)$" self._list = FileBrowser(Widget.FILL_FRAME, os.path.abspath("."), name="mc_list", @@ -953,7 +1007,7 @@ def __init__(self, screen): layout.add_widget(self._list) layout.add_widget(Divider()) layout.add_widget(self._details) - layout.add_widget(Label("Press Enter to select or `q` to quit.")) + layout.add_widget(Label("Press Enter to select, 'q' to quit without making changes, or 'c' to clear file selection.")) # Prepare the Frame for use. self.fix() @@ -964,6 +1018,7 @@ def popup(self): self._scene.add_effect(PopUpDialog(self._screen, "Please pick a valid file (a .txt file or a file with no extension)", ["OK"])) else: data.stopwordFile.text = "Current stopworded file: " + self._list.value + data.fileName = self._list.value raise NextScene("Summary") # self._scene.add_effect(PopUpDialog(self._screen, "You selected: {}".format(self._list.value), ["OK"])) @@ -987,7 +1042,8 @@ def process_event(self, event): if event.key_code in [ord('q'), ord('Q'), Screen.ctrl("c")]: raise NextScene("Summary") elif event.key_code in [ord('c'), ord('C')]: - data.stopwordFile.text = "hello" + data.stopwordFile.text = "Current stopword file: " + data.fileName = "" raise NextScene("Summary") # Now pass on to lower levels for normal handling of the event. @@ -1026,6 +1082,22 @@ def updatePreppedLength(): if data.english.value: data.englishCandidates = stop_language(tempC, "english") temp.update(data.englishCandidates) + if data.fileName != "": + with open(data.fileName, encoding='utf8') as swf: + data.fileCandidates = [word.strip() for word in swf] + + if len(data.fileCandidates): + print("Applying custom stopword file to remove {} word{}.".format( + len(data.fileCandidates), 's' if len(data.fileCandidates) > 1 else '')) + temp.update(data.fileCandidates) + minNum = 3 + if data.minWord.value != "": + try: + minNum = int(data.minWord.value) + except Exception: + minNum = 3 + data.stopCandidates = get_small_words(tempC, minNum) + temp.update(data.stopCandidates) temp.update(data.lowCandidates) temp.update(data.highCandidates) tempC.in_place_stoplist(temp) @@ -1107,21 +1179,23 @@ def main(args): # Apply custom stopwords file if args.stopword_file: - with open(args.stopword_file, encoding='utf8') as swf: - #candidates = [unidecode(word.strip()) for word in swf] - candidates = [word.strip() for word in swf] + data.fileName = args.stopword_file + data.stopwordFile.text = "Current stopworded file: " + args.stopword_file + # with open(args.stopword_file, encoding='utf8') as swf: + # candidates = [unidecode(word.strip()) for word in swf] + # data.fileCandidates = [word.strip() for word in swf] - if len(candidates): - print("Applying custom stopword file to remove {} word{}.".format( - len(candidates), 's' if len(candidates) > 1 else '')) - data.stoplist.update(candidates) + # if len(data.fileCandidates): + # print("Applying custom stopword file to remove {} word{}.".format( + # len(data.fileCandidates), 's' if len(data.fileCandidates) > 1 else '')) + # data.stoplist.update(candidates) if args.min_word_len: candidates = get_small_words(data.c, args.min_word_len) if len(candidates): print("Filtering {} small word{} with less than {} characters.".format( len(candidates), 's' if len(candidates) > 1 else '', args.min_word_len)) - data.stoplist.update(candidates) + # data.stoplist.update(candidates) # cache item counts data.items, data.counts = get_corpus_counts(data.c) @@ -1179,6 +1253,8 @@ def gui(screen, scene): data.stoplist.update(data.highCandidates) data.stoplist.update(data.lowCandidates) data.stoplist.update(data.stopCandidates) + data.stoplist.update(data.englishCandidates) + data.stoplist.update(data.fileCandidates) if not data.stoplist: print("No stopwords applied.\n\n") From d3964bc77ed4614ea65e0d584aef964a6aa45aa7 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Wed, 28 Nov 2018 14:37:49 -0500 Subject: [PATCH 10/21] adding comments --- topicexplorer/prep.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index bcb6dedb..f5ad19f7 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -580,11 +580,13 @@ def _prep(self): data.fileCandidates = [] data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) + # Checks to see if the value entered with filter the whole corpus out if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) return data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) + # Checks to see if the value entered with filter the whole corpus out if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return @@ -641,6 +643,7 @@ def _high(self): data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) + # Checks to see if the value entered with filter the whole corpus out if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) return @@ -692,6 +695,7 @@ def _low(self): data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) + # Checks to see if the value entered with filter the whole corpus out if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return @@ -772,6 +776,7 @@ def _ok(self): data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) + # Checks to see if the value entered with filter the whole corpus out if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) return @@ -798,7 +803,7 @@ def _popup(selection): confirm() return high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) - data.highCandidates, filtered, value = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) @@ -825,6 +830,7 @@ def _change(self): data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) + # Checks to see if the value entered with filter the whole corpus out if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) return @@ -894,6 +900,7 @@ def _ok(self): data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) + # Checks to see if the value entered with filter the whole corpus out if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return @@ -948,6 +955,7 @@ def _change(self): data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) + # Checks to see if the value entered with filter the whole corpus out if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return @@ -1019,6 +1027,7 @@ def popup(self): else: data.stopwordFile.text = "Current stopworded file: " + self._list.value data.fileName = self._list.value + updatePreppedLength() raise NextScene("Summary") # self._scene.add_effect(PopUpDialog(self._screen, "You selected: {}".format(self._list.value), ["OK"])) @@ -1040,10 +1049,12 @@ def process_event(self, event): global data if isinstance(event, KeyboardEvent): if event.key_code in [ord('q'), ord('Q'), Screen.ctrl("c")]: + updatePreppedLength() raise NextScene("Summary") elif event.key_code in [ord('c'), ord('C')]: data.stopwordFile.text = "Current stopword file: " data.fileName = "" + updatePreppedLength() raise NextScene("Summary") # Now pass on to lower levels for normal handling of the event. @@ -1085,7 +1096,6 @@ def updatePreppedLength(): if data.fileName != "": with open(data.fileName, encoding='utf8') as swf: data.fileCandidates = [word.strip() for word in swf] - if len(data.fileCandidates): print("Applying custom stopword file to remove {} word{}.".format( len(data.fileCandidates), 's' if len(data.fileCandidates) > 1 else '')) From 3c7f4937affba0e54e8a8d3cfda96771c4987851 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Mon, 3 Dec 2018 18:34:43 -0500 Subject: [PATCH 11/21] file preview, rearranging wizard screens --- topicexplorer/prep.py | 267 +++++++++++++++++++++++++----------------- 1 file changed, 162 insertions(+), 105 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index f5ad19f7..53239797 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -300,26 +300,26 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): bins = sorted(set(bins)) bins.append(max(counts)) - ret = "" + chart = "" bin_counts, bins = np.histogram(counts, bins=bins) - ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', "# words", "Rate") + "\n" + chart += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', "# words", "Rate") + "\n" last_row = 0 for bin, count in zip(bins[-2::-1], np.cumsum(bin_counts[::-1])): filtered_counts = counts[get_mask(c, words)] if (filtered_counts >= bin).sum() > last_row: percentage = 1. - (old_div(counts[counts < bin].sum(), float(c.original_length))) - ret += "{0:>5.0f}x".format(bin).rjust(8) - ret += '{0:2.1f}% '.format(percentage * 100).rjust(10) - ret += (u'\u2588' * int(percentage * 36)).ljust(36) - ret += "{0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(15) - ret += " >={0:>5.0f}x".format(bin).ljust(8) + "\n" + chart += "{0:>5.0f}x".format(bin).rjust(8) + chart += '{0:2.1f}% '.format(percentage * 100).rjust(10) + chart += (u'\u2588' * int(percentage * 36)).ljust(36) + chart += "{0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(15) + chart += " >={0:>5.0f}x".format(bin).ljust(8) + "\n" last_row = (filtered_counts >= bin).sum() - ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37) - ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n' - return ret + chart += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(34) + chart += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + return chart def get_high_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np @@ -358,27 +358,27 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): bins = sorted(set(bins)) bins.append(max(counts)) - ret = "" + chart = "" bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins) - ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n" + chart += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n" last_row = 0 for bin, count in zip(bins, np.cumsum(bin_counts)): filtered_counts = counts[get_mask(c, words)] if last_row < (filtered_counts < bin).sum() <= len(filtered_counts): percentage = (old_div(counts[counts <= bin].sum(), float(c.original_length))) - ret += "{0:>5.0f}x".format(bin).rjust(8) - ret += '{0:2.1f}%'.format(percentage * 100).rjust(9) - ret += " " + (u'\u2588' * int(percentage * 36)).ljust(36) - ret += "{0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(15) - ret += " <={0:>5.0f}x".format(bin).ljust(8) + "\n" + chart += "{0:>5.0f}x".format(bin).rjust(8) + chart += '{0:2.1f}%'.format(percentage * 100).rjust(9) + chart += " " + (u'\u2588' * int(percentage * 36)).ljust(36) + chart += "{0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(15) + chart += " <={0:>5.0f}x".format(bin).ljust(8) + "\n" if (filtered_counts < bin).sum() == len(filtered_counts): break last_row = (filtered_counts >= bin).sum() - ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37) - ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n' - return ret + chart += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(34) + chart += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + return chart def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np @@ -413,24 +413,19 @@ def __init__(self): self.stoplist = set() self.label = Label("change this") self.summaryHigh = Text(label=" Words:", name="summaryHighFreq", max_length=5) - # self.summaryHighText = Label("Words:", align=">") - # self.summaryHigh = Text(label="") self.summaryHighPercent = Text("Percent:", "summaryHighPercent", max_length=5) - # self.summaryHighPercentText = Label("Percent:", align=">") - # self.summaryHighPercent = Text(label="") - self.high = Text("High frequency word filter (#):", "highFreq", max_length=5) - self.highPercent = Text("High ferquency word filter (%):", "highPercent", max_length=5) - self.highLabel = Label("high label", height=58) + self.high = Text(" Words:", "highFreq", max_length=5) + self.highPercent = Text("Percent:", "highPercent", max_length=5) + self.highChart = Label("high label", align="^") + self.highStop = Label("high stop", align="^") + self.highStop.text = "hello" self.highCandidates = [] self.summaryLow = Text(" Words:", "summaryLowFreq", max_length=5) - # self.summaryLowText = Label("Words:", align=">") - # self.summaryLow = Text(label="") self.summaryLowPercent = Text("Percent:", "summaryLowPercent", max_length=5) - # self.summaryLowPercentText = Label("Percent:", align=">") - # self.summaryLowPercent = Text(label="") - self.low = Text("Low frequency word filter (#):", "lowFreq", max_length=5) - self.lowPercent = Text("Low frequency word filter (%):", "lowPercent", max_length=5) - self.lowLabel = Label("low label", height=58) + self.low = Text(" Words:", "lowFreq", max_length=5) + self.lowPercent = Text("Percent:", "lowPercent", max_length=5) + self.lowChart = Label("low label", align="^") + self.lowStop = Label("low stop", align="^") self.lowCandidates = [] self.minWord = Text("Minimum word length:", "length", max_length=5) self.counter = 0 @@ -462,14 +457,8 @@ def __init__(self, screen): highOptions = Layout([7, 2, 6]) self.add_layout(highOptions) - # highOptions.add_widget(data.summaryHighText, 0) - # highOptions.add_widget(data.summaryHigh, 1) highOptions.add_widget(data.summaryHigh, 1) - # highOptions.add_widget(Label(""), 2) - # highOptions.add_widget(data.summaryHighPercentText, 0) - # highOptions.add_widget(data.summaryHighPercent, 1) highOptions.add_widget(data.summaryHighPercent, 1) - # highOptions.add_widget(Label(""), 2) highButton = Layout([1]) self.add_layout(highButton) @@ -484,14 +473,8 @@ def __init__(self, screen): lowOptions = Layout([7, 2, 6]) self.add_layout(lowOptions) - # lowOptions.add_widget(data.summaryLowText, 0) - # lowOptions.add_widget(data.summaryLow, 1) lowOptions.add_widget(data.summaryLow, 1) - # lowOptions.add_widget(Label(""), 2) - # lowOptions.add_widget(data.summaryLowPercentText, 0) - # lowOptions.add_widget(data.summaryLowPercent, 1) lowOptions.add_widget(data.summaryLowPercent, 1) - # lowOptions.add_widget(Label(""), 2) lowButton = Layout([1]) self.add_layout(lowButton) @@ -519,24 +502,18 @@ def __init__(self, screen): stopwordFileLayout.add_widget(Button("Select new file", self._chooseFile), 0) stopwordFileLayout.add_widget(Divider(height=2, line_char="-"), 0) stopwordFileLayout.add_widget(Divider(height=1, line_char=" "), 0) - # layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^")) - # layout.add_widget(data.prepSize) corpusLenLayout = Layout([1]) self.add_layout(corpusLenLayout) corpusLenLayout.add_widget(Label("Corpus Length\n-------------", align="^", height=2), 0) - corpusLenLayout.add_widget(Label("Original corpus unique works: " + str(data.c.original_length), align="^")) + corpusLenLayout.add_widget(Label("Original corpus length: " + str(data.c.original_length), align="^")) corpusLenLayout.add_widget(data.prepSize) corpusLenLayout.add_widget(Divider(height=1, line_char=" "), 0) layout2 = Layout([1, 1]) self.add_layout(layout2) layout2.add_widget(Button("prep", self._prep), 0) - # layout2.add_widget(Divider(height=1, line_char="-"), 4) - # layout2.add_widget(Button("high", self._high), 1) - # layout2.add_widget(Button("low", self._low), 2) layout2.add_widget(Button("exit", self._exit), 1) - # layout2.add_widget(Divider(height=1, line_char="-"), 5) self.fix() # Preps the corpus @@ -650,9 +627,14 @@ def _high(self): temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) - data.highLabel.text += filtered + (columns, line) = os.get_terminal_size() + data.highChart.text = chart + data.highChart._required_height = chart.count('\n') + 1 + data.highStop.text = filtered + data.highStop._required_height = line - data.highChart._required_height - 5 + data.highFreqScene.fix() raise NextScene("High Freq") # Handle button clicks for high popup @@ -673,9 +655,14 @@ def _popupHigh(selection): num=high) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) - data.highLabel.text += filtered + (columns, line) = os.get_terminal_size() + data.highChart.text = chart + data.highChart._required_height = chart.count('\n') + 1 + data.highStop.text = filtered + data.highStop._required_height = line - data.highChart._required_height - 5 + data.highFreqScene.fix() raise NextScene("High Freq") # Handle button click of the button on the Summary scene @@ -699,12 +686,16 @@ def _low(self): if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return - + (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) - data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) - data.lowLabel.text += filtered + data.lowChart.text = chart + data.lowChart._required_height = chart.count('\n') + 1 + data.lowStop.text = filtered + data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowFreqScene.fix() raise NextScene("Low Freq") # Handle button clicks for low popup @@ -725,9 +716,14 @@ def _popupLow(selection): num=low) temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) - data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) - data.lowLabel.text += filtered + (columns, line) = os.get_terminal_size() + data.lowChart.text = chart + data.lowChart._required_height = chart.count('\n') + 1 + data.lowStop.text = filtered + data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowFreqScene.fix() raise NextScene("Low Freq") def _chooseFile(self): @@ -747,12 +743,18 @@ def __init__(self, screen): title="High Frequency Word Filter", reduce_cpu=True) global data + data.highFreqScene = self - layout = Layout([100], fill_frame=True) - self.add_layout(layout) - layout.add_widget(data.highLabel) - layout.add_widget(data.high) - layout.add_widget(data.highPercent) + chartLayout = Layout([1]) + self.add_layout(chartLayout) + chartLayout.add_widget(data.highChart, 0) + fieldsLayout = Layout([7, 2, 6]) + self.add_layout(fieldsLayout) + fieldsLayout.add_widget(data.high, 1) + fieldsLayout.add_widget(data.highPercent, 1) + stopLayout = Layout([1]) + self.add_layout(stopLayout) + stopLayout.add_widget(data.highStop) layout2 = Layout([1, 1]) self.add_layout(layout2) layout2.add_widget(Button("Ok", self._ok), 0) @@ -780,12 +782,16 @@ def _ok(self): if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) return - + (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) - data.highLabel.text += filtered + data.highChart.text = chart + data.highChart._required_height = chart.count('\n') + 1 + data.highStop.text = filtered + data.highStop._required_height = line - data.highChart._required_height - 5 + data.highFreqScene.fix() updatePreppedLength() raise NextScene("Summary") @@ -805,12 +811,16 @@ def _popup(selection): high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) + (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) - data.highLabel.text += filtered - + data.highChart.text = chart + data.highChart._required_height = chart.count('\n') + 1 + data.highStop.text = filtered + data.highStop._required_height = line - data.highChart._required_height - 5 + data.highFreqScene.fix() raise NextScene("Summary") # Handle button click for Update @@ -835,11 +845,16 @@ def _change(self): self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) return + (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) - data.highLabel.text += filtered + data.highChart.text = chart + data.highChart._required_height = chart.count('\n') + 1 + data.highStop.text = filtered + data.highStop._required_height = line - data.highChart._required_height - 5 + data.highFreqScene.fix() # Handle button click for popup after clicking change @staticmethod @@ -857,11 +872,16 @@ def _popupChange(selection): high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) + (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) temp.update(data.highCandidates) - data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=high) - data.highLabel.text += filtered + data.highChart.text = chart + data.highChart._required_height = chart.count('\n') + 1 + data.highStop.text = filtered + data.highStop._required_height = line - data.highChart._required_height - 5 + data.highFreqScene.fix() # Low frequency scene class LowFreq(Frame): @@ -871,12 +891,18 @@ def __init__(self, screen): title="Low Frequency Word Filter", reduce_cpu=True) global data + data.lowFreqScene = self - layout = Layout([100], fill_frame=True) - self.add_layout(layout) - layout.add_widget(data.lowLabel) - layout.add_widget(data.low) - layout.add_widget(data.lowPercent) + chartLayout = Layout([1]) + self.add_layout(chartLayout) + chartLayout.add_widget(data.lowChart, 0) + fieldsLayout = Layout([7, 2, 6]) + self.add_layout(fieldsLayout) + fieldsLayout.add_widget(data.low, 1) + fieldsLayout.add_widget(data.lowPercent, 1) + stopLayout = Layout([1]) + self.add_layout(stopLayout) + stopLayout.add_widget(data.lowStop) layout2 = Layout([1, 1]) self.add_layout(layout2) layout2.add_widget(Button("Ok", self._ok), 0) @@ -904,12 +930,16 @@ def _ok(self): if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return - + (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) - data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) - data.lowLabel.text += filtered + data.lowChart.text = chart + data.lowChart._required_height = chart.count('\n') + 1 + data.lowStop.text = filtered + data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowFreqScene.fix() updatePreppedLength() raise NextScene("Summary") @@ -930,12 +960,16 @@ def _popup(selection): data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) + (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) - data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + (chart, text) = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) - data.lowLabel.text += filtered - + data.lowChart.text = chart + data.lowChart._required_height = chart.count('\n') + 1 + data.lowStop.text = filtered + data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowFreqScene.fix() raise NextScene("Summary") # Handle button click for Update @@ -959,13 +993,16 @@ def _change(self): if not valid: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return - + (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) - data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) - data.lowLabel.text += filtered - + data.lowChart.text = chart + data.lowChart._required_height = chart.count('\n') + 1 + data.lowStop.text = filtered + data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowFreqScene.fix() # Handle button click for popup after clicking change @staticmethod @@ -983,11 +1020,16 @@ def _popupChange(selection): low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) + (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) - data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, + chart = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts, num=low) - data.lowLabel.text += filtered + data.lowChart.text = chart + data.lowChart._required_height = chart.count('\n') + 1 + data.lowStop.text = filtered + data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowFreqScene.fix() # Taken from: https://github.com/peterbrittain/asciimatics/blob/master/samples/treeview.py class Files(Frame): @@ -1025,11 +1067,20 @@ def popup(self): if not self._list.value.endswith(".txt") and "." in self._list.value: self._scene.add_effect(PopUpDialog(self._screen, "Please pick a valid file (a .txt file or a file with no extension)", ["OK"])) else: - data.stopwordFile.text = "Current stopworded file: " + self._list.value - data.fileName = self._list.value + global data + f = open(self._list.value, "r") + text = f.read() + data.tempFileName = self._list.value + self._scene.add_effect(PopUpDialog(self._screen, "Use the selected file with the following text?\n" + text, ["Yes", "No"], on_close=self.handlePopup)) + + @staticmethod + def handlePopup(selection): + if str(selection) == "0": + global data + data.stopwordFile.text = "Current stopworded file: " + data.tempFileName + data.fileName = data.tempFileName updatePreppedLength() raise NextScene("Summary") - # self._scene.add_effect(PopUpDialog(self._screen, "You selected: {}".format(self._list.value), ["OK"])) def details(self): # If python magic is installed, provide a little more detail of the current file. @@ -1111,7 +1162,7 @@ def updatePreppedLength(): temp.update(data.lowCandidates) temp.update(data.highCandidates) tempC.in_place_stoplist(temp) - data.prepSize.text = str("Prepared corpus unique words: " + str(len(tempC))) + data.prepSize.text = str("Prepared corpus length: " + str(len(tempC))) # Highlight the necessary fields def confirm(): @@ -1203,8 +1254,10 @@ def main(args): if args.min_word_len: candidates = get_small_words(data.c, args.min_word_len) if len(candidates): - print("Filtering {} small word{} with less than {} characters.".format( - len(candidates), 's' if len(candidates) > 1 else '', args.min_word_len)) + data.lowCandidates = candidates + data.minWord._value = args.min_word_len + # print("Filtering {} small word{} with less than {} characters.".format( + # len(candidates), 's' if len(candidates) > 1 else '', args.min_word_len)) # data.stoplist.update(candidates) # cache item counts @@ -1215,13 +1268,14 @@ def main(args): candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): data.highCandidates = candidates - data.highLabel._value = args.high_filter + data.summaryHigh._value = args.high_filter elif args.high_percent: args.high_filter = get_closest_bin(data.c, 1 - (args.high_percent / 100.), counts=data.counts) print(args.high_filter) candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): - data.stoplist.update(candidates) + data.highCandidates = candidates + data.summaryHighPercent._value = args.high_percent if args.low_filter is None and args.low_percent is None and args.quiet: @@ -1230,13 +1284,14 @@ def main(args): candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): data.lowCandidates = candidates - data.lowLabel._value = args.low_filter + data.summaryLow._value = args.low_filter elif args.low_percent: args.low_filter = get_closest_bin(data.c, 1 - (args.low_percent / 100.), reverse=True, counts=data.counts) print(args.low_filter) candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts) if len(candidates): - data.stoplist.update(candidates) + data.lowCandidates = candidates + data.summaryLowPercent._value = args.low_percent def gui(screen, scene): scenes = [ @@ -1249,7 +1304,9 @@ def gui(screen, scene): data.wholeScreen = screen screen.play(scenes, stop_on_resize=True, start_scene=scene) - data.prepSize.text = str("Prepared corpus unique words: " + str(len(data.c))) + data.prepSize.text = str("Prepared corpus length: " + str(len(data.c))) + + updatePreppedLength() last_scene = None while True: From 6af97af3b91219777a374309cdff1550b2a7cec0 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Mon, 3 Dec 2018 19:36:33 -0500 Subject: [PATCH 12/21] adding asciimatics to the requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index a93e3805..884dcfd6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +-e git+https://github.com/peterbrittain/asciimatics.git@fcedb4947933de7e1507ec0dee8ca7a3f466928a#egg=asciimatics bottle>=0.12.0 brewer2mpl>=1.4.0,<1.5.0 decorator>=4.0.5 From 2ffed42a8af3142b783c9a885aecde1b959c544c Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Mon, 3 Dec 2018 22:08:57 -0500 Subject: [PATCH 13/21] demo works now --- topicexplorer/prep.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index 53239797..382b6377 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -339,8 +339,6 @@ def get_high_filter_stops(c, words=None, items=None, counts=None, num=None): if len(candidates) == len(c.words): valid = False - # filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus." - # filtered += "Please choose a different filter." except ValueError: input_filter = 0 @@ -1309,7 +1307,7 @@ def gui(screen, scene): updatePreppedLength() last_scene = None - while True: + while not args.quiet: try: Screen.wrapper(gui, catch_interrupt=True, arguments=[last_scene]) break From bb3867194cd72e8f4b61da679e037a3aa5723ff4 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Tue, 4 Dec 2018 09:54:45 -0500 Subject: [PATCH 14/21] working on tests --- tests/test_prep.py | 13 +++++++++++-- topicexplorer/prep.py | 17 ++++++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/tests/test_prep.py b/tests/test_prep.py index dcb2a1f3..a14653f3 100644 --- a/tests/test_prep.py +++ b/tests/test_prep.py @@ -20,6 +20,7 @@ dtype=[('idx', ' Date: Tue, 4 Dec 2018 15:24:03 -0500 Subject: [PATCH 15/21] modifying test cases --- tests/test_prep.py | 91 ++++++++++++++++++++----------------------- topicexplorer/prep.py | 2 +- 2 files changed, 44 insertions(+), 49 deletions(-) diff --git a/tests/test_prep.py b/tests/test_prep.py index a14653f3..22edf65f 100644 --- a/tests/test_prep.py +++ b/tests/test_prep.py @@ -20,7 +20,6 @@ dtype=[('idx', ' Date: Wed, 5 Dec 2018 14:34:38 -0500 Subject: [PATCH 16/21] adding asciimatics install to windows build --- appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/appveyor.yml b/appveyor.yml index 2e78203b..de8d7fb9 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -17,6 +17,7 @@ install: - "%PYTHON%\\python.exe -m conda install -q --yes cython scikit-learn pandas" # for vsm - "%PYTHON%\\python.exe -c \"import nltk; nltk.download('stopwords'); nltk.download('punkt')\"" - "%PYTHON%\\python.exe -m pip install unittest2 nose wget" + - "%PYTHON%\\python.exe -m pip install -e git+https://github.com/peterbrittain/asciimatics.git@fcedb4947933de7e1507ec0dee8ca7a3f466928a#egg=asciimatics" - "%PYTHON%\\python.exe -m pip install ." build: off From 6d43236ec7c9e6325c6693aba67c4bc768ced144 Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Thu, 6 Dec 2018 12:25:34 -0500 Subject: [PATCH 17/21] adding dividers for wizard screens --- topicexplorer/prep.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index 8a5d34e9..88449016 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -646,7 +646,7 @@ def _high(self): data.highChart.text = chart data.highChart._required_height = chart.count('\n') + 1 data.highStop.text = filtered - data.highStop._required_height = line - data.highChart._required_height - 5 + data.highStop._required_height = line - data.highChart._required_height - 7 data.highFreqScene.fix() raise NextScene("High Freq") @@ -674,7 +674,7 @@ def _popupHigh(selection): data.highChart.text = chart data.highChart._required_height = chart.count('\n') + 1 data.highStop.text = filtered - data.highStop._required_height = line - data.highChart._required_height - 5 + data.highStop._required_height = line - data.highChart._required_height - 7 data.highFreqScene.fix() raise NextScene("High Freq") @@ -707,7 +707,7 @@ def _low(self): data.lowChart.text = chart data.lowChart._required_height = chart.count('\n') + 1 data.lowStop.text = filtered - data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowStop._required_height = line - data.lowChart._required_height - 7 data.lowFreqScene.fix() raise NextScene("Low Freq") @@ -735,7 +735,7 @@ def _popupLow(selection): data.lowChart.text = chart data.lowChart._required_height = chart.count('\n') + 1 data.lowStop.text = filtered - data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowStop._required_height = line - data.lowChart._required_height - 7 data.lowFreqScene.fix() raise NextScene("Low Freq") @@ -761,12 +761,14 @@ def __init__(self, screen): chartLayout = Layout([1]) self.add_layout(chartLayout) chartLayout.add_widget(data.highChart, 0) + chartLayout.add_widget(Divider()) fieldsLayout = Layout([7, 2, 6]) self.add_layout(fieldsLayout) fieldsLayout.add_widget(data.high, 1) fieldsLayout.add_widget(data.highPercent, 1) stopLayout = Layout([1]) self.add_layout(stopLayout) + stopLayout.add_widget(Divider()) stopLayout.add_widget(data.highStop) layout2 = Layout([1, 1]) self.add_layout(layout2) @@ -803,7 +805,7 @@ def _ok(self): data.highChart.text = chart data.highChart._required_height = chart.count('\n') + 1 data.highStop.text = filtered - data.highStop._required_height = line - data.highChart._required_height - 5 + data.highStop._required_height = line - data.highChart._required_height - 7 data.highFreqScene.fix() updatePreppedLength() raise NextScene("Summary") @@ -832,7 +834,7 @@ def _popup(selection): data.highChart.text = chart data.highChart._required_height = chart.count('\n') + 1 data.highStop.text = filtered - data.highStop._required_height = line - data.highChart._required_height - 5 + data.highStop._required_height = line - data.highChart._required_height - 7 data.highFreqScene.fix() raise NextScene("Summary") @@ -866,7 +868,7 @@ def _change(self): data.highChart.text = chart data.highChart._required_height = chart.count('\n') + 1 data.highStop.text = filtered - data.highStop._required_height = line - data.highChart._required_height - 5 + data.highStop._required_height = line - data.highChart._required_height - 7 data.highFreqScene.fix() # Handle button click for popup after clicking change @@ -893,7 +895,7 @@ def _popupChange(selection): data.highChart.text = chart data.highChart._required_height = chart.count('\n') + 1 data.highStop.text = filtered - data.highStop._required_height = line - data.highChart._required_height - 5 + data.highStop._required_height = line - data.highChart._required_height - 7 data.highFreqScene.fix() # Low frequency scene @@ -909,12 +911,14 @@ def __init__(self, screen): chartLayout = Layout([1]) self.add_layout(chartLayout) chartLayout.add_widget(data.lowChart, 0) + chartLayout.add_widget(Divider()) fieldsLayout = Layout([7, 2, 6]) self.add_layout(fieldsLayout) fieldsLayout.add_widget(data.low, 1) fieldsLayout.add_widget(data.lowPercent, 1) stopLayout = Layout([1]) self.add_layout(stopLayout) + stopLayout.add_widget(Divider()) stopLayout.add_widget(data.lowStop) layout2 = Layout([1, 1]) self.add_layout(layout2) @@ -951,7 +955,7 @@ def _ok(self): data.lowChart.text = chart data.lowChart._required_height = chart.count('\n') + 1 data.lowStop.text = filtered - data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowStop._required_height = line - data.lowChart._required_height - 7 data.lowFreqScene.fix() updatePreppedLength() raise NextScene("Summary") @@ -981,7 +985,7 @@ def _popup(selection): data.lowChart.text = chart data.lowChart._required_height = chart.count('\n') + 1 data.lowStop.text = filtered - data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowStop._required_height = line - data.lowChart._required_height - 7 data.lowFreqScene.fix() raise NextScene("Summary") @@ -1014,7 +1018,7 @@ def _change(self): data.lowChart.text = chart data.lowChart._required_height = chart.count('\n') + 1 data.lowStop.text = filtered - data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowStop._required_height = line - data.lowChart._required_height - 7 data.lowFreqScene.fix() # Handle button click for popup after clicking change @@ -1041,7 +1045,7 @@ def _popupChange(selection): data.lowChart.text = chart data.lowChart._required_height = chart.count('\n') + 1 data.lowStop.text = filtered - data.lowStop._required_height = line - data.lowChart._required_height - 5 + data.lowStop._required_height = line - data.lowChart._required_height - 7 data.lowFreqScene.fix() # Taken from: https://github.com/peterbrittain/asciimatics/blob/master/samples/treeview.py From 7b7ede8cbeb770e0d7185c9f38c200ae5d9b4a5f Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Tue, 18 Dec 2018 16:28:26 -0500 Subject: [PATCH 18/21] raising ValueError now --- tests/test_prep.py | 39 +++++------ topicexplorer/prep.py | 156 ++++++++++++++++++------------------------ 2 files changed, 82 insertions(+), 113 deletions(-) diff --git a/tests/test_prep.py b/tests/test_prep.py index 22edf65f..da3ca61a 100644 --- a/tests/test_prep.py +++ b/tests/test_prep.py @@ -60,52 +60,47 @@ def test_get_candidate_words(): def test_get_high_filter(input_mock): # Test with high filter of 3 items, counts = topicexplorer.prep.get_corpus_counts(corpus) - candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=3) + candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=3) assert len(corpus.words) - len(candidates) == 3 assert candidates == ['I'] - assert valid == True # Test with high filter of 0 - candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=0) + candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=0) assert len(corpus.words) - len(candidates) == 4 assert candidates == [] - assert valid == True # Test with high filter of 1, should return invalid - candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=1) - assert len(corpus.words) - len(candidates) == 0 - assert candidates == ['I', 'came', 'conquered', 'saw'] - assert valid == False + with unittest.TestCase.assertRaises(unittest.TestCase, ValueError): + candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=1) + assert len(corpus.words) - len(candidates) == 0 + assert candidates == ['I', 'came', 'conquered', 'saw'] # Test with high filter of 100 - candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=100) + candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=100) assert len(corpus.words) - len(candidates) == 4 assert candidates == [] - assert valid == True @patch('topicexplorer.prep.input') def test_get_low_filter(input_mock): # Test with low filter of 1 items, counts = topicexplorer.prep.get_corpus_counts(corpus) - candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=1) + candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=1) assert len(corpus.words) - len(candidates) == 1 assert all(w in candidates for w in ['came', 'saw', 'conquered']) - assert valid == True # Test with low filter of 3 - candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=3) - assert len(corpus.words) - len(candidates) == 0 - assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I']) - assert valid == False + with unittest.TestCase.assertRaises(unittest.TestCase, ValueError): + candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=3) + assert len(corpus.words) - len(candidates) == 0 + assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I']) # Test with low filter of 0 - candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=0) + candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=0) assert len(corpus.words) - len(candidates) == 4 assert all(w in candidates for w in []) - assert valid == True # Test with low filter of 100 - candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=100) - assert len(corpus.words) - len(candidates) == 0 - assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I']) - assert valid == False + with unittest.TestCase.assertRaises(unittest.TestCase, ValueError): + candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=100) + assert len(corpus.words) - len(candidates) == 0 + assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I']) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index 88449016..e28c729f 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -324,40 +324,22 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None): def get_high_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np input_filter = num - valid = True - f = open("test.txt", "w+") - f.write("c" + "\n") - f.write(str(c) + "\n") - f.write("c words" + "\n") - f.write(str(c.words) + "\n") - f.write("words" + "\n") - f.write(str(words) + "\n") - f.write("items" + "\n") - f.write(str(items) + "\n") - f.write("counts" + "\n") - f.write(str(counts) + "\n") - f.write("num" + "\n") - f.write(str(num) + "\n") - f.write("lengths" + "\n") - f.write(str(len(c.words)) + " " + str(len(items)) + " " + str(len(counts))) - try: - candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts) - places = np.in1d(c.words, candidates) - places = dict(zip(candidates, np.where(places)[0])) - candidates = sorted(candidates, key=lambda x: counts[places[x]], reverse=True) - filtered_counts = counts[get_mask(c, words)] - filtered = "" - filtered += "Filter will remove " + str(filtered_counts[filtered_counts >= input_filter].sum()) - filtered += " occurrences " + "of these " + str(len(filtered_counts[filtered_counts >= input_filter])) + " words: " - filtered += u' '.join(candidates) + candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts) + places = np.in1d(c.words, candidates) + places = dict(zip(candidates, np.where(places)[0])) + candidates = sorted(candidates, key=lambda x: counts[places[x]], reverse=True) + filtered_counts = counts[get_mask(c, words)] - if len(candidates) == len(c.words): - valid = False + filtered = "" + filtered += "Filter will remove " + str(filtered_counts[filtered_counts >= input_filter].sum()) + filtered += " occurrences " + "of these " + str(len(filtered_counts[filtered_counts >= input_filter])) + " words: " + filtered += u' '.join(candidates) - except ValueError: - input_filter = 0 - return (candidates, filtered, valid) + if len(candidates) == len(c.words): + raise ValueError + + return (candidates, filtered) def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): @@ -396,29 +378,21 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None): def get_low_filter_stops(c, words=None, items=None, counts=None, num=None): import numpy as np input_filter = num - valid = True - try: - candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts) - places = np.in1d(c.words, candidates) - places = dict(zip(candidates, np.where(places)[0])) - candidates = sorted(candidates, key=lambda x: counts[places[x]]) - filtered_counts = counts[get_mask(c, words)] - - filtered = "" - filtered += "Filter will remove " + str(filtered_counts[filtered_counts <= input_filter].sum()) + " tokens" - filtered += "of these " + str(len(filtered_counts[filtered_counts <= input_filter])) + " words: " - filtered += u' '.join(candidates) - + candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts) + places = np.in1d(c.words, candidates) + places = dict(zip(candidates, np.where(places)[0])) + candidates = sorted(candidates, key=lambda x: counts[places[x]]) + filtered_counts = counts[get_mask(c, words)] - if len(candidates) == len(c.words): - valid = False - # filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus." - # filtered += "Please choose a different filter." + filtered = "" + filtered += "Filter will remove " + str(filtered_counts[filtered_counts <= input_filter].sum()) + " tokens" + filtered += "of these " + str(len(filtered_counts[filtered_counts <= input_filter])) + " words: " + filtered += u' '.join(candidates) - except ValueError: - input_filter = 0 + if len(candidates) == len(c.words): + raise ValueError - return (candidates, filtered, valid) + return (candidates, filtered) # Stores all of the variables for the labels class PrepData(Frame): @@ -568,16 +542,16 @@ def _prep(self): len(data.fileCandidates), 's' if len(data.fileCandidates) > 1 else '')) else: data.fileCandidates = [] - data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=high) - # Checks to see if the value entered with filter the whole corpus out - if not valid: + try: + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=high) + except ValueError: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) return - data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=low) - # Checks to see if the value entered with filter the whole corpus out - if not valid: + try: + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=low) + except ValueError: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return data.stopCandidates = get_small_words(data.c, minNum) @@ -631,10 +605,10 @@ def _high(self): self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=high) - # Checks to see if the value entered with filter the whole corpus out - if not valid: + try: + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=high) + except ValueError: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) return @@ -664,7 +638,7 @@ def _popupHigh(selection): confirm() return high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False) - data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) temp = deepcopy(data.stoplist) temp.update(data.highCandidates) @@ -693,10 +667,10 @@ def _low(self): self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=low) - # Checks to see if the value entered with filter the whole corpus out - if not valid: + try: + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=low) + except ValueError: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return (columns, line) = os.get_terminal_size() @@ -725,7 +699,7 @@ def _popupLow(selection): confirm() return low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True) - data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) temp = deepcopy(data.stoplist) temp.update(data.lowCandidates) @@ -791,10 +765,10 @@ def _ok(self): self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=high) - # Checks to see if the value entered with filter the whole corpus out - if not valid: + try: + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=high) + except ValueError: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) return (columns, line) = os.get_terminal_size() @@ -824,7 +798,7 @@ def _popup(selection): confirm() return high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) - data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) @@ -853,10 +827,10 @@ def _change(self): self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=high) - # Checks to see if the value entered with filter the whole corpus out - if not valid: + try: + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=high) + except ValueError: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"])) return @@ -885,7 +859,7 @@ def _popupChange(selection): confirm() return high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False) - data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=high) (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) @@ -940,11 +914,11 @@ def _ok(self): else: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - - data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=low) - # Checks to see if the value entered with filter the whole corpus out - if not valid: + + try: + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=low) + except ValueError: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return (columns, line) = os.get_terminal_size() @@ -975,7 +949,7 @@ def _popup(selection): return low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) - data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) @@ -1003,11 +977,11 @@ def _change(self): else: self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1])) return - - data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, - num=low) - # Checks to see if the value entered with filter the whole corpus out - if not valid: + + try: + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + num=low) + except ValueError: self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"])) return (columns, line) = os.get_terminal_size() @@ -1035,7 +1009,7 @@ def _popupChange(selection): confirm() return low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True) - data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, + data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts, num=low) (columns, line) = os.get_terminal_size() temp = deepcopy(data.stoplist) From af695edae4364cfd4f8839db0566fbd4b7cea57d Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Sun, 23 Dec 2018 14:14:30 -0600 Subject: [PATCH 19/21] using instance of unittest.TestCase --- tests/test_prep.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_prep.py b/tests/test_prep.py index da3ca61a..04aff21e 100644 --- a/tests/test_prep.py +++ b/tests/test_prep.py @@ -56,8 +56,7 @@ def test_get_candidate_words(): corpus, -low_freq, words=low_words) assert len(mask_words) == 0 -@patch('topicexplorer.prep.input') -def test_get_high_filter(input_mock): +def test_get_high_filter(): # Test with high filter of 3 items, counts = topicexplorer.prep.get_corpus_counts(corpus) candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=3) @@ -68,9 +67,10 @@ def test_get_high_filter(input_mock): candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=0) assert len(corpus.words) - len(candidates) == 4 assert candidates == [] - + # Test with high filter of 1, should return invalid - with unittest.TestCase.assertRaises(unittest.TestCase, ValueError): + t = unittest.TestCase('run') + with t.assertRaises(ValueError): candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=1) assert len(corpus.words) - len(candidates) == 0 assert candidates == ['I', 'came', 'conquered', 'saw'] @@ -80,8 +80,7 @@ def test_get_high_filter(input_mock): assert len(corpus.words) - len(candidates) == 4 assert candidates == [] -@patch('topicexplorer.prep.input') -def test_get_low_filter(input_mock): +def test_get_low_filter(): # Test with low filter of 1 items, counts = topicexplorer.prep.get_corpus_counts(corpus) candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=1) @@ -89,7 +88,8 @@ def test_get_low_filter(input_mock): assert all(w in candidates for w in ['came', 'saw', 'conquered']) # Test with low filter of 3 - with unittest.TestCase.assertRaises(unittest.TestCase, ValueError): + t = unittest.TestCase('run') + with t.assertRaises(ValueError): candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=3) assert len(corpus.words) - len(candidates) == 0 assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I']) @@ -100,7 +100,7 @@ def test_get_low_filter(input_mock): assert all(w in candidates for w in []) # Test with low filter of 100 - with unittest.TestCase.assertRaises(unittest.TestCase, ValueError): + with t.assertRaises(ValueError): candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=100) assert len(corpus.words) - len(candidates) == 0 assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I']) From 7751cfeda8b2c77f253bbc45ca914589e9c04acb Mon Sep 17 00:00:00 2001 From: Jaimie Murdock Date: Tue, 28 Apr 2020 22:57:23 -0600 Subject: [PATCH 20/21] updating asciimatics reqs --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 61fbf24e..a20d43d2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ --e git+https://github.com/peterbrittain/asciimatics.git@fcedb4947933de7e1507ec0dee8ca7a3f466928a#egg=asciimatics +asciimatics>=1.11.0 bottle>=0.12.0 brewer2mpl>=1.4.0,<1.5.0 decorator>=4.0.5 From d470a71aed3f1895a96c8696398099c51a92f2fc Mon Sep 17 00:00:00 2001 From: Kirtan Sakariya Date: Tue, 5 May 2020 16:17:58 -0400 Subject: [PATCH 21/21] interrupts enabled --- topicexplorer/prep.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py index 9af80eb2..283a3be8 100644 --- a/topicexplorer/prep.py +++ b/topicexplorer/prep.py @@ -1301,7 +1301,7 @@ def gui(screen, scene): last_scene = None while not args.quiet: try: - Screen.wrapper(gui, catch_interrupt=True, arguments=[last_scene]) + Screen.wrapper(gui, catch_interrupt=False, arguments=[last_scene]) break # sys.exit(0) except ResizeScreenError as e: