From de39576c075c4402b2ef0fb2ebc1ec08be629d68 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Wed, 3 Oct 2018 22:05:40 -0400
Subject: [PATCH 01/21] done with skeleton for asciimatics

---
 topicexplorer/prep.py | 708 +++++++++++++++++++++++++++++++-----------
 1 file changed, 525 insertions(+), 183 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 26947c9a..c3430990 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -130,6 +130,13 @@
 import topicexplorer.config
 from topicexplorer.lib.util import isint, is_valid_configfile, bool_prompt
 
+from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \
+    Button, TextBox, Widget, Label
+from asciimatics.scene import Scene
+from asciimatics.screen import Screen
+from asciimatics.exceptions import ResizeScreenError, NextScene, StopApplication
+from copy import deepcopy
+
 # NLTK Langauges
 langs = dict(da='danish', nl='dutch', en='english', fi='finnish', fr='french',
              de='german', hu='hungarian', it='italian', no='norwegian',
@@ -282,14 +289,14 @@ def get_closest_bin(c, thresh, reverse=False, counts=None):
         return counts[min(np.searchsorted(cumsum, thresh), len(counts)-1)]
 
 
-def get_high_filter(c, words=None, items=None, counts=None):
+def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     header = "FILTER HIGH FREQUENCY WORDS"
     stars = old_div((80 - len(header) - 2), 2)
-    print("\n\n{0} {1} {0}".format('*' * stars, header))
-    print("    This will remove all words occurring N or more times.")
-    print("    The histogram below shows how many words will be removed")
-    print("    by selecting each maximum frequency threshold.\n")
+    # print("\n\n{0} {1} {0}".format('*' * stars, header))
+    # print("    This will remove all words occurring N or more times.")
+    # print("    The histogram below shows how many words will be removed")
+    # print("    by selecting each maximum frequency threshold.\n")
 
     # Get frequency bins
     if items is None or counts is None:
@@ -299,79 +306,118 @@ def get_high_filter(c, words=None, items=None, counts=None):
     bins = sorted(set(bins))
     bins.append(max(counts))
 
-    high_filter = False
-    while not high_filter:
-        bin_counts, bins = np.histogram(counts, bins=bins)
-        print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus',
-                                                                 "# words", "Rate"))
-        last_row = 0
-        for bin, count in zip(bins[-2::-1], np.cumsum(bin_counts[::-1])):
-            filtered_counts = counts[get_mask(c, words)]
-            if (filtered_counts >= bin).sum() > last_row:
-                percentage = 1. - (old_div(counts[counts < bin].sum(), float(c.original_length)))
-                print("{0:>5.0f}x".format(bin).rjust(8), end=' ')
-                print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ')
-                print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ')
-                print("  {0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(14), end=' ')
-                print(">= {0:>5.0f}x".format(bin).ljust(8))
-
-            last_row = (filtered_counts >= bin).sum()
-
-        print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ')
-        print('{} words total'.format(get_mask(c, words).sum()).rjust(20))
-        print('')
+    try:
+        num = int(num)
+    except:
+        # TODO: show invalid num screen
+        num = "str"
+
+    ret = ""
+
+    # do input validation here
 
+    high_filter = False
+    # while not high_filter:
+    bin_counts, bins = np.histogram(counts, bins=bins)
+    # print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus',
+    #                                                          "# words", "Rate"))
+    ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', "# words", "Rate") + "\n"
+    last_row = 0
+    for bin, count in zip(bins[-2::-1], np.cumsum(bin_counts[::-1])):
+        filtered_counts = counts[get_mask(c, words)]
+        if (filtered_counts >= bin).sum() > last_row:
+            percentage = 1. - (old_div(counts[counts < bin].sum(), float(c.original_length)))
+            # print("{0:>5.0f}x".format(bin).rjust(8), end=' ')
+            # print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ')
+            # print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ')
+            # print("  {0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(14), end=' ')
+            # print(">= {0:>5.0f}x".format(bin).ljust(8))
+            ret += "{0:>5.0f}x".format(bin).rjust(8)
+            ret += '{0:2.1f}% '.format(percentage * 100).rjust(10)
+            ret += (u'\u2588' * int(percentage * 36)).ljust(36)
+            ret += "{0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(15)
+            ret += " >={0:>5.0f}x".format(bin).ljust(8) + "\n"
+
+        last_row = (filtered_counts >= bin).sum()
+
+    # return ret
+    # print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ')
+    # print('{} words total'.format(get_mask(c, words).sum()).rjust(20))
+    # print('')
+    ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37)
+    ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n'
+    # ret += str(type(num)) + " " + str(num)
+    return ret
+
+def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
+    """Describe the words a high-frequency filter of ``num`` would remove.
+
+    Non-interactive replacement for the prompt loop of the old
+    ``get_high_filter``: the threshold arrives as ``num`` (typically the
+    raw text of a form field) instead of being read with ``input()``.
+
+    :param c: corpus object; ``c.words`` is compared against the candidates.
+    :param words: words already stoplisted, forwarded to ``get_mask`` and
+        ``get_candidate_words``.
+    :param items: cached corpus items, forwarded to ``get_candidate_words``.
+    :param counts: array of per-word occurrence counts, indexed like
+        ``c.words``.
+    :param num: threshold; the summary counts words occurring ``num`` or
+        more times. Anything ``int()`` cannot convert is treated as invalid.
+    :returns: ``(candidates, filtered)`` -- the candidate words sorted by
+        descending count and a human-readable summary. ``([], "")`` is
+        returned when ``num`` is invalid so callers can always unpack.
+    """
+    import numpy as np
+
+    # Defaults keep the trailing ``return`` safe if the ValueError handler
+    # below is taken before either name has been assigned.
+    candidates = []
+    filtered = ""
+
+    try:
+        num = int(num)
+    except (TypeError, ValueError):
+        # TODO: show invalid num screen
+        # (int() raises TypeError for None and ValueError for bad text.)
+        return (candidates, filtered)
+    input_filter = num
+
+    # ``accept`` and the retry loop of the interactive version are gone;
+    # accepting or changing the threshold is now the caller's job.
+    try:
+        candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts)
+        # NOTE(review): pairing ``candidates`` with the ``np.where`` indices
+        # assumes both are in ``c.words`` order -- carried over unchanged.
+        places = np.in1d(c.words, candidates)
+        places = dict(zip(candidates, np.where(places)[0]))
+        candidates = sorted(candidates, key=lambda x: counts[places[x]], reverse=True)
+        filtered_counts = counts[get_mask(c, words)]
+        removed = filtered_counts[filtered_counts >= input_filter]
+
+        filtered = "Filter will remove {} occurrences of these {} words: ".format(
+            removed.sum(), len(removed))
+        filtered += u' '.join(candidates)
+
+        if len(candidates) == len(c.words):
+            # The interactive version printed these as space-separated
+            # arguments on two lines; keep the separators in the string.
+            filtered += "\n\nChoice of {} will remove ALL words from the corpus.".format(input_filter)
+            filtered += "\nPlease choose a different filter."
+
+    except ValueError:
         input_filter = 0
-        accept = None
-        while not input_filter or input_filter <= 0:
-            try:
-                if high_filter:
-                    input_filter = high_filter
-                else:
-                    input_filter = int(input("Enter the maximum rate: ").replace('x', ''))
-                candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts)
-                places = np.in1d(c.words, candidates)
-                places = dict(zip(candidates, np.where(places)[0]))
-                candidates = sorted(candidates, key=lambda x: counts[places[x]], reverse=True)
-                filtered_counts = counts[get_mask(c, words)]
-
-                print("Filter will remove", filtered_counts[filtered_counts >= input_filter].sum(), end=' ')
-                print("occurrences", "of these", len(filtered_counts[filtered_counts >= input_filter]), "words:")
-                print(u' '.join(candidates))
-
-                print("\nFilter will remove", filtered_counts[filtered_counts >= input_filter].sum(), end=' ')
-                print("occurrences", "of these", len(filtered_counts[filtered_counts >= input_filter]), "words.", end=' ')
-                if len(candidates) == len(c.words):
-                    print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.")
-                    print("Please choose a different filter.")
-                    high_filter = 0
-                    input_filter = 0
-                else:
-                    accept = None
-                    while accept not in ['y', 'n']:
-                        accept = input("\nAccept filter? [y/n/[different max number]] ")
-                        if isint(accept):
-                            high_filter = int(accept)
-                            input_filter = 0
-                            accept = 'n'
-                        elif accept == 'y':
-                            high_filter = input_filter
-                        elif accept == 'n':
-                            high_filter = 0
-
-            except ValueError:
-                input_filter = 0
-    return (high_filter, candidates)
-
-
-def get_low_filter(c, words=None, items=None, counts=None):
+    return (candidates, filtered)
+
+
+def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     header = "FILTER LOW FREQUENCY WORDS"
     stars = old_div((80 - len(header) - 2), 2)
-    print("\n\n{0} {1} {0}".format('*' * stars, header))
-    print("    This will remove all words occurring less than N times.")
-    print("    The histogram below shows how many words will be removed")
-    print("    by selecting each minimum frequency threshold.\n")
+    # print("\n\n{0} {1} {0}".format('*' * stars, header))
+    # print("    This will remove all words occurring less than N times.")
+    # print("    The histogram below shows how many words will be removed")
+    # print("    by selecting each minimum frequency threshold.\n")
 
     # Get frequency bins
     if items is None or counts is None:
@@ -381,78 +427,332 @@ def get_low_filter(c, words=None, items=None, counts=None):
     bins = sorted(set(bins))
     bins.append(max(counts))
 
-    low_filter = False
-    while low_filter is False:
-        bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins)
-        # print "{0:>10s} {1:>10s}".format("# Tokens", "# Words")
-        print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus',
-                                                                 "# words", "Rate"))
-
-        last_row = 0
-        for bin, count in zip(bins, np.cumsum(bin_counts)):
-            filtered_counts = counts[get_mask(c, words)]
-            if last_row < (filtered_counts < bin).sum() <= len(filtered_counts):
-                percentage = (old_div(counts[counts <= bin].sum(), float(c.original_length)))
-                print("{0:>5.0f}x".format(bin).rjust(8), end=' ')
-                print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ')
-                print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ')
-                print("  {0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(14), end=' ')
-                print("<= {0:>5.0f}x".format(bin).ljust(8))
-                if (filtered_counts < bin).sum() == len(filtered_counts):
-                    break
-            last_row = (filtered_counts >= bin).sum()
-
-
-        print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ')
-        print('{} words total'.format(get_mask(c, words).sum()).rjust(20))
-        print('')
+    try:
+        num = int(num)
+    except:
+        # TODO: show invalid num screen
+        num = "str"
+
+    ret = ""
 
+    low_filter = False
+    # while low_filter is False:
+    bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins)
+    # The header row is appended to ``ret`` below rather than printed:
+    # writing to stdout here would bypass the returned chart text (the
+    # high-frequency chart already builds its header this way).
+    ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n"
+    last_row = 0
+    for bin, count in zip(bins, np.cumsum(bin_counts)):
+        filtered_counts = counts[get_mask(c, words)]
+        if last_row < (filtered_counts < bin).sum() <= len(filtered_counts):
+            percentage = (old_div(counts[counts <= bin].sum(), float(c.original_length)))
+            # print("{0:>5.0f}x".format(bin).rjust(8), end=' ')
+            # print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ')
+            # print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ')
+            # print("  {0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(14), end=' ')
+            # print("<= {0:>5.0f}x".format(bin).ljust(8))
+            ret += "{0:>5.0f}x".format(bin).rjust(8)
+            ret += '{0:2.1f}%'.format(percentage * 100).rjust(9)
+            ret += " " + (u'\u2588' * int(percentage * 36)).ljust(36)
+            ret += "{0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(15)
+            ret += " <={0:>5.0f}x".format(bin).ljust(8) + "\n"
+            if (filtered_counts < bin).sum() == len(filtered_counts):
+                break
+        last_row = (filtered_counts >= bin).sum()
+
+
+    # print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ')
+    # print('{} words total'.format(get_mask(c, words).sum()).rjust(20))
+    # print('')
+    ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37)
+    ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n'
+    return ret
+
+def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
+    """Describe the words a low-frequency filter of ``num`` would remove.
+
+    Non-interactive replacement for the prompt loop of the old
+    ``get_low_filter``: the threshold arrives as ``num`` (typically the
+    raw text of a form field) instead of being read with ``input()``.
+
+    :param c: corpus object; ``c.words`` is compared against the candidates.
+    :param words: words already stoplisted, forwarded to ``get_mask`` and
+        ``get_candidate_words``.
+    :param items: cached corpus items, forwarded to ``get_candidate_words``.
+    :param counts: array of per-word occurrence counts, indexed like
+        ``c.words``.
+    :param num: threshold; the summary counts words occurring ``num`` or
+        fewer times. Anything ``int()`` cannot convert is treated as invalid.
+    :returns: ``(candidates, filtered)`` -- the candidate words sorted by
+        ascending count and a human-readable summary. ``([], "")`` is
+        returned when ``num`` is invalid so callers can always unpack.
+    """
+    import numpy as np
+
+    # Defaults keep the trailing ``return`` safe if the ValueError handler
+    # below is taken before either name has been assigned.
+    candidates = []
+    filtered = ""
+
+    try:
+        num = int(num)
+    except (TypeError, ValueError):
+        # TODO: show invalid num screen
+        return (candidates, filtered)
+    input_filter = num
+
+    try:
+        # Threshold is passed negated, exactly as the interactive version did.
+        candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts)
+        # NOTE(review): pairing ``candidates`` with the ``np.where`` indices
+        # assumes both are in ``c.words`` order -- carried over unchanged.
+        places = np.in1d(c.words, candidates)
+        places = dict(zip(candidates, np.where(places)[0]))
+        candidates = sorted(candidates, key=lambda x: counts[places[x]])
+        filtered_counts = counts[get_mask(c, words)]
+        removed = filtered_counts[filtered_counts <= input_filter]
+
+        filtered = "Filter will remove {} tokens of these {} words: ".format(
+            removed.sum(), len(removed))
+        filtered += u' '.join(candidates)
+
+        if len(candidates) == len(c.words):
+            # ``input_filter`` is an int, so it is formatted rather than
+            # concatenated (str + int raises TypeError).
+            filtered += "\n\nChoice of {} will remove ALL words from the corpus.".format(input_filter)
+            filtered += "\nPlease choose a different filter."
+
+    except ValueError:
         input_filter = 0
-        accept = None
-        while not input_filter or input_filter <= 0:
-            try:
-                if low_filter:
-                    input_filter = low_filter
-                else:
-                    input_filter = int(input("Enter the minimum rate: ").replace('x', ''))
-
-                candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts)
-                places = np.in1d(c.words, candidates)
-                places = dict(zip(candidates, np.where(places)[0]))
-                candidates = sorted(candidates, key=lambda x: counts[places[x]])
-                filtered_counts = counts[get_mask(c, words)]
-
-                print("Filter will remove", filtered_counts[filtered_counts <= input_filter].sum(), "tokens", end=' ')
-                print("of these", len(filtered_counts[filtered_counts <= input_filter]), "words:")
-                print(u' '.join(candidates))
-
-                print("\nFilter will remove", filtered_counts[filtered_counts <= input_filter].sum(), "tokens", end=' ')
-                print("of these", len(filtered_counts[filtered_counts <= input_filter]), "words.", end=' ')
-
-                if len(candidates) == len(c.words):
-                    print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.")
-                    print("Please choose a different filter.")
-                    low_filter = 0
-                    input_filter = 0
-                else:
-                    accept = None
-                    while accept not in ['y', 'n']:
-                        accept = input("\nAccept filter? [y/n/[different min. number] ")
-                        if isint(accept):
-                            low_filter = int(accept)
-                            input_filter = 0
-                            accept = 'n'
-                        elif accept == 'y':
-                            low_filter = input_filter
-                        elif accept == 'n':
-                            low_filter = False
-
-            except ValueError:
-                input_filter = 0
-
-    return (low_filter, candidates)
+
+    return (candidates, filtered)
+
+class PrepData(Frame):
+    def __init__(self):
+        # Shared state for the prep scenes: the widgets and candidate lists
+        # below are read and updated by several frames. NOTE(review):
+        # Frame.__init__ is never called here -- confirm that is intended.
+        self.label = Label("change this")
+        self.lang = Label("haha")
+        self.summaryHigh = Text("High frequency word filter (#):", "summaryHighFreq")
+        self.high = Text("High frequency word filter (#):", "highFreq")
+        self.highLabel = Label("high label", height=35)
+        self.highFiltered = Label("filtered", height = 10)
+        self.highCandidates = []
+        self.summaryLow = Text("Low frequency word filter (#)", "summaryLowFreq")
+        self.low = Text("Low frequency word filter (#)", "lowFreq")
+        self.lowLabel = Label("low label", height=35)
+        self.lowCandidates = []
+        self.counter = 0
+    
+    def update_lang(self, l):
+        self.lang = l
+
+    def setHigh(self):
+        # Placeholder: overwrites the high-frequency field with a fixed
+        # string by writing the widget's private ``_value`` directly.
+        self.high._value = "hello"
+
+class Summary(Frame):
+    """Summary scene: filter settings plus buttons leading to each editor scene."""
+    def __init__(self, screen):
+        super(Summary, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
+                                        title="Summary", reduce_cpu=True)
+
+        global data
+
+        # The two frequency fields are the shared widgets held by ``data``,
+        # so the values typed here are visible to the callbacks below.
+        layout = Layout([100], fill_frame=True)
+        self.add_layout(layout)
+        layout.add_widget(data.summaryHigh)
+        layout.add_widget(data.summaryLow)
+        layout.add_widget(Text("Language-specific stopwords: ", "lang"))
+        layout.add_widget(Text("Minimum word length: ", "length"))
+        layout.add_widget(Label("need to add original corpus size"))
+        layout.add_widget(Label("need to add prepped corpus size"))
+        layout2 = Layout([1, 1, 1, 1, 1])
+        self.add_layout(layout2)
+        layout2.add_widget(Button("prep", self._prep), 0)
+        layout2.add_widget(Button("high", self._high), 1)
+        layout2.add_widget(Button("low", self._low), 2)
+        layout2.add_widget(Button("lang", self._lang), 3)
+        layout2.add_widget(Button("exit", self._exit), 4)
+        self.fix()
+    
+    def _prep(self):
+        """Save the form and stop the application."""
+        self.save()
+        raise StopApplication("Quitting")
+
+    def _high(self):
+        """Refresh the high-frequency chart and open the "High Freq" scene."""
+        self.save()
+        global data
+        # Seed the editor scene's field with the value typed on this screen.
+        data.high._value = data.summaryHigh.value
+        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=data.summaryHigh.value)
+        
+        # TODO dont stoplist yet (do it at the end)
+        # Chart against a copy so ``data.stoplist`` itself is left untouched.
+        temp = deepcopy(data.stoplist)
+        temp.update(data.highCandidates)
+        temp.update(data.lowCandidates) # should I do this?
+        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                                num=data.summaryHigh.value)
+        # Append the removal summary beneath the chart.
+        data.highLabel.text += filtered
+        raise NextScene("High Freq")
+    
+    def _low(self):
+        """Refresh the low-frequency chart and open the "Low Freq" scene."""
+        self.save()
+        global data
+        # Seed the editor scene's field with the value typed on this screen.
+        data.low._value = data.summaryLow.value
+        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=data.summaryLow.value)
+
+        # TODO dont stoplist yet (do it at the end)
+        # Chart against a copy so ``data.stoplist`` itself is left untouched.
+        temp = deepcopy(data.stoplist)
+        temp.update(data.highCandidates)
+        temp.update(data.lowCandidates)
+        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                            num=data.summaryLow.value)
+        # Append the removal summary beneath the chart.
+        data.lowLabel.text += filtered
+        raise NextScene("Low Freq")
+
+    def _lang(self):
+        """Save the form and open the "Lang" scene."""
+        self.save()
+        raise NextScene("Lang")
+
+    def _exit(self):
+        """Stop the application without saving the form."""
+        # NOTE(review): both this and _prep raise StopApplication("Quitting"),
+        # so the caller cannot tell "prep" from "exit" by the message alone.
+        raise StopApplication("Quitting")
+
+class HighFreq(Frame):
+    """Editor scene for the high-frequency filter.
+
+    Shows the chart label, the threshold field and Ok/Update buttons.
+    """
+    def __init__(self, screen):
+        super(HighFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
+                                        title="High Frequency Word Filter", reduce_cpu=True)
+
+        global data
+        
+        # Chart label and threshold field are the shared widgets on ``data``.
+        layout = Layout([100], fill_frame=True)
+        self.add_layout(layout)
+        layout.add_widget(data.highLabel)
+        layout.add_widget(data.high)
+        layout2 = Layout([1, 1])
+        self.add_layout(layout2)
+        layout2.add_widget(Button("Ok", self._ok), 0)
+        layout2.add_widget(Button("Update", self._change), 1)
+        self.fix()
+
+    def _ok(self):
+        """Accept the threshold: sync Summary, refresh chart, go to Summary."""
+        self.save()
+        global data
+        # Push the accepted value back into the Summary scene's field.
+        data.summaryHigh._value = data.high.value
+
+        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=data.high.value)
+        
+        # TODO dont stoplist yet (do it at the end)
+        # Chart against a copy so ``data.stoplist`` itself is left untouched.
+        temp = deepcopy(data.stoplist)
+        temp.update(data.highCandidates)
+        temp.update(data.lowCandidates) # should I do this?
+        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                                num=data.high.value)
+        # Append the removal summary beneath the chart.
+        data.highLabel.text += filtered
+        raise NextScene("Summary")
+    
+    def _change(self):
+        """Recompute candidates and chart for the typed threshold; stay here."""
+        self.save()
+        global data
+        # Same refresh as _ok, minus the Summary sync and the scene change.
+        # NOTE(review): the duplicated body could move into a shared helper.
+        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=data.high.value)
+        
+        # TODO dont stoplist yet (do it at the end)
+        # Chart against a copy so ``data.stoplist`` itself is left untouched.
+        temp = deepcopy(data.stoplist)
+        temp.update(data.highCandidates)
+        temp.update(data.lowCandidates) # should I do this?
+        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                                num=data.high.value)
+        # Append the removal summary beneath the chart.
+        data.highLabel.text += filtered
+
+class LowFreq(Frame):
+    """Scene frame showing the low-frequency chart and its threshold field."""
+    def __init__(self, screen):
+        super(LowFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
+                                        title="Low Frequency Word Filter", reduce_cpu=True)
+
+        # Widgets are owned by the module-level ``data`` so other frames share them.
+        layout = Layout([100], fill_frame=True)
+        self.add_layout(layout)
+        layout.add_widget(data.lowLabel)
+        layout.add_widget(data.low)
+        layout2 = Layout([1, 1])
+        self.add_layout(layout2)
+        layout2.add_widget(Button("Ok", self._ok), 0)
+        layout2.add_widget(Button("Update", self._change), 1)
+        self.fix()
+
+    def _ok(self):
+        """Save the form and switch back to the Summary scene."""
+        self.save()
+        raise NextScene("Summary")
+
+    def _change(self):
+        """Copy the threshold typed here into the Summary scene's field."""
+        self.save()
+        # Copy the field's text; ``data.low`` itself is the Text widget.
+        data.summaryLow._value = data.low.value
+
+class Lang(Frame):
+    """Scene for the language-specific stopword setting."""
+    def __init__(self, screen):
+        super(Lang, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
+                                        title="Lang", reduce_cpu=True)
+
+        # Single text field plus a placeholder label; Ok is the only button.
+        layout = Layout([100], fill_frame=True)
+        self.add_layout(layout)
+        layout.add_widget(Text(label="Language-specific stopwords: ", name="lang"))
+        layout.add_widget(Label("hello"))
+        layout2 = Layout([1, 1, 1])
+        self.add_layout(layout2)
+        layout2.add_widget(Button("Ok", self._ok), 0)
+        self.fix()
+    
+    def _ok(self):
+        """Save the form and return to the Summary scene."""
+        self.save()
+        raise NextScene("Summary")
 
 def main(args):
+    global data
+    data = PrepData()
+    print("IN MAINNNNNNNNNNNNNNNNN")
+
     config = topicexplorer.config.read(args.config_file)
 
     if config.getboolean("main", "sentences"):
@@ -464,9 +764,9 @@ def main(args):
         args.lang = []
 
     args.corpus_path = config.get("main", "corpus_file")
-    c = Corpus.load(args.corpus_path)
+    data.c = Corpus.load(args.corpus_path)
 
-    if c.original_length != len(c.corpus):
+    if data.c.original_length != len(data.c.corpus):
         print("Corpus has already been prepared. Proceed to training or")
         print("re-init the corpus to apply a different set of stopwords.")
         print("\nTIP: Train the LDA models with:")
@@ -480,6 +780,8 @@ def main(args):
         args.lang.extend(new_langs)
     """
 
+    # NEXT 2 IF AND THE FOR ARE FOR LANG (PUT THEM IN LANG SCENE)
+
     # add default locale if no other languages are specified
     # do not add if in quiet mode -- make everything explicit
     if not args.lang and not args.quiet:
@@ -489,19 +791,21 @@ def main(args):
             args.lang.append(locale)
 
     # check for any new candidates
-    args.lang = [lang for lang in args.lang if stop_language(c, langs[lang])]
+    args.lang = [lang for lang in args.lang if stop_language(data.c, langs[lang])]
     if args.lang and not args.quiet:
         args.lang = lang_prompt(args.lang)
 
-    stoplist = set()
+    data.stoplist = set()
     # Apply stop words
     print(" ")
     for lang in args.lang:
         print("Applying", langs[lang], "stopwords")
-        candidates = stop_language(c, langs[lang])
+        candidates = stop_language(data.c, langs[lang])
         if len(candidates):
-            stoplist.update(candidates)
+            data.stoplist.update(candidates)
 
+    # DO THIS AUTOMATICALLY; NO NEED FOR A SCENE. MAYBE HAVE SOME SORT OF INFO SCENE TO DISPLAY THIS INFO IN.
+    
     # Apply custom stopwords file
     if args.stopword_file:
         with open(args.stopword_file, encoding='utf8') as swf:
@@ -511,76 +815,112 @@ def main(args):
             if len(candidates):
                 print("Applying custom stopword file to remove {} word{}.".format(
                     len(candidates), 's' if len(candidates) > 1 else ''))
-                stoplist.update(candidates)
+                data.stoplist.update(candidates)
     
+    # DO THIS AUTOMATICALLY BASED OFF ARGS
+
     if args.min_word_len:
-        candidates = get_small_words(c, args.min_word_len)
+        candidates = get_small_words(data.c, args.min_word_len)
         if len(candidates):
             print("Filtering {} small word{} with less than {} characters.".format(
                 len(candidates), 's' if len(candidates) > 1 else '', args.min_word_len))
-            stoplist.update(candidates)
+            data.stoplist.update(candidates)
 
+    # DO THIS AUTOMATICALLY BASED OFF ARGS, NOT THE FIRST IF
+    # TODO TEST USUAL BEHAVIOR
 
     # cache item counts
-    items, counts = get_corpus_counts(c)
-    if args.high_filter is None and args.high_percent is None and not args.quiet:
-        args.high_filter, candidates = get_high_filter(c, words=stoplist, items=items, counts=counts)
-        if len(candidates):
-            print("Filtering {} high frequency word{}.".format(len(candidates),
-                                                               's' if len(candidates) > 1 else ''))
-            stoplist.update(candidates)
-    elif args.high_filter is None and args.high_percent is None and args.quiet:
+    data.items, data.counts = get_corpus_counts(data.c)
+    # if args.high_filter is None and args.high_percent is None and not args.quiet:
+    #     args.high_filter, candidates = get_high_filter(c, words=stoplist, items=items, counts=counts)
+    #     if len(candidates):
+    #         print("Filtering {} high frequency word{}.".format(len(candidates),
+    #                                                            's' if len(candidates) > 1 else ''))
+    #         stoplist.update(candidates)
+    # elif args.high_filter is None and args.high_percent is None and args.quiet:
+    if args.high_filter is None and args.high_percent is None and args.quiet:
         pass
     elif args.high_filter:
-        candidates = get_candidate_words(c, args.high_filter, sort=False, items=items, counts=counts)
+        candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
             print("Filtering {} high frequency word{}.".format(len(candidates),
                                                                's' if len(candidates) > 1 else ''))
-            stoplist.update(candidates)
+            data.stoplist.update(candidates)
     elif args.high_percent:
-        args.high_filter = get_closest_bin(c, 1 - (args.high_percent / 100.), counts=counts)
+        args.high_filter = get_closest_bin(data.c, 1 - (args.high_percent / 100.), counts=data.counts)
         print(args.high_filter)
-        candidates = get_candidate_words(c, args.high_filter, sort=False, items=items, counts=counts)
+        candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
             print("Filtering {} high frequency word{}.".format(len(candidates),
                                                                's' if len(candidates) > 1 else ''))
-            stoplist.update(candidates)
+            data.stoplist.update(candidates)
 
-    if args.low_filter is None and args.low_percent is None and not args.quiet:
-        args.low_filter, candidates = get_low_filter(c, words=stoplist, items=items, counts=counts)
-        if len(candidates):
-            print("Filtering {} low frequency word{}.".format(len(candidates),
-                                                              's' if len(candidates) > 1 else ''))
-            stoplist.update(candidates)
-    elif args.low_filter is None and args.low_percent is None and args.quiet:
+    # DO THIS AUTOMATICALLY BASE OFF ARGS, NOT THE FIRST IF
+    # TODO TEST USUAL BEHAVIOR
+    
+    # if args.low_filter is None and args.low_percent is None and not args.quiet:
+    #     args.low_filter, candidates = get_low_filter(c, words=stoplist, items=items, counts=counts)
+    #     if len(candidates):
+    #         print("Filtering {} low frequency word{}.".format(len(candidates),
+    #                                                           's' if len(candidates) > 1 else ''))
+    #         stoplist.update(candidates)
+    # elif args.low_filter is None and args.low_percent is None and args.quiet:
+    if args.low_filter is None and args.low_percent is None and args.quiet:
         pass
     elif args.low_filter:
-        candidates = get_candidate_words(c, -1 * args.low_filter, sort=False, items=items, counts=counts)
+        candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
             print("Filtering {} low frequency words.".format(len(candidates)))
-            stoplist.update(candidates)
+            data.stoplist.update(candidates)
 
     elif args.low_percent:
-        args.low_filter = get_closest_bin(c, 1 - (args.low_percent / 100.), reverse=True, counts=counts)
+        args.low_filter = get_closest_bin(data.c, 1 - (args.low_percent / 100.), reverse=True, counts=data.counts)
         print(args.low_filter)
-        candidates = get_candidate_words(c, -1 * args.low_filter, sort=False, items=items, counts=counts)
+        candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
             print("Filtering {} low frequency word{}.".format(len(candidates),
                                                                's' if len(candidates) > 1 else ''))
-            stoplist.update(candidates)
-
-
-
-    if not stoplist:
+            data.stoplist.update(candidates)
+
+    def gui(screen, scene):
+        scenes = [
+            Scene([Summary(screen)], -1, name="Summary"),
+            Scene([HighFreq(screen)], -1, name="High Freq"),
+            Scene([LowFreq(screen)], -1, name="Low Freq"),
+            Scene([Lang(screen)], -1, name="Lang")
+        ]
+        screen.play(scenes, stop_on_resize=True, start_scene=scene)
+
+    last_scene = None
+    # global data = PrepData()
+    while True:
+        try:
+            Screen.wrapper(gui, catch_interrupt=True, arguments=[last_scene])
+            break
+            # sys.exit(0)
+        except ResizeScreenError as e:
+            last_scene = e.scene
+
+    # DO THIS WHEN PREPPING MAYBE? THE EXIT PORTION
+    # TODO TEST WHEN THIS HAPPENS, PUT IN SCREEN AFTER PREP
+
+    print("out of the loop")
+
+    data.stoplist.update(data.highCandidates)
+    data.stoplist.update(data.lowCandidates)
+
+    if not data.stoplist:
         print("No stopwords applied.\n\n")
 
         sys.exit(0)
     else:
-        print("\n\nApplying {} stopword{}".format(len(stoplist),
-                                                  's' if len(stoplist) > 1 else ''))
-        c.in_place_stoplist(stoplist)
+        print("\n\nApplying {} stopword{}".format(len(data.stoplist),
+                                                  's' if len(data.stoplist) > 1 else ''))
+        data.c.in_place_stoplist(data.stoplist)
         print("\n")
 
+    # LEAVE THE REST, TILL THE END OF THIS METHOD AS IS
+
     def name_corpus(dirname, languages, lowfreq=None, highfreq=None):
         corpus_name = [dirname]
 
@@ -602,7 +942,7 @@ def name_corpus(dirname, languages, lowfreq=None, highfreq=None):
 
     model_path = os.path.dirname(args.corpus_path)
     args.corpus_path = os.path.join(model_path, corpus_name)
-    c.save(args.corpus_path)
+    data.c.save(args.corpus_path)
 
     config.set("main", "corpus_file", args.corpus_path)
     config.remove_option("main", "model_pattern")
@@ -653,3 +993,5 @@ def populate_parser(parser):
     args = parser.parse_args()
 
     main(args)
+
+data = ""
\ No newline at end of file

From 79f7db48919f652115f76a8c2875c462ec1a86d9 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Mon, 12 Nov 2018 13:13:50 -0500
Subject: [PATCH 02/21] copy before removing comments

---
 topicexplorer/prep.py | 751 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 630 insertions(+), 121 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index c3430990..9606bb7f 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -131,7 +131,7 @@
 from topicexplorer.lib.util import isint, is_valid_configfile, bool_prompt
 
 from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \
-    Button, TextBox, Widget, Label
+    Button, TextBox, Widget, Label, PopUpDialog, PopupMenu, CheckBox
 from asciimatics.scene import Scene
 from asciimatics.screen import Screen
 from asciimatics.exceptions import ResizeScreenError, NextScene, StopApplication
@@ -277,7 +277,7 @@ def get_closest_bin(c, thresh, reverse=False, counts=None):
     if thresh == 0 and reverse:
         return max(counts) + 1
     elif thresh == 0 and not reverse:
-        return 0
+        return 1
     else:
         # sort counts
         counts = counts[counts.argsort()]
@@ -306,11 +306,11 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
     bins = sorted(set(bins))
     bins.append(max(counts))
 
-    try:
-        num = int(num)
-    except:
-        # TODO: show invalid num screen
-        num = "str"
+    # try:
+    #     num = int(num)
+    # except:
+    #     # TODO: show invalid num screen
+    #     num = "str"
 
     ret = ""
 
@@ -351,12 +351,12 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
 
 def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
-    try:
-        num = int(num)
-    except:
-        # TODO: show invalid num screen
-        num = "str"
-        return
+    # try:
+    #     num = int(num)
+    # except:
+    #     # TODO: show invalid num screen
+    #     num = "str"
+    #     return
     input_filter = num
     accept = None
     # while not input_filter or input_filter <= 0:
@@ -427,11 +427,11 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
     bins = sorted(set(bins))
     bins.append(max(counts))
 
-    try:
-        num = int(num)
-    except:
-        # TODO: show invalid num screen
-        num = "str"
+    # try:
+    #     num = int(num)
+    # except:
+    #     # TODO: show invalid num screen
+    #     num = "str"
 
     ret = ""
 
@@ -439,8 +439,8 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
     # while low_filter is False:
     bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins)
     # print "{0:>10s} {1:>10s}".format("# Tokens", "# Words")
-    print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus',
-                                                                "# words", "Rate"))
+    # print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus',
+    #                                                             "# words", "Rate"))
     ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n"
     last_row = 0
     for bin, count in zip(bins, np.cumsum(bin_counts)):
@@ -471,12 +471,12 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
 
 def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
-    try:
-        num = int(num)
-    except:
-        # TODO: show invalid num screen
-        num = "str"
-        return
+    # try:
+    #     num = int(num)
+    # except:
+    #     # TODO: show invalid num screen
+    #     num = "str"
+    #     return
     input_filter = num
     accept = None
     # while not input_filter or input_filter <= 0:
@@ -506,7 +506,7 @@ def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
         if len(candidates) == len(c.words):
             # print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.")
             # print("Please choose a different filter.")
-            filtered += "\n\nChoice of" + input_filter + "will remove ALL words from the corpus."
+            filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus."
             filtered += "Please choose a different filter."
             # low_filter = 0
             # input_filter = 0
@@ -530,29 +530,99 @@ def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
 
 class PrepData(Frame):
     def __init__(self):
+        self.stoplist = set()
         # super(PrepData, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
         #                                 title="null", reduce_cpu=True)
         self.label = Label("change this")
-        self.lang = Label("haha")
-        self.summaryHigh = Text("High frequency word filter (#):", "summaryHighFreq")
-        self.high = Text("High frequency word filter (#):", "highFreq")
+        # self.lang = Label("haha")
+        # self.summaryHigh = Text("High frequency word filter (#):", "summaryHighFreq")
+        # self.summaryHighPercent = Text("High frequency word filter (%):", "summaryHighPercent")
+        self.summaryHigh = Text(label="Number of word frequency:", name="summaryHighFreq", on_change=self.summaryHighNumFocus)
+        self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent", on_change=self.summaryHighPercentFocus)
+        self.summaryHighFocus = False
+        self.high = Text("High frequency word filter (#):", "highFreq", on_change=self.highNumFocus)
+        self.highPercent = Text("High ferquency word filter (%):", "highPercent", on_change=self.highPercentFocus)
         self.highLabel = Label("high label", height=35)
-        self.highFiltered = Label("filtered", height = 10)
+        self.highFocus = False
         self.highCandidates = []
-        self.summaryLow = Text("Low frequency word filter (#)", "summaryLowFreq")
-        self.low = Text("Low frequency word filter (#)", "lowFreq")
+        # self.highFiltered = Label("filtered", height = 10)
+        # self.summaryLow = Text("Low frequency word filter (#):", "summaryLowFreq")
+        # self.summaryLowPercent = Text("Low frequency word filter (%):", "summaryLowPercent")
+        self.summaryLow = Text("Number of word frequency:", "summaryLowFreq", on_change=self.summaryLowNumFocus)
+        self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent", on_change=self.summaryLowPercentFocus)
+        self.summaryLowFocus = False
+        self.low = Text("Low frequency word filter (#):", "lowFreq", on_change=self.lowNumFocus)
+        self.lowPercent = Text("Low frequency word filter (%):", "lowPercent", on_change=self.lowPercentFocus)
         self.lowLabel = Label("low label", height=35)
+        self.lowFocus = False
         self.lowCandidates = []
+        self.minWord = Text("Minimum word length: ", "length")
         self.counter = 0
+        self.error = Label("Error message")
+        self.switch = 0
+        self.stopCandidates = []
+        self.english = CheckBox("Yes", label="Apply English stopwords")
+        self.englishCandidates = []
+        self.prepSize = Label("need to update length", align="^")
         # self.high.value("hello")
     
-    def update_lang(self, l):
-        self.lang = l
-
-    def setHigh(self):
-        # use this to change values of other elements
-        self.high._value = "hello"
-        # self._data["highFreq"] = val
+    # def update_lang(self, l):
+    #     self.lang = l
+
+    def summaryHighPercentFocus(self):  # on_change callback of self.summaryHighPercent
+        if self.summaryHighFocus:  # flag is set True by Summary._prepHigh/_popupHigh after they focus both high fields
+            self.summaryHighFocus = False
+            self.summaryHigh.blur()
+        if self.summaryLowFocus:  # flag is set True by Summary._prepLow/_popupLow; drop focus from both low fields
+            self.summaryLowFocus = False
+            self.summaryLow.blur()
+            self.summaryLowPercent.blur()
+    
+    def summaryHighNumFocus(self):  # on_change callback of self.summaryHigh
+        if self.summaryHighFocus:  # flag is set True by Summary._prepHigh/_popupHigh after they focus both high fields
+            self.summaryHighFocus = False
+            self.summaryHighPercent.blur()
+        if self.summaryLowFocus:  # flag is set True by Summary._prepLow/_popupLow; drop focus from both low fields
+            self.summaryLowFocus = False
+            self.summaryLow.blur()
+            self.summaryLowPercent.blur()
+
+    def highPercentFocus(self):  # on_change callback of self.highPercent
+        if self.highFocus:  # initialised to False in __init__; NOTE(review): nothing visible in this hunk sets it True
+            self.highFocus = False
+            self.high.blur()  # drop focus from the paired "#" field
+    
+    def highNumFocus(self):  # on_change callback of self.high
+        if self.highFocus:  # initialised to False in __init__; NOTE(review): nothing visible in this hunk sets it True
+            self.highFocus = False
+            self.highPercent.blur()  # drop focus from the paired "%" field
+    
+    def summaryLowPercentFocus(self):  # on_change callback of self.summaryLowPercent
+        if self.summaryLowFocus:  # flag is set True by Summary._prepLow/_popupLow after they focus both low fields
+            self.summaryLowFocus = False
+            self.summaryLow.blur()
+        if self.summaryHighFocus:  # flag is set True by Summary._prepHigh/_popupHigh; drop focus from both high fields
+            self.summaryHighFocus = False
+            self.summaryHigh.blur()
+            self.summaryHighPercent.blur()
+
+    def summaryLowNumFocus(self):  # on_change callback of self.summaryLow
+        if self.summaryLowFocus:  # flag is set True by Summary._prepLow/_popupLow after they focus both low fields
+            self.summaryLowPercent.blur()
+        if self.summaryHighFocus:  # flag is set True by Summary._prepHigh/_popupHigh; drop focus from both high fields
+            self.summaryHigh.blur()
+            self.summaryHighPercent.blur()
+        self.summaryLowFocus = self.summaryHighFocus = False  # reset both flags; the high one previously stayed set
+
+    def lowPercentFocus(self):  # on_change callback of self.lowPercent
+        if self.lowFocus:  # initialised to False in __init__; NOTE(review): nothing visible in this hunk sets it True
+            self.lowFocus = False
+            self.low.blur()  # drop focus from the paired "#" field
+    
+    def lowNumFocus(self):  # on_change callback of self.low
+        if self.lowFocus:  # initialised to False in __init__; NOTE(review): nothing visible in this hunk sets it True
+            self.lowFocus = False
+            self.lowPercent.blur()  # paired "%" field of self.low; previously blurred self.highPercent by mistake
 
 class Summary(Frame):
     def __init__(self, screen):
@@ -563,78 +633,273 @@ def __init__(self, screen):
 
         # f = open("prep.txt", "a")
         # f.write("Summary init")
+        highTitle = Layout([100])
+        highOptions = Layout([1, 1])
+        self.add_layout(highTitle)
+        self.add_layout(highOptions)
+        lowTitle = Layout([100])
+        lowOptions = Layout([1, 1])
+        self.add_layout(lowTitle)
+        self.add_layout(lowOptions)
         layout = Layout([100], fill_frame=True)
         self.add_layout(layout)
         # layout.add_widget(Text("High frequency word filter (%):", "highFreq"))
-        layout.add_widget(data.summaryHigh)
+
+        highTitle.add_widget(Divider(height=1, line_char=" "))
+        highTitle.add_widget(Label("High Frequency Word Filter", align="^"))
+        # layout.add_widget(data.summaryHigh)
+        # layout.add_widget(data.summaryHighPercent)
+        highOptions.add_widget(data.summaryHigh, 0)
+        highOptions.add_widget(data.summaryHighPercent, 1)
+        highOptions.add_widget(Divider(height=1, line_char="-"), 0)
+        highOptions.add_widget(Divider(height=1, line_char="-"), 1)
+
         # layout.add_widget(Text("Low frequency word filter (%): ", "lowFreq"))
-        layout.add_widget(data.summaryLow)
-        layout.add_widget(Text("Language-specific stopwords: ", "lang"))
-        layout.add_widget(Text("Minimum word length: ", "length"))
-        layout.add_widget(Label("need to add original corpus size"))
-        layout.add_widget(Label("need to add prepped corpus size"))
-        layout2 = Layout([1, 1, 1, 1, 1])
+        lowTitle.add_widget(Label("Low Frequency Word Filter", align="^"))
+        # layout.add_widget(data.summaryLow)
+        # layout.add_widget(data.summaryLowPercent)
+        # layout.add_widget(Text("Language-specific stopwords: ", "lang"))
+        lowOptions.add_widget(data.summaryLow, 0)
+        lowOptions.add_widget(data.summaryLowPercent, 1)
+        lowOptions.add_widget(Divider(height=1, line_char="-"), 0)
+        lowOptions.add_widget(Divider(height=1, line_char="-"), 1)
+        lowOptions.add_widget(Divider(height=1, line_char=" "), 0)
+        lowOptions.add_widget(Divider(height=1, line_char=" "), 1)
+
+        layout.add_widget(data.english)
+        # layout.add_widget(Text("Minimum word length: ", "length"))
+        layout.add_widget(data.minWord)
+        layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^"))
+        layout.add_widget(data.prepSize)
+        layout2 = Layout([1, 1, 1, 1])
         self.add_layout(layout2)
         layout2.add_widget(Button("prep", self._prep), 0)
         layout2.add_widget(Button("high", self._high), 1)
         layout2.add_widget(Button("low", self._low), 2)
-        layout2.add_widget(Button("lang", self._lang), 3)
-        layout2.add_widget(Button("exit", self._exit), 4)
+        # layout2.add_widget(Button("lang", self._lang), 3)
+        layout2.add_widget(Button("exit", self._exit), 3)
         self.fix()
     
     # proceeds to scene with chart that displays with current settings
     def _prep(self):
         self.save()
+        global data
+        # try:
+        #     high = int(data.summaryHigh.value)
+        # except:
+        #     # switch to error screen for high
+        #     data.error._value = "Please enter a valid high value"
+        #     data.switch = "Summary"
+        #     raise NextScene("Error")
+        # try:
+        #     low = int(data.summaryLow.value)
+        # except:
+        #     # switch to error screen for low
+        #     data.error._value = "Please enter a valid low value"
+        #     data.switch = "Summary"
+        #     raise NextScene("Error")
+        # data.highCandidates, data.highFiltered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        #                                                         num=high)
+        # data.lowCandidates, data.lowFiltered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        #                                                         num=low)
+        minNum = 3
+        try:
+            high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
+        except Exception as e:
+            self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._prepHigh))
+            return
+        try:
+            low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
+        except Exception as e:
+            self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._prepLow))
+            return
+        if data.minWord.value != "":
+            try:
+                minNum = int(data.minWord.value)
+            except Exception as e:
+                self._scene.add_effect(PopUpDialog(self._screen, "Please enter a valid value for Minimum Word Length", ["OK"]))
+                return
+        if data.english.value:
+            data.englishCandidates = stop_language(data.c, "english")
+        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=high)
+        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=low)
+        data.stopCandidates = get_small_words(data.c, minNum)
         raise StopApplication("Quitting")
 
+    @staticmethod
+    def _fix(selection):  # `selection` is unused; NOTE(review): no caller is visible in this chunk
+        global data
+        data.summaryHighPercent.blur()  # drop focus from both high-filter fields of the Summary scene
+        data.summaryHigh.blur()
+
+    @staticmethod
+    def _prepHigh(selection):  # on_close callback of the PopUpDialog that _prep shows when test() fails for the high filter
+        global data
+        if str(selection) == "0":  # presumably the index of the pressed button; labels come from test()'s exception args (not visible here)
+            data.summaryHighPercent._value = "30.0"
+        elif str(selection) == "1":
+            data.summaryHighPercent._value = "0.0"
+        else:  # any other selection: focus both high fields instead of filling in a value
+            data.summaryHighPercent.focus()
+            data.summaryHigh.focus()
+            data.summaryHighFocus = True  # cleared again by the PrepData summary*Focus on_change handlers
+            confirm()  # defined outside this chunk
+
+    @staticmethod
+    def _prepLow(selection):  # on_close callback of the PopUpDialog that _prep shows when test() fails for the low filter
+        global data
+        if str(selection) == "0":  # presumably the index of the pressed button; labels come from test()'s exception args (not visible here)
+            data.summaryLowPercent._value = "20.0"
+        elif str(selection) == "1":
+            data.summaryLowPercent._value = "0.0"
+        else:  # any other selection: focus both low fields instead of filling in a value
+            data.summaryLowPercent.focus()
+            data.summaryLow.focus()
+            data.summaryLowFocus = True  # cleared again by the PrepData summary*Focus on_change handlers
+            confirm()  # defined outside this chunk
+
     def _high(self):
         self.save()
         global data
-        data.high._value = data.summaryHigh.value
+        # if data.summaryHigh.value is None and data.summaryHighPercent.value is None:
+        #     data.error.text = "Please enter a value for either the number of occurrences or percent"
+        #     data.switch = "Summary"
+        #     raise NextScene("Error")
+        # if data.summaryHigh.value is not None and data.summaryHighPercent.value is not None:
+        #     data.error.text = "Please enter a value for only one field"
+        #     data.switch = "Summary"
+        #     raise NextScene("Error")
+        # try:
+        #     if data.summaryHigh.value is not None:
+        #         data.error.text = "Please enter a valid high value (int)"
+        #         high = int(data.summaryHigh.value)
+        #     if data.summaryHighPercent.value is not None:
+        #         data.error.text = "Please enter a valid high percent value (float or int)"
+        #         high = float(data.summaryHighPercent.value)
+        # except Exception as e:
+            # data.error.text = e.__str__()
+            # data.switch = "Summary"
+            # raise NextScene("Error")
+            # self._scene.add_effect(PopUpDialog(self._screen, "hellldoafaisdjfa", ["OK"]))
+            # return
+        try:
+            high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
+        except Exception as e:
+            if e.args[2]:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupHigh))
+            else:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
+            return
+        # data.high._value = str(high) TODO: do this in test() or here?
         data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=data.summaryHigh.value)
+                                                                num=high)
         # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c,
         #                                 words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value)
         
         # TODO dont stoplist yet (do it at the end)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        temp.update(data.lowCandidates) # should I do this?
+        # temp.update(data.lowCandidates) # should I do this?
         data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
-                                                                num=data.summaryHigh.value)
+                                                                num=high)
         data.highLabel.text += filtered
         # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts,
         #                                                         num=data.summaryHigh.value)
         # data.highLabel.text += str(data.counter)
         # data.counter = data.counter + 1
         raise NextScene("High Freq")
+
+    @staticmethod
+    def _popupHigh(selection):  # on_close callback of the PopUpDialog that _high shows when the raised exception's args[2] is truthy
+        global data
+        if str(selection) == "0":  # presumably the index of the pressed button; labels come from test()'s exception args (not visible here)
+            data.summaryHighPercent._value = "30.0"
+        elif str(selection) == "1":
+            data.summaryHighPercent._value = "0.0"
+        else:  # any other selection: focus both high fields and return without changing scene
+            data.summaryHighPercent.focus()
+            data.summaryHigh.focus()
+            data.summaryHighFocus = True  # cleared again by the PrepData summary*Focus on_change handlers
+            confirm()  # defined outside this chunk
+            return
+        high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)  # NOTE(review): not wrapped in try/except here, unlike in _high
+        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=high)
+        temp = deepcopy(data.stoplist)  # work on a copy so data.stoplist itself is not modified here
+        temp.update(data.highCandidates)
+        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                                num=high)
+        data.highLabel.text += filtered
+        raise NextScene("High Freq")  # matches the Scene registered with name="High Freq" in main()'s gui()
     
     def _low(self):
         self.save()
         global data
-        data.low._value = data.summaryLow.value
+        try:
+            low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
+        except Exception as e:
+            if e.args[2]:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupLow))
+            else:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
+            return
+            
         data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=data.summaryLow.value)
+                                                                num=low)
 
         # TODO dont stoplist yet (do it at the end)
         temp = deepcopy(data.stoplist)
-        temp.update(data.highCandidates)
+        # temp.update(data.highCandidates)
         temp.update(data.lowCandidates)
         data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
-                                                            num=data.summaryLow.value)
+                                                            num=low)
         data.lowLabel.text += filtered
         # data.lowLabel.text += str(data.counter)
         # data.counter = data.counter + 1
         raise NextScene("Low Freq")
+    
+    @staticmethod
+    def _popupLow(selection):  # on_close callback of the PopUpDialog that _low shows when the raised exception's args[2] is truthy
+        global data
+        if str(selection) == "0":  # presumably the index of the pressed button; labels come from test()'s exception args (not visible here)
+            data.summaryLowPercent._value = "20.0"
+        elif str(selection) == "1":
+            data.summaryLowPercent._value = "0.0"
+        else:  # any other selection: focus both low fields and return without changing scene
+            data.summaryLowPercent.focus()
+            data.summaryLow.focus()
+            data.summaryLowFocus = True  # cleared again by the PrepData summary*Focus on_change handlers
+            confirm()  # defined outside this chunk
+            return
+        low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)  # NOTE(review): not wrapped in try/except here, unlike in _low
+        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=low)
+        temp = deepcopy(data.stoplist)  # work on a copy so data.stoplist itself is not modified here
+        temp.update(data.lowCandidates)
+        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                                num=low)
+        data.lowLabel.text += filtered
+        raise NextScene("Low Freq")  # matches the Scene registered with name="Low Freq" in main()'s gui()
 
-    def _lang(self):
-        self.save()
-        raise NextScene("Lang")
+    # def _lang(self):
+    #     self.save()
+    #     options = [("Danish", self._updateLang), ("Dutch", self._updateLang), ("English", self._updateLang), ("Finnish", self._updateLang),
+    #                 ("French", self._updateLang), ("German", self._updateLang), ("Hungarian", self._updateLang), ("Italian", self._updateLang),
+    #                 ("Norwegian", self._updateLang), ("Portuguese", self._updateLang), ("Russian", self._updateLang), ("Spanish", self._updateLang),
+    #                 ("Swedish", self._updateLang), ("Turkish", self._updateLang)]
+    #     self._scene.add_effect(PopupMenu(self.screen, options, 0, 0))
+
+    # def _updateLang(self):
+    #     self.save()
+    #     raise NextScene("Lang")
 
     # exits without prepping
-    # @staticmethod
-    def _exit(self):
+    @staticmethod
+    def _exit():
         # self._screen.close()
+        sys.exit(0)
         raise StopApplication("Quitting")
 
 class HighFreq(Frame):
@@ -648,8 +913,8 @@ def __init__(self, screen):
         layout = Layout([100], fill_frame=True)
         self.add_layout(layout)
         layout.add_widget(data.highLabel)
-        # layout.add_widget(Text("High Freq Filter (%)", "highFreq"))
         layout.add_widget(data.high)
+        layout.add_widget(data.highPercent)
         layout2 = Layout([1, 1])
         self.add_layout(layout2)
         layout2.add_widget(Button("Ok", self._ok), 0)
@@ -659,45 +924,110 @@ def __init__(self, screen):
     def _ok(self):
         self.save()
         global data
-        data.summaryHigh._value = data.high.value
+
+        try:
+            high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
+        except Exception as e:
+            if e.args[2]:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popup))
+            else:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
+            return
 
         data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=data.high.value)
+                                                                num=high)
         # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c,
         #                                 words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value)
         
         # TODO dont stoplist yet (do it at the end)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        temp.update(data.lowCandidates) # should I do this?
+        # temp.update(data.lowCandidates) # should I do this?
         data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
-                                                                num=data.high.value)
+                                                                num=high)
         data.highLabel.text += filtered
         # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts,
         #                                                         num=data.summaryHigh.value)
         # data.highLabel.text += str(data.counter)
         # data.counter = data.counter + 1
+        updatePreppedLength()
+        raise NextScene("Summary")
+
+    @staticmethod
+    def _popup(selection):
+        if str(selection) == "0":
+            data.highPercent._value = "30.0"
+        elif str(selection) == "1":
+            data.highPercent._value = "0.0"
+        else:
+            data.highPercent.focus()
+            data.high.focus()
+            data.highFocus = True
+            confirm()
+            return
+        high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
+        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=high)
+        temp = deepcopy(data.stoplist)
+        temp.update(data.highCandidates)
+        # temp.update(data.lowCandidates) # should I do this?
+        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                                num=high)
+        data.highLabel.text += filtered
+
         raise NextScene("Summary")
     
     def _change(self):
         self.save()
         global data
+
+        try:
+            high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
+        except Exception as e:
+            if e.args[2]:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupChange))
+            else:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
+            return
+
         data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=data.high.value)
+                                                                num=high)
         # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c,
         #                                 words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value)
         
         # TODO dont stoplist yet (do it at the end)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        temp.update(data.lowCandidates) # should I do this?
+        # temp.update(data.lowCandidates) # should I do this?
         data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
-                                                                num=data.high.value)
+                                                                num=high)
         data.highLabel.text += filtered
         # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts,
         #                                                         num=data.summaryHigh.value)
         # data.highLabel.text += str(data.counter)
         # data.counter = data.counter + 1
+    
+    @staticmethod
+    def _popupChange(selection):
+        if str(selection) == "0":
+            data.highPercent._value = "30.0"
+        elif str(selection) == "1":
+            data.highPercent._value = "0.0"
+        else:
+            data.highPercent.focus()
+            data.high.focus()
+            data.highFocus = True
+            confirm()
+            return
+        high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
+        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=high)
+        temp = deepcopy(data.stoplist)
+        temp.update(data.highCandidates)
+        # temp.update(data.lowCandidates) # should I do this?
+        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                                num=high)
+        data.highLabel.text += filtered
 
 class LowFreq(Frame):
     def __init__(self, screen):
@@ -712,6 +1042,7 @@ def __init__(self, screen):
         layout.add_widget(data.lowLabel)
         # layout.add_widget(Text("Low Freq Filter (%)", "lowFreq"))
         layout.add_widget(data.low)
+        layout.add_widget(data.lowPercent)
         layout2 = Layout([1, 1])
         self.add_layout(layout2)
         layout2.add_widget(Button("Ok", self._ok), 0)
@@ -720,38 +1051,201 @@ def __init__(self, screen):
 
     def _ok(self):
         self.save()
+        global data
+        try:
+            low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
+        except Exception as e:
+            if e.args[2]:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popup))
+            else:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
+            return
+            
+        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=low)
+
+        # TODO dont stoplist yet (do it at the end)
+        temp = deepcopy(data.stoplist)
+        # temp.update(data.highCandidates)
+        temp.update(data.lowCandidates)
+        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                            num=low)
+        data.lowLabel.text += filtered
+        # data.lowLabel.text += str(data.counter)
+        # data.counter = data.counter + 1
+        updatePreppedLength()
+        raise NextScene("Summary")
+    
+    @staticmethod
+    def _popup(selection):
+        if str(selection) == "0":
+            data.lowPercent._value = "20.0"
+        elif str(selection) == "1":
+            data.lowPercent._value = "0.0"
+        else:
+            data.lowPercent.focus()
+            data.low.focus()
+            data.lowFocus = True
+            confirm()
+            return
+        low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
+
+        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=low)
+        temp = deepcopy(data.stoplist)
+        # temp.update(data.highCandidates)
+        temp.update(data.lowCandidates)
+        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                            num=low)
+        data.lowLabel.text += filtered
+        
         raise NextScene("Summary")
     
     def _change(self):
         self.save()
         global data
-        data.summaryLow._value = data.low
+        try:
+            low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
+        except Exception as e:
+            if e.args[2]:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupChange))
+            else:
+                self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
+            return
+            
+        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=low)
 
-class Lang(Frame):
-    def __init__(self, screen):
-        super(Lang, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
-                                        title="Lang", reduce_cpu=True)
+        # TODO dont stoplist yet (do it at the end)
+        temp = deepcopy(data.stoplist)
+        # temp.update(data.highCandidates)
+        temp.update(data.lowCandidates)
+        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                            num=low)
+        data.lowLabel.text += filtered
+        # data.lowLabel.text += str(data.counter)
+        # data.counter = data.counter + 1
 
-        # f = open("prep.txt", "a")
-        # f.write("Lang init")
-        layout = Layout([100], fill_frame=True)
-        self.add_layout(layout)
-        layout.add_widget(Text(label="Language-specific stopwords: ", name="lang"))
-        layout.add_widget(Label("hello"))
-        layout2 = Layout([1, 1, 1])
-        self.add_layout(layout2)
-        layout2.add_widget(Button("Ok", self._ok), 0)
-        self.fix()
+    @staticmethod
+    def _popupChange(selection):
+        if str(selection) == "0":
+            data.lowPercent._value = "20.0"
+        elif str(selection) == "1":
+            data.lowPercent._value = "0.0"
+        else:
+            data.lowPercent.focus()
+            data.low.focus()
+            data.lowFocus = True
+            confirm()
+            return
+        low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
+        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                num=low)
+        temp = deepcopy(data.stoplist)
+        temp.update(data.lowCandidates)
+        # temp.update(data.lowCandidates) # should I do this?
+        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+                                                                num=low)
+        data.lowLabel.text += filtered
+
+# class Lang(Frame):
+#     def __init__(self, screen):
+#         super(Lang, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
+#                                         title="Lang", reduce_cpu=True)
+
+#         # f = open("prep.txt", "a")
+#         # f.write("Lang init")
+#         layout = Layout([100], fill_frame=True)
+#         self.add_layout(layout)
+#         layout.add_widget(Text(label="Language-specific stopwords: ", name="lang"))
+#         layout.add_widget(Label("hello"))
+#         layout2 = Layout([1, 1, 1])
+#         self.add_layout(layout2)
+#         layout2.add_widget(Button("Ok", self._ok), 0)
+#         self.fix()
     
-    # proceeds to scene with chart that displays with current settings
-    def _ok(self):
-        self.save()
-        raise NextScene("Summary")
+#     # proceeds to scene with chart that displays with current settings
+#     def _ok(self):
+#         self.save()
+#         raise NextScene("Summary")
+
+# class Error(Frame):
+#     def __init__(self, screen):
+#         super(Error, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 //3, hover_focus=True,
+#                                             title="Error", reduce_cpu=True)
+
+#         global data
+        
+#         layout = Layout([100], fill_frame=True)
+#         self.add_layout(layout)
+#         layout.add_widget(data.error)
+#         layout2 = Layout([1])
+#         self.add_layout(layout2)
+#         layout2.add_widget(Button("Ok", self._ok), 0)
+#         self.fix()
+    
+#     def _ok(self):
+#         self.save()
+#         global data
+#         raise NextScene(data.switch)
+
+def test(num, percent, numPair, percentPair, iden, rev):
+    defaults = {"high": "30%", "low": "20%"}
+    if num.value == "" and percent.value == "":
+        # raise Exception("Please enter a value for either the number of occurrences or percent")
+        raise Exception("Apply default of " + str(defaults[iden]) + " for the " + iden + " frequency, don't stop list, or edit value?", ["Yes", "Don't stop list", "Edit value"], True)
+    if num.value != "" and percent.value != "":
+        raise Exception("Pleae enter a value for only one " + iden + " field", ["Ok"], False)
+    try:
+        msg = "error"
+        if num.value != "":
+            msg = "Please enter a valid " + iden + " value (int)"
+            ret = int(num.value)
+            numPair._value = num.value
+            percentPair._value = ""
+        if percent.value != "":
+            msg = "Please enter a valid " + iden + " percent value (float or int)"
+            ret = float(percent.value)
+            percentPair._value = percent.value
+            numPair._value = ""
+            ret = get_closest_bin(data.c, 1 - (ret / 100.), reverse=rev, counts=data.counts)
+    except:
+        raise Exception(msg, ["Ok"], False)
+    return ret
+
+def updatePreppedLength():
+    global data
+    temp = deepcopy(data.stoplist)
+    tempC = deepcopy(data.c)
+    if data.english.value:
+        data.englishCandidates = stop_language(tempC, "english")
+        temp.update(data.englishCandidates)
+    temp.update(data.lowCandidates)
+    temp.update(data.highCandidates)
+    tempC.in_place_stoplist(temp)
+    data.prepSize.text = str("Prepared corpus unique words: " + str(len(tempC)))
+
+def confirm():
+    global data
+    tempScreen = data.wholeScreen.current_scene._effects[0]._screen
+    tempScene = data.wholeScreen.current_scene
+    tempScene.add_effect(PopUpDialog(tempScreen, "Please input a value in one of the highlighted fields", ["OK"], on_close=reset))
+
+def reset(selection):
+    global data
+    data.summaryHigh.blur()
+    data.summaryHighPercent.blur()
+    data.summaryLow.blur()
+    data.summaryLowPercent.blur()
+    data.high.blur()
+    data.highPercent.blur()
+    data.low.blur()
+    data.lowPercent.blur()
 
 def main(args):
     global data
     data = PrepData()
-    print("IN MAINNNNNNNNNNNNNNNNN")
+    # print("IN MAINNNNNNNNNNNNNNNNN")
 
     config = topicexplorer.config.read(args.config_file)
 
@@ -780,29 +1274,29 @@ def main(args):
         args.lang.extend(new_langs)
     """
 
-    # NEXT 2 IF AND THE FOR ARE FOR LANG (PUT THEM IN LANG SCENE)
-
-    # add default locale if no other languages are specified
-    # do not add if in quiet mode -- make everything explicit
-    if not args.lang and not args.quiet:
-        import locale
-        locale = locale.getdefaultlocale()[0].split('_')[0].lower()
-        if locale in langs.keys():
-            args.lang.append(locale)
-
-    # check for any new candidates
-    args.lang = [lang for lang in args.lang if stop_language(data.c, langs[lang])]
-    if args.lang and not args.quiet:
-        args.lang = lang_prompt(args.lang)
-
-    data.stoplist = set()
-    # Apply stop words
-    print(" ")
-    for lang in args.lang:
-        print("Applying", langs[lang], "stopwords")
-        candidates = stop_language(data.c, langs[lang])
-        if len(candidates):
-            data.stoplist.update(candidates)
+    # # Language information, not sure if I should remove it
+
+    # # add default locale if no other languages are specified
+    # # do not add if in quiet mode -- make everything explicit
+    # if not args.lang and not args.quiet:
+    #     import locale
+    #     locale = locale.getdefaultlocale()[0].split('_')[0].lower()
+    #     if locale in langs.keys():
+    #         args.lang.append(locale)
+
+    # # check for any new candidates
+    # args.lang = [lang for lang in args.lang if stop_language(data.c, langs[lang])]
+    # if args.lang and not args.quiet:
+    #     args.lang = lang_prompt(args.lang)
+
+    # data.stoplist = set()
+    # # Apply stop words
+    # print(" ")
+    # for lang in args.lang:
+    #     print("Applying", langs[lang], "stopwords")
+    #     candidates = stop_language(data.c, langs[lang])
+    #     if len(candidates):
+    #         data.stoplist.update(candidates)
 
     # DO THIS AUTOMATICALLY, NOT NEED FOR SCENE, MAYBE HAVE SOME SORT OF INFO SCENE TO DISPLAY THIS INFO IN
     
@@ -843,9 +1337,11 @@ def main(args):
     elif args.high_filter:
         candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
-            print("Filtering {} high frequency word{}.".format(len(candidates),
-                                                               's' if len(candidates) > 1 else ''))
-            data.stoplist.update(candidates)
+            # print("Filtering {} high frequency word{}.".format(len(candidates),
+            #                                                    's' if len(candidates) > 1 else ''))
+            data.highCandidates = candidates
+            data.highLabel._value = args.high_filter
+            # data.stoplist.update(candidates)
     elif args.high_percent:
         args.high_filter = get_closest_bin(data.c, 1 - (args.high_percent / 100.), counts=data.counts)
         print(args.high_filter)
@@ -870,9 +1366,10 @@ def main(args):
     elif args.low_filter:
         candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
-            print("Filtering {} low frequency words.".format(len(candidates)))
-            data.stoplist.update(candidates)
-
+            # print("Filtering {} low frequency words.".format(len(candidates)))
+            data.lowCandidates = candidates
+            data.lowLabel._value = args.low_filter
+            # data.stoplist.update(candidates)
     elif args.low_percent:
         args.low_filter = get_closest_bin(data.c, 1 - (args.low_percent / 100.), reverse=True, counts=data.counts)
         print(args.low_filter)
@@ -886,11 +1383,16 @@ def gui(screen, scene):
         scenes = [
             Scene([Summary(screen)], -1, name="Summary"),
             Scene([HighFreq(screen)], -1, name="High Freq"),
-            Scene([LowFreq(screen)], -1, name="Low Freq"),
-            Scene([Lang(screen)], -1, name="Lang")
+            Scene([LowFreq(screen)], -1, name="Low Freq")
+            # Scene([Lang(screen)], -1, name="Lang")
         ]
+        global data
+        data.wholeScreen = screen
         screen.play(scenes, stop_on_resize=True, start_scene=scene)
 
+    # global data
+    data.prepSize.text = str("Prepared corpus unique words: " + str(len(data.c)))
+
     last_scene = None
     # global data = PrepData()
     while True:
@@ -908,6 +1410,12 @@ def gui(screen, scene):
 
     data.stoplist.update(data.highCandidates)
     data.stoplist.update(data.lowCandidates)
+    data.stoplist.update(data.stopCandidates)
+    print(data.highCandidates)
+    # print(data.highFiltered)
+    print(data.lowCandidates)
+    # print(data.lowFiltered)
+    print(data.stopCandidates)
 
     if not data.stoplist:
         print("No stopwords applied.\n\n")
@@ -917,6 +1425,7 @@ def gui(screen, scene):
         print("\n\nApplying {} stopword{}".format(len(data.stoplist),
                                                   's' if len(data.stoplist) > 1 else ''))
         data.c.in_place_stoplist(data.stoplist)
+        print(len(data.c))
         print("\n")
 
     # LEAVE THE REST, TILL THE END OF THIS METHOD AS IS

From 562df9373fab191a74470a48ef5b1b5f84b17e57 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Mon, 12 Nov 2018 13:26:32 -0500
Subject: [PATCH 03/21] removed comments

---
 topicexplorer/prep.py | 328 +-----------------------------------------
 1 file changed, 2 insertions(+), 326 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 9606bb7f..4afffc8b 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -293,10 +293,6 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     header = "FILTER HIGH FREQUENCY WORDS"
     stars = old_div((80 - len(header) - 2), 2)
-    # print("\n\n{0} {1} {0}".format('*' * stars, header))
-    # print("    This will remove all words occurring N or more times.")
-    # print("    The histogram below shows how many words will be removed")
-    # print("    by selecting each maximum frequency threshold.\n")
 
     # Get frequency bins
     if items is None or counts is None:
@@ -306,32 +302,16 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
     bins = sorted(set(bins))
     bins.append(max(counts))
 
-    # try:
-    #     num = int(num)
-    # except:
-    #     # TODO: show invalid num screen
-    #     num = "str"
-
     ret = ""
 
-    # do input validation here
-
     high_filter = False
-    # while not high_filter:
     bin_counts, bins = np.histogram(counts, bins=bins)
-    # print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus',
-    #                                                          "# words", "Rate"))
     ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', "# words", "Rate") + "\n"
     last_row = 0
     for bin, count in zip(bins[-2::-1], np.cumsum(bin_counts[::-1])):
         filtered_counts = counts[get_mask(c, words)]
         if (filtered_counts >= bin).sum() > last_row:
             percentage = 1. - (old_div(counts[counts < bin].sum(), float(c.original_length)))
-            # print("{0:>5.0f}x".format(bin).rjust(8), end=' ')
-            # print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ')
-            # print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ')
-            # print("  {0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(14), end=' ')
-            # print(">= {0:>5.0f}x".format(bin).ljust(8))
             ret += "{0:>5.0f}x".format(bin).rjust(8)
             ret += '{0:2.1f}% '.format(percentage * 100).rjust(10)
             ret += (u'\u2588' * int(percentage * 36)).ljust(36)
@@ -340,70 +320,29 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
 
         last_row = (filtered_counts >= bin).sum()
 
-    # return ret
-    # print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ')
-    # print('{} words total'.format(get_mask(c, words).sum()).rjust(20))
-    # print('')
     ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37)
     ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n'
-    # ret += str(type(num)) + " " + str(num)
     return ret
 
 def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
-    # try:
-    #     num = int(num)
-    # except:
-    #     # TODO: show invalid num screen
-    #     num = "str"
-    #     return
     input_filter = num
     accept = None
-    # while not input_filter or input_filter <= 0:
     try:
-        # if high_filter:
-        #     input_filter = high_filter
-        # else:
-        #     input_filter = int(input("Enter the maximum rate: ").replace('x', ''))
         candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts)
         places = np.in1d(c.words, candidates)
         places = dict(zip(candidates, np.where(places)[0]))
         candidates = sorted(candidates, key=lambda x: counts[places[x]], reverse=True)
         filtered_counts = counts[get_mask(c, words)]
 
-        # print("Filter will remove", filtered_counts[filtered_counts >= input_filter].sum(), end=' ')
-        # print("occurrences", "of these", len(filtered_counts[filtered_counts >= input_filter]), "words:")
-        # print(u' '.join(candidates))
         filtered = ""
         filtered += "Filter will remove " + str(filtered_counts[filtered_counts >= input_filter].sum())
         filtered += " occurrences " + "of these " + str(len(filtered_counts[filtered_counts >= input_filter])) + " words: "
         filtered += u' '.join(candidates)
 
-        # print("\nFilter will remove", filtered_counts[filtered_counts >= input_filter].sum(), end=' ')
-        # print("occurrences", "of these", len(filtered_counts[filtered_counts >= input_filter]), "words.", end=' ')
-
-        # filtered += "\nFilter will remove " + str(filtered_counts[filtered_counts >= input_filter].sum())
-        # filtered += " occurrences " + " of these " + str(len(filtered_counts[filtered_counts >= input_filter])) + " words."
-
         if len(candidates) == len(c.words):
-            # print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.")
-            # print("Please choose a different filter.")
             filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus."
             filtered += "Please choose a different filter."
-            # high_filter = 0
-            # input_filter = 0
-        # else:
-        #     accept = None
-        #     while accept not in ['y', 'n']:
-        #         accept = input("\nAccept filter? [y/n/[different max number]] ")
-        #         if isint(accept):
-        #             high_filter = int(accept)
-        #             input_filter = 0
-        #             accept = 'n'
-        #         elif accept == 'y':
-        #             high_filter = input_filter
-        #         elif accept == 'n':
-        #             high_filter = 0
 
     except ValueError:
         input_filter = 0
@@ -414,10 +353,6 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     header = "FILTER LOW FREQUENCY WORDS"
     stars = old_div((80 - len(header) - 2), 2)
-    # print("\n\n{0} {1} {0}".format('*' * stars, header))
-    # print("    This will remove all words occurring less than N times.")
-    # print("    The histogram below shows how many words will be removed")
-    # print("    by selecting each minimum frequency threshold.\n")
 
     # Get frequency bins
     if items is None or counts is None:
@@ -427,31 +362,16 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
     bins = sorted(set(bins))
     bins.append(max(counts))
 
-    # try:
-    #     num = int(num)
-    # except:
-    #     # TODO: show invalid num screen
-    #     num = "str"
-
     ret = ""
 
     low_filter = False
-    # while low_filter is False:
     bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins)
-    # print "{0:>10s} {1:>10s}".format("# Tokens", "# Words")
-    # print("{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus',
-    #                                                             "# words", "Rate"))
     ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n"
     last_row = 0
     for bin, count in zip(bins, np.cumsum(bin_counts)):
         filtered_counts = counts[get_mask(c, words)]
         if last_row < (filtered_counts < bin).sum() <= len(filtered_counts):
             percentage = (old_div(counts[counts <= bin].sum(), float(c.original_length)))
-            # print("{0:>5.0f}x".format(bin).rjust(8), end=' ')
-            # print('{0:2.1f}%'.format(percentage * 100).rjust(8), end=' ')
-            # print((u'\u2588' * int(percentage * 36)).ljust(36), end=' ')
-            # print("  {0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(14), end=' ')
-            # print("<= {0:>5.0f}x".format(bin).ljust(8))
             ret += "{0:>5.0f}x".format(bin).rjust(8)
             ret += '{0:2.1f}%'.format(percentage * 100).rjust(9)
             ret += " " + (u'\u2588' * int(percentage * 36)).ljust(36)
@@ -461,30 +381,15 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
                 break
         last_row = (filtered_counts >= bin).sum()
 
-
-    # print(' ' * 17, "{} total occurrences".format(counts.sum()).ljust(36), end=' ')
-    # print('{} words total'.format(get_mask(c, words).sum()).rjust(20))
-    # print('')
     ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37)
     ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n'
     return ret
 
 def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
-    # try:
-    #     num = int(num)
-    # except:
-    #     # TODO: show invalid num screen
-    #     num = "str"
-    #     return
     input_filter = num
     accept = None
-    # while not input_filter or input_filter <= 0:
     try:
-        # if low_filter:
-        #     input_filter = low_filter
-        # else:
-        #     input_filter = int(input("Enter the minimum rate: ").replace('x', ''))
 
         candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts)
         places = np.in1d(c.words, candidates)
@@ -492,51 +397,26 @@ def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
         candidates = sorted(candidates, key=lambda x: counts[places[x]])
         filtered_counts = counts[get_mask(c, words)]
 
-        # print("Filter will remove", filtered_counts[filtered_counts <= input_filter].sum(), "tokens", end=' ')
-        # print("of these", len(filtered_counts[filtered_counts <= input_filter]), "words:")
-        # print(u' '.join(candidates))
         filtered = ""
         filtered += "Filter will remove " + str(filtered_counts[filtered_counts <= input_filter].sum()) + " tokens"
         filtered += "of these " + str(len(filtered_counts[filtered_counts <= input_filter])) + " words: "
         filtered += u' '.join(candidates)
 
-        # print("\nFilter will remove", filtered_counts[filtered_counts <= input_filter].sum(), "tokens", end=' ')
-        # print("of these", len(filtered_counts[filtered_counts <= input_filter]), "words.", end=' ')
 
         if len(candidates) == len(c.words):
-            # print("\n\nChoice of", input_filter, "will remove ALL words from the corpus.")
-            # print("Please choose a different filter.")
             filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus."
             filtered += "Please choose a different filter."
-            # low_filter = 0
-            # input_filter = 0
-        # else:
-        #     accept = None
-        #     while accept not in ['y', 'n']:
-        #         accept = input("\nAccept filter? [y/n/[different min. number] ")
-        #         if isint(accept):
-        #             low_filter = int(accept)
-        #             input_filter = 0
-        #             accept = 'n'
-        #         elif accept == 'y':
-        #             low_filter = input_filter
-        #         elif accept == 'n':
-        #             low_filter = False
 
     except ValueError:
         input_filter = 0
 
     return (candidates, filtered)
 
+# Stores all of the variables for the labels
 class PrepData(Frame):
     def __init__(self):
         self.stoplist = set()
-        # super(PrepData, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
-        #                                 title="null", reduce_cpu=True)
         self.label = Label("change this")
-        # self.lang = Label("haha")
-        # self.summaryHigh = Text("High frequency word filter (#):", "summaryHighFreq")
-        # self.summaryHighPercent = Text("High frequency word filter (%):", "summaryHighPercent")
         self.summaryHigh = Text(label="Number of word frequency:", name="summaryHighFreq", on_change=self.summaryHighNumFocus)
         self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent", on_change=self.summaryHighPercentFocus)
         self.summaryHighFocus = False
@@ -545,9 +425,6 @@ def __init__(self):
         self.highLabel = Label("high label", height=35)
         self.highFocus = False
         self.highCandidates = []
-        # self.highFiltered = Label("filtered", height = 10)
-        # self.summaryLow = Text("Low frequency word filter (#):", "summaryLowFreq")
-        # self.summaryLowPercent = Text("Low frequency word filter (%):", "summaryLowPercent")
         self.summaryLow = Text("Number of word frequency:", "summaryLowFreq", on_change=self.summaryLowNumFocus)
         self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent", on_change=self.summaryLowPercentFocus)
         self.summaryLowFocus = False
@@ -564,10 +441,6 @@ def __init__(self):
         self.english = CheckBox("Yes", label="Apply English stopwords")
         self.englishCandidates = []
         self.prepSize = Label("need to update length", align="^")
-        # self.high.value("hello")
-    
-    # def update_lang(self, l):
-    #     self.lang = l
 
     def summaryHighPercentFocus(self):
         if self.summaryHighFocus:
@@ -631,8 +504,6 @@ def __init__(self, screen):
 
         global data
 
-        # f = open("prep.txt", "a")
-        # f.write("Summary init")
         highTitle = Layout([100])
         highOptions = Layout([1, 1])
         self.add_layout(highTitle)
@@ -643,22 +514,15 @@ def __init__(self, screen):
         self.add_layout(lowOptions)
         layout = Layout([100], fill_frame=True)
         self.add_layout(layout)
-        # layout.add_widget(Text("High frequency word filter (%):", "highFreq"))
 
         highTitle.add_widget(Divider(height=1, line_char=" "))
         highTitle.add_widget(Label("High Frequency Word Filter", align="^"))
-        # layout.add_widget(data.summaryHigh)
-        # layout.add_widget(data.summaryHighPercent)
         highOptions.add_widget(data.summaryHigh, 0)
         highOptions.add_widget(data.summaryHighPercent, 1)
         highOptions.add_widget(Divider(height=1, line_char="-"), 0)
         highOptions.add_widget(Divider(height=1, line_char="-"), 1)
 
-        # layout.add_widget(Text("Low frequency word filter (%): ", "lowFreq"))
         lowTitle.add_widget(Label("Low Frequency Word Filter", align="^"))
-        # layout.add_widget(data.summaryLow)
-        # layout.add_widget(data.summaryLowPercent)
-        # layout.add_widget(Text("Language-specific stopwords: ", "lang"))
         lowOptions.add_widget(data.summaryLow, 0)
         lowOptions.add_widget(data.summaryLowPercent, 1)
         lowOptions.add_widget(Divider(height=1, line_char="-"), 0)
@@ -667,7 +531,6 @@ def __init__(self, screen):
         lowOptions.add_widget(Divider(height=1, line_char=" "), 1)
 
         layout.add_widget(data.english)
-        # layout.add_widget(Text("Minimum word length: ", "length"))
         layout.add_widget(data.minWord)
         layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^"))
         layout.add_widget(data.prepSize)
@@ -676,7 +539,6 @@ def __init__(self, screen):
         layout2.add_widget(Button("prep", self._prep), 0)
         layout2.add_widget(Button("high", self._high), 1)
         layout2.add_widget(Button("low", self._low), 2)
-        # layout2.add_widget(Button("lang", self._lang), 3)
         layout2.add_widget(Button("exit", self._exit), 3)
         self.fix()
     
@@ -684,24 +546,6 @@ def __init__(self, screen):
     def _prep(self):
         self.save()
         global data
-        # try:
-        #     high = int(data.summaryHigh.value)
-        # except:
-        #     # switch to error screen for high
-        #     data.error._value = "Please enter a valid high value"
-        #     data.switch = "Summary"
-        #     raise NextScene("Error")
-        # try:
-        #     low = int(data.summaryLow.value)
-        # except:
-        #     # switch to error screen for low
-        #     data.error._value = "Please enter a valid low value"
-        #     data.switch = "Summary"
-        #     raise NextScene("Error")
-        # data.highCandidates, data.highFiltered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-        #                                                         num=high)
-        # data.lowCandidates, data.lowFiltered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-        #                                                         num=low)
         minNum = 3
         try:
             high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
@@ -763,27 +607,6 @@ def _prepLow(selection):
     def _high(self):
         self.save()
         global data
-        # if data.summaryHigh.value is None and data.summaryHighPercent.value is None:
-        #     data.error.text = "Please enter a value for either the number of occurrences or percent"
-        #     data.switch = "Summary"
-        #     raise NextScene("Error")
-        # if data.summaryHigh.value is not None and data.summaryHighPercent.value is not None:
-        #     data.error.text = "Please enter a value for only one field"
-        #     data.switch = "Summary"
-        #     raise NextScene("Error")
-        # try:
-        #     if data.summaryHigh.value is not None:
-        #         data.error.text = "Please enter a valid high value (int)"
-        #         high = int(data.summaryHigh.value)
-        #     if data.summaryHighPercent.value is not None:
-        #         data.error.text = "Please enter a valid high percent value (float or int)"
-        #         high = float(data.summaryHighPercent.value)
-        # except Exception as e:
-            # data.error.text = e.__str__()
-            # data.switch = "Summary"
-            # raise NextScene("Error")
-            # self._scene.add_effect(PopUpDialog(self._screen, "hellldoafaisdjfa", ["OK"]))
-            # return
         try:
             high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
         except Exception as e:
@@ -792,23 +615,14 @@ def _high(self):
             else:
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
-        # data.high._value = str(high) TODO: do this in test() or here?
         data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
-        # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c,
-        #                                 words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value)
         
-        # TODO dont stoplist yet (do it at the end)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        # temp.update(data.lowCandidates) # should I do this?
         data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
         data.highLabel.text += filtered
-        # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-        #                                                         num=data.summaryHigh.value)
-        # data.highLabel.text += str(data.counter)
-        # data.counter = data.counter + 1
         raise NextScene("High Freq")
 
     @staticmethod
@@ -849,15 +663,11 @@ def _low(self):
         data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
 
-        # TODO dont stoplist yet (do it at the end)
         temp = deepcopy(data.stoplist)
-        # temp.update(data.highCandidates)
         temp.update(data.lowCandidates)
         data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                             num=low)
         data.lowLabel.text += filtered
-        # data.lowLabel.text += str(data.counter)
-        # data.counter = data.counter + 1
         raise NextScene("Low Freq")
     
     @staticmethod
@@ -883,22 +693,9 @@ def _popupLow(selection):
         data.lowLabel.text += filtered
         raise NextScene("Low Freq")
 
-    # def _lang(self):
-    #     self.save()
-    #     options = [("Danish", self._updateLang), ("Dutch", self._updateLang), ("English", self._updateLang), ("Finnish", self._updateLang),
-    #                 ("French", self._updateLang), ("German", self._updateLang), ("Hungarian", self._updateLang), ("Italian", self._updateLang),
-    #                 ("Norwegian", self._updateLang), ("Portuguese", self._updateLang), ("Russian", self._updateLang), ("Spanish", self._updateLang),
-    #                 ("Swedish", self._updateLang), ("Turkish", self._updateLang)]
-    #     self._scene.add_effect(PopupMenu(self.screen, options, 0, 0))
-
-    # def _updateLang(self):
-    #     self.save()
-    #     raise NextScene("Lang")
-
     # exits without prepping
     @staticmethod
     def _exit():
-        # self._screen.close()
         sys.exit(0)
         raise StopApplication("Quitting")
 
@@ -907,7 +704,6 @@ def __init__(self, screen):
         super(HighFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
                                         title="High Frequency Word Filter", reduce_cpu=True)
 
-        # self._data = data
         global data
         
         layout = Layout([100], fill_frame=True)
@@ -936,20 +732,12 @@ def _ok(self):
 
         data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
-        # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c,
-        #                                 words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value)
         
-        # TODO dont stoplist yet (do it at the end)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        # temp.update(data.lowCandidates) # should I do this?
         data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
         data.highLabel.text += filtered
-        # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-        #                                                         num=data.summaryHigh.value)
-        # data.highLabel.text += str(data.counter)
-        # data.counter = data.counter + 1
         updatePreppedLength()
         raise NextScene("Summary")
 
@@ -970,7 +758,6 @@ def _popup(selection):
                                                                 num=high)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        # temp.update(data.lowCandidates) # should I do this?
         data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
         data.highLabel.text += filtered
@@ -992,20 +779,12 @@ def _change(self):
 
         data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
-        # args.high_filter, candidates, data.highLabel.text, data.highFiltered.text = get_high_filter(data.c,
-        #                                 words=data.stoplist, items=data.items, counts=data.counts, num=data.summaryHigh.value)
         
-        # TODO dont stoplist yet (do it at the end)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        # temp.update(data.lowCandidates) # should I do this?
         data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
         data.highLabel.text += filtered
-        # data.highLabel.text = get_high_filter(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-        #                                                         num=data.summaryHigh.value)
-        # data.highLabel.text += str(data.counter)
-        # data.counter = data.counter + 1
     
     @staticmethod
     def _popupChange(selection):
@@ -1024,7 +803,6 @@ def _popupChange(selection):
                                                                 num=high)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        # temp.update(data.lowCandidates) # should I do this?
         data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
         data.highLabel.text += filtered
@@ -1034,13 +812,11 @@ def __init__(self, screen):
         super(LowFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
                                         title="Low Frequency Word Filter", reduce_cpu=True)
 
-        # self._data = data
         global data
         
         layout = Layout([100], fill_frame=True)
         self.add_layout(layout)
         layout.add_widget(data.lowLabel)
-        # layout.add_widget(Text("Low Freq Filter (%)", "lowFreq"))
         layout.add_widget(data.low)
         layout.add_widget(data.lowPercent)
         layout2 = Layout([1, 1])
@@ -1064,15 +840,11 @@ def _ok(self):
         data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
 
-        # TODO dont stoplist yet (do it at the end)
         temp = deepcopy(data.stoplist)
-        # temp.update(data.highCandidates)
         temp.update(data.lowCandidates)
         data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                             num=low)
         data.lowLabel.text += filtered
-        # data.lowLabel.text += str(data.counter)
-        # data.counter = data.counter + 1
         updatePreppedLength()
         raise NextScene("Summary")
     
@@ -1093,7 +865,6 @@ def _popup(selection):
         data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
         temp = deepcopy(data.stoplist)
-        # temp.update(data.highCandidates)
         temp.update(data.lowCandidates)
         data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                             num=low)
@@ -1116,15 +887,11 @@ def _change(self):
         data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
 
-        # TODO dont stoplist yet (do it at the end)
         temp = deepcopy(data.stoplist)
-        # temp.update(data.highCandidates)
         temp.update(data.lowCandidates)
         data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                             num=low)
         data.lowLabel.text += filtered
-        # data.lowLabel.text += str(data.counter)
-        # data.counter = data.counter + 1
 
     @staticmethod
     def _popupChange(selection):
@@ -1143,56 +910,13 @@ def _popupChange(selection):
                                                                 num=low)
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
-        # temp.update(data.lowCandidates) # should I do this?
         data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=low)
         data.lowLabel.text += filtered
 
-# class Lang(Frame):
-#     def __init__(self, screen):
-#         super(Lang, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
-#                                         title="Lang", reduce_cpu=True)
-
-#         # f = open("prep.txt", "a")
-#         # f.write("Lang init")
-#         layout = Layout([100], fill_frame=True)
-#         self.add_layout(layout)
-#         layout.add_widget(Text(label="Language-specific stopwords: ", name="lang"))
-#         layout.add_widget(Label("hello"))
-#         layout2 = Layout([1, 1, 1])
-#         self.add_layout(layout2)
-#         layout2.add_widget(Button("Ok", self._ok), 0)
-#         self.fix()
-    
-#     # proceeds to scene with chart that displays with current settings
-#     def _ok(self):
-#         self.save()
-#         raise NextScene("Summary")
-
-# class Error(Frame):
-#     def __init__(self, screen):
-#         super(Error, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 //3, hover_focus=True,
-#                                             title="Error", reduce_cpu=True)
-
-#         global data
-        
-#         layout = Layout([100], fill_frame=True)
-#         self.add_layout(layout)
-#         layout.add_widget(data.error)
-#         layout2 = Layout([1])
-#         self.add_layout(layout2)
-#         layout2.add_widget(Button("Ok", self._ok), 0)
-#         self.fix()
-    
-#     def _ok(self):
-#         self.save()
-#         global data
-#         raise NextScene(data.switch)
-
 def test(num, percent, numPair, percentPair, iden, rev):
     defaults = {"high": "30%", "low": "20%"}
     if num.value == "" and percent.value == "":
-        # raise Exception("Please enter a value for either the number of occurrences or percent")
         raise Exception("Apply default of " + str(defaults[iden]) + " for the " + iden + " frequency, don't stop list, or edit value?", ["Yes", "Don't stop list", "Edit value"], True)
     if num.value != "" and percent.value != "":
         raise Exception("Pleae enter a value for only one " + iden + " field", ["Ok"], False)
@@ -1245,7 +969,6 @@ def reset(selection):
 def main(args):
     global data
     data = PrepData()
-    # print("IN MAINNNNNNNNNNNNNNNNN")
 
     config = topicexplorer.config.read(args.config_file)
 
@@ -1297,8 +1020,6 @@ def main(args):
     #     candidates = stop_language(data.c, langs[lang])
     #     if len(candidates):
     #         data.stoplist.update(candidates)
-
-    # DO THIS AUTOMATICALLY, NOT NEED FOR SCENE, MAYBE HAVE SOME SORT OF INFO SCENE TO DISPLAY THIS INFO IN
     
     # Apply custom stopwords file
     if args.stopword_file:
@@ -1310,8 +1031,6 @@ def main(args):
                 print("Applying custom stopword file to remove {} word{}.".format(
                     len(candidates), 's' if len(candidates) > 1 else ''))
                 data.stoplist.update(candidates)
-    
-    # DO THIS AUTOMATICALLY BASED OFF ARGS
 
     if args.min_word_len:
         candidates = get_small_words(data.c, args.min_word_len)
@@ -1320,63 +1039,35 @@ def main(args):
                 len(candidates), 's' if len(candidates) > 1 else '', args.min_word_len))
             data.stoplist.update(candidates)
 
-    # DO THIS AUTOMATICALLY BASED OFF ARGS, NOT THE FIRST IF
-    # TODO TEST USUAL BEHAVIOR
-
     # cache item counts
     data.items, data.counts = get_corpus_counts(data.c)
-    # if args.high_filter is None and args.high_percent is None and not args.quiet:
-    #     args.high_filter, candidates = get_high_filter(c, words=stoplist, items=items, counts=counts)
-    #     if len(candidates):
-    #         print("Filtering {} high frequency word{}.".format(len(candidates),
-    #                                                            's' if len(candidates) > 1 else ''))
-    #         stoplist.update(candidates)
-    # elif args.high_filter is None and args.high_percent is None and args.quiet:
     if args.high_filter is None and args.high_percent is None and args.quiet:
         pass
     elif args.high_filter:
         candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
-            # print("Filtering {} high frequency word{}.".format(len(candidates),
-            #                                                    's' if len(candidates) > 1 else ''))
             data.highCandidates = candidates
             data.highLabel._value = args.high_filter
-            # data.stoplist.update(candidates)
     elif args.high_percent:
         args.high_filter = get_closest_bin(data.c, 1 - (args.high_percent / 100.), counts=data.counts)
         print(args.high_filter)
         candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
-            print("Filtering {} high frequency word{}.".format(len(candidates),
-                                                               's' if len(candidates) > 1 else ''))
             data.stoplist.update(candidates)
-
-    # DO THIS AUTOMATICALLY BASE OFF ARGS, NOT THE FIRST IF
-    # TODO TEST USUAL BEHAVIOR
     
-    # if args.low_filter is None and args.low_percent is None and not args.quiet:
-    #     args.low_filter, candidates = get_low_filter(c, words=stoplist, items=items, counts=counts)
-    #     if len(candidates):
-    #         print("Filtering {} low frequency word{}.".format(len(candidates),
-    #                                                           's' if len(candidates) > 1 else ''))
-    #         stoplist.update(candidates)
-    # elif args.low_filter is None and args.low_percent is None and args.quiet:
+
     if args.low_filter is None and args.low_percent is None and args.quiet:
         pass
     elif args.low_filter:
         candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
-            # print("Filtering {} low frequency words.".format(len(candidates)))
             data.lowCandidates = candidates
             data.lowLabel._value = args.low_filter
-            # data.stoplist.update(candidates)
     elif args.low_percent:
         args.low_filter = get_closest_bin(data.c, 1 - (args.low_percent / 100.), reverse=True, counts=data.counts)
         print(args.low_filter)
         candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
-            print("Filtering {} low frequency word{}.".format(len(candidates),
-                                                               's' if len(candidates) > 1 else ''))
             data.stoplist.update(candidates)
 
     def gui(screen, scene):
@@ -1384,17 +1075,14 @@ def gui(screen, scene):
             Scene([Summary(screen)], -1, name="Summary"),
             Scene([HighFreq(screen)], -1, name="High Freq"),
             Scene([LowFreq(screen)], -1, name="Low Freq")
-            # Scene([Lang(screen)], -1, name="Lang")
         ]
         global data
         data.wholeScreen = screen
         screen.play(scenes, stop_on_resize=True, start_scene=scene)
 
-    # global data
     data.prepSize.text = str("Prepared corpus unique words: " + str(len(data.c)))
 
     last_scene = None
-    # global data = PrepData()
     while True:
         try:
             Screen.wrapper(gui, catch_interrupt=True, arguments=[last_scene])
@@ -1403,19 +1091,9 @@ def gui(screen, scene):
         except ResizeScreenError as e:
             last_scene = e.scene
 
-    # DO THIS WHEN PREPPING MAYBE? THE EXIT PORTION
-    # TODO TEST WHEN THIS HAPPENS, PUT IN SCREEN AFTER PREP
-
-    print("out of the loop")
-
     data.stoplist.update(data.highCandidates)
     data.stoplist.update(data.lowCandidates)
     data.stoplist.update(data.stopCandidates)
-    print(data.highCandidates)
-    # print(data.highFiltered)
-    print(data.lowCandidates)
-    # print(data.lowFiltered)
-    print(data.stopCandidates)
 
     if not data.stoplist:
         print("No stopwords applied.\n\n")
@@ -1428,8 +1106,6 @@ def gui(screen, scene):
         print(len(data.c))
         print("\n")
 
-    # LEAVE THE REST, TILL THE END OF THIS METHOD AS IS
-
     def name_corpus(dirname, languages, lowfreq=None, highfreq=None):
         corpus_name = [dirname]
 

From 4f8b7995ad28c020d5bfc5eb2ca9cd53822578b8 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Mon, 12 Nov 2018 13:30:08 -0500
Subject: [PATCH 04/21] removing booleans meant for highlighting fields

---
 topicexplorer/prep.py | 83 +++++--------------------------------------
 1 file changed, 8 insertions(+), 75 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 4afffc8b..07be621a 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -417,21 +417,17 @@ class PrepData(Frame):
     def __init__(self):
         self.stoplist = set()
         self.label = Label("change this")
-        self.summaryHigh = Text(label="Number of word frequency:", name="summaryHighFreq", on_change=self.summaryHighNumFocus)
-        self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent", on_change=self.summaryHighPercentFocus)
-        self.summaryHighFocus = False
-        self.high = Text("High frequency word filter (#):", "highFreq", on_change=self.highNumFocus)
-        self.highPercent = Text("High ferquency word filter (%):", "highPercent", on_change=self.highPercentFocus)
+        self.summaryHigh = Text(label="Number of word frequency:", name="summaryHighFreq")
+        self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent")
+        self.high = Text("High frequency word filter (#):", "highFreq")
+        self.highPercent = Text("High ferquency word filter (%):", "highPercent")
         self.highLabel = Label("high label", height=35)
-        self.highFocus = False
         self.highCandidates = []
-        self.summaryLow = Text("Number of word frequency:", "summaryLowFreq", on_change=self.summaryLowNumFocus)
-        self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent", on_change=self.summaryLowPercentFocus)
-        self.summaryLowFocus = False
-        self.low = Text("Low frequency word filter (#):", "lowFreq", on_change=self.lowNumFocus)
-        self.lowPercent = Text("Low frequency word filter (%):", "lowPercent", on_change=self.lowPercentFocus)
+        self.summaryLow = Text("Number of word frequency:", "summaryLowFreq")
+        self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent")
+        self.low = Text("Low frequency word filter (#):", "lowFreq")
+        self.lowPercent = Text("Low frequency word filter (%):", "lowPercent")
         self.lowLabel = Label("low label", height=35)
-        self.lowFocus = False
         self.lowCandidates = []
         self.minWord = Text("Minimum word length: ", "length")
         self.counter = 0
@@ -442,61 +438,6 @@ def __init__(self):
         self.englishCandidates = []
         self.prepSize = Label("need to update length", align="^")
 
-    def summaryHighPercentFocus(self):
-        if self.summaryHighFocus:
-            self.summaryHighFocus = False
-            self.summaryHigh.blur()
-        if self.summaryLowFocus:
-            self.summaryLowFocus = False
-            self.summaryLow.blur()
-            self.summaryLowPercent.blur()
-    
-    def summaryHighNumFocus(self):
-        if self.summaryHighFocus:
-            self.summaryHighFocus = False
-            self.summaryHighPercent.blur()
-        if self.summaryLowFocus:
-            self.summaryLowFocus = False
-            self.summaryLow.blur()
-            self.summaryLowPercent.blur()
-
-    def highPercentFocus(self):
-        if self.highFocus:
-            self.highFocus = False
-            self.high.blur()
-    
-    def highNumFocus(self):
-        if self.highFocus:
-            self.highFocus = False
-            self.highPercent.blur()
-    
-    def summaryLowPercentFocus(self):
-        if self.summaryLowFocus:
-            self.summaryLowFocus = False
-            self.summaryLow.blur()
-        if self.summaryHighFocus:
-            self.summaryHighFocus = False
-            self.summaryHigh.blur()
-            self.summaryHighPercent.blur()
-
-    def summaryLowNumFocus(self):
-        if self.summaryLowFocus:
-            self.summaryLowFocus = False
-            self.summaryLowPercent.blur()
-        if self.summaryHighFocus:
-            self.summaryHigh.blur()
-            self.summaryHighPercent.blur()
-
-    def lowPercentFocus(self):
-        if self.lowFocus:
-            self.lowFocus = False
-            self.low.blur()
-    
-    def lowNumFocus(self):
-        if self.lowFocus:
-            self.lowFocus = False
-            self.highPercent.blur()
-
 class Summary(Frame):
     def __init__(self, screen):
         super(Summary, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
@@ -588,7 +529,6 @@ def _prepHigh(selection):
         else:
             data.summaryHighPercent.focus()
             data.summaryHigh.focus()
-            data.summaryHighFocus = True
             confirm()
 
     @staticmethod
@@ -601,7 +541,6 @@ def _prepLow(selection):
         else:
             data.summaryLowPercent.focus()
             data.summaryLow.focus()
-            data.summaryLowFocus = True
             confirm()
 
     def _high(self):
@@ -635,7 +574,6 @@ def _popupHigh(selection):
         else:
             data.summaryHighPercent.focus()
             data.summaryHigh.focus()
-            data.summaryHighFocus = True
             confirm()
             return
         high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
@@ -680,7 +618,6 @@ def _popupLow(selection):
         else:
             data.summaryLowPercent.focus()
             data.summaryLow.focus()
-            data.summaryLowFocus = True
             confirm()
             return
         low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
@@ -750,7 +687,6 @@ def _popup(selection):
         else:
             data.highPercent.focus()
             data.high.focus()
-            data.highFocus = True
             confirm()
             return
         high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
@@ -795,7 +731,6 @@ def _popupChange(selection):
         else:
             data.highPercent.focus()
             data.high.focus()
-            data.highFocus = True
             confirm()
             return
         high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
@@ -857,7 +792,6 @@ def _popup(selection):
         else:
             data.lowPercent.focus()
             data.low.focus()
-            data.lowFocus = True
             confirm()
             return
         low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
@@ -902,7 +836,6 @@ def _popupChange(selection):
         else:
             data.lowPercent.focus()
             data.low.focus()
-            data.lowFocus = True
             confirm()
             return
         low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)

From 07c39d38124114c9e18abe55293a7ca8116c18ea Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Mon, 12 Nov 2018 13:48:53 -0500
Subject: [PATCH 05/21] added comments

---
 topicexplorer/prep.py | 77 +++++++++++++++++++++++++++++++++----------
 1 file changed, 60 insertions(+), 17 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 07be621a..e700f78f 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -438,7 +438,9 @@ def __init__(self):
         self.englishCandidates = []
         self.prepSize = Label("need to update length", align="^")
 
+# Initial landing scene
 class Summary(Frame):
+    # Makes the layout of the scene
     def __init__(self, screen):
         super(Summary, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
                                         title="Summary", reduce_cpu=True)
@@ -483,27 +485,31 @@ def __init__(self, screen):
         layout2.add_widget(Button("exit", self._exit), 3)
         self.fix()
     
-    # proceeds to scene with chart that displays with current settings
+    # Preps the corpus
     def _prep(self):
         self.save()
         global data
         minNum = 3
+        # Ensure that there is a valid value for one of the high fields
         try:
-            high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
+            high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
         except Exception as e:
             self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._prepHigh))
             return
+        # Ensure that there is a valid value for one of the low fields
         try:
-            low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
+            low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
         except Exception as e:
             self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._prepLow))
             return
+        # Ensure there is a valid value for the min word field
         if data.minWord.value != "":
             try:
                 minNum = int(data.minWord.value)
             except Exception as e:
                 self._scene.add_effect(PopUpDialog(self._screen, "Please enter a valid value for Minimum Word Length", ["OK"]))
                 return
+        # Apply English stopwords if the checkbox is selected
         if data.english.value:
             data.englishCandidates = stop_language(data.c, "english")
         data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
@@ -513,12 +519,14 @@ def _prep(self):
         data.stopCandidates = get_small_words(data.c, minNum)
         raise StopApplication("Quitting")
 
+    # Reset highlighting of fields
     @staticmethod
     def _fix(selection):
         global data
         data.summaryHighPercent.blur()
         data.summaryHigh.blur()
 
+    # Handle button clicks for high value popup
     @staticmethod
     def _prepHigh(selection):
         global data
@@ -531,6 +539,7 @@ def _prepHigh(selection):
             data.summaryHigh.focus()
             confirm()
 
+    # Handle button clicks for low value popup
     @staticmethod
     def _prepLow(selection):
         global data
@@ -543,11 +552,13 @@ def _prepLow(selection):
             data.summaryLow.focus()
             confirm()
 
+    # Handle button click of the <high> button on the Summary scene
     def _high(self):
         self.save()
         global data
+        # Determine if one of the high values are valid
         try:
-            high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
+            high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
         except Exception as e:
             if e.args[2]:
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupHigh))
@@ -564,6 +575,7 @@ def _high(self):
         data.highLabel.text += filtered
         raise NextScene("High Freq")
 
+    # Handle button clicks for high popup
     @staticmethod
     def _popupHigh(selection):
         global data
@@ -576,7 +588,7 @@ def _popupHigh(selection):
             data.summaryHigh.focus()
             confirm()
             return
-        high = test(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
+        high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
         data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
         temp = deepcopy(data.stoplist)
@@ -586,11 +598,13 @@ def _popupHigh(selection):
         data.highLabel.text += filtered
         raise NextScene("High Freq")
     
+    # Handle button click of the <low> button on the Summary scene
     def _low(self):
         self.save()
         global data
+        # Determine if one of the low values are valid
         try:
-            low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
+            low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
         except Exception as e:
             if e.args[2]:
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupLow))
@@ -608,6 +622,7 @@ def _low(self):
         data.lowLabel.text += filtered
         raise NextScene("Low Freq")
     
+    # Handle button clicks for low popup
     @staticmethod
     def _popupLow(selection):
         global data
@@ -620,7 +635,7 @@ def _popupLow(selection):
             data.summaryLow.focus()
             confirm()
             return
-        low = test(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
+        low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
         data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
         temp = deepcopy(data.stoplist)
@@ -630,13 +645,15 @@ def _popupLow(selection):
         data.lowLabel.text += filtered
         raise NextScene("Low Freq")
 
-    # exits without prepping
+    # Exits without prepping
     @staticmethod
     def _exit():
         sys.exit(0)
         raise StopApplication("Quitting")
 
+# High frequency scene
 class HighFreq(Frame):
+    # Loads in the scene layout
     def __init__(self, screen):
         super(HighFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
                                         title="High Frequency Word Filter", reduce_cpu=True)
@@ -654,12 +671,14 @@ def __init__(self, screen):
         layout2.add_widget(Button("Update", self._change), 1)
         self.fix()
 
+    # Handle button click of Ok
     def _ok(self):
         self.save()
         global data
 
+        # Determines if one of the high values are valid
         try:
-            high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
+            high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
         except Exception as e:
             if e.args[2]:
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popup))
@@ -678,8 +697,10 @@ def _ok(self):
         updatePreppedLength()
         raise NextScene("Summary")
 
+    # Handle button clicks for high popup
     @staticmethod
     def _popup(selection):
+        # Handle the selections
         if str(selection) == "0":
             data.highPercent._value = "30.0"
         elif str(selection) == "1":
@@ -689,7 +710,7 @@ def _popup(selection):
             data.high.focus()
             confirm()
             return
-        high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
+        high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
         data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
         temp = deepcopy(data.stoplist)
@@ -700,12 +721,14 @@ def _popup(selection):
 
         raise NextScene("Summary")
     
+    # Handle button click for Update
     def _change(self):
         self.save()
         global data
 
+        # Determine if one of the high values are valid
         try:
-            high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
+            high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
         except Exception as e:
             if e.args[2]:
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupChange))
@@ -722,8 +745,10 @@ def _change(self):
                                                                 num=high)
         data.highLabel.text += filtered
     
+    # Handle button click for popup after clicking change
     @staticmethod
     def _popupChange(selection):
+        # Handle the selections
         if str(selection) == "0":
             data.highPercent._value = "30.0"
         elif str(selection) == "1":
@@ -733,7 +758,7 @@ def _popupChange(selection):
             data.high.focus()
             confirm()
             return
-        high = test(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
+        high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
         data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
         temp = deepcopy(data.stoplist)
@@ -742,7 +767,9 @@ def _popupChange(selection):
                                                                 num=high)
         data.highLabel.text += filtered
 
+# Low frequency scene
 class LowFreq(Frame):
+    # Loads in the scene layout
     def __init__(self, screen):
         super(LowFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
                                         title="Low Frequency Word Filter", reduce_cpu=True)
@@ -760,11 +787,14 @@ def __init__(self, screen):
         layout2.add_widget(Button("Update", self._change), 1)
         self.fix()
 
+    # Handle button click of Ok
     def _ok(self):
         self.save()
         global data
+        
+        # Determines if one of the low values are valid
         try:
-            low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
+            low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
         except Exception as e:
             if e.args[2]:
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popup))
@@ -783,8 +813,10 @@ def _ok(self):
         updatePreppedLength()
         raise NextScene("Summary")
     
+    # Handle button clicks for low popup
     @staticmethod
     def _popup(selection):
+        # Handle the selections
         if str(selection) == "0":
             data.lowPercent._value = "20.0"
         elif str(selection) == "1":
@@ -794,7 +826,7 @@ def _popup(selection):
             data.low.focus()
             confirm()
             return
-        low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
+        low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
 
         data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
@@ -806,11 +838,14 @@ def _popup(selection):
         
         raise NextScene("Summary")
     
+    # Handle button click for Update
     def _change(self):
         self.save()
         global data
+        
+        # Determine if one of the low values are valid
         try:
-            low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
+            low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
         except Exception as e:
             if e.args[2]:
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1], on_close=self._popupChange))
@@ -827,8 +862,11 @@ def _change(self):
                                                             num=low)
         data.lowLabel.text += filtered
 
+    
+    # Handle button click for popup after clicking change
     @staticmethod
     def _popupChange(selection):
+        # Handle the selections
         if str(selection) == "0":
             data.lowPercent._value = "20.0"
         elif str(selection) == "1":
@@ -838,7 +876,7 @@ def _popupChange(selection):
             data.low.focus()
             confirm()
             return
-        low = test(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
+        low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
         data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
         temp = deepcopy(data.stoplist)
@@ -847,7 +885,8 @@ def _popupChange(selection):
                                                                 num=low)
         data.lowLabel.text += filtered
 
-def test(num, percent, numPair, percentPair, iden, rev):
+# Determine if the values for the num and percent fields are valid
+def validate(num, percent, numPair, percentPair, iden, rev):
     defaults = {"high": "30%", "low": "20%"}
     if num.value == "" and percent.value == "":
         raise Exception("Apply default of " + str(defaults[iden]) + " for the " + iden + " frequency, don't stop list, or edit value?", ["Yes", "Don't stop list", "Edit value"], True)
@@ -870,6 +909,8 @@ def test(num, percent, numPair, percentPair, iden, rev):
         raise Exception(msg, ["Ok"], False)
     return ret
 
+# Update the prepped length by storing c and stoplist in temp variables,
+# then updating the originals, and then restoring the originals
 def updatePreppedLength():
     global data
     temp = deepcopy(data.stoplist)
@@ -882,12 +923,14 @@ def updatePreppedLength():
     tempC.in_place_stoplist(temp)
     data.prepSize.text = str("Prepared corpus unique words: " + str(len(tempC)))
 
+# Highlight the necessary fields
 def confirm():
     global data
     tempScreen = data.wholeScreen.current_scene._effects[0]._screen
     tempScene = data.wholeScreen.current_scene
     tempScene.add_effect(PopUpDialog(tempScreen, "Please input a value in one of the highlighted fields", ["OK"], on_close=reset))
 
+# Reset all highlighted fields
 def reset(selection):
     global data
     data.summaryHigh.blur()

From 47891977472fff037ab626af7c240136148ac8f7 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Mon, 12 Nov 2018 13:51:57 -0500
Subject: [PATCH 06/21] removed most unused variables

---
 topicexplorer/prep.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index e700f78f..ac5dc809 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -291,8 +291,6 @@ def get_closest_bin(c, thresh, reverse=False, counts=None):
 
 def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
     import numpy as np
-    header = "FILTER HIGH FREQUENCY WORDS"
-    stars = old_div((80 - len(header) - 2), 2)
 
     # Get frequency bins
     if items is None or counts is None:
@@ -304,7 +302,6 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
 
     ret = ""
 
-    high_filter = False
     bin_counts, bins = np.histogram(counts, bins=bins)
     ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', "# words", "Rate") + "\n"
     last_row = 0
@@ -327,7 +324,6 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
 def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     input_filter = num
-    accept = None
     try:
         candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts)
         places = np.in1d(c.words, candidates)
@@ -351,8 +347,6 @@ def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
 
 def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
     import numpy as np
-    header = "FILTER LOW FREQUENCY WORDS"
-    stars = old_div((80 - len(header) - 2), 2)
 
     # Get frequency bins
     if items is None or counts is None:
@@ -364,7 +358,6 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
 
     ret = ""
 
-    low_filter = False
     bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins)
     ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n"
     last_row = 0
@@ -388,7 +381,6 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
 def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     input_filter = num
-    accept = None
     try:
 
         candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts)

From 9299811f097c4dfff698dd787fbe4c8a93c17fc4 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Wed, 14 Nov 2018 17:30:53 -0500
Subject: [PATCH 07/21] making text fields on summary smaller

---
 topicexplorer/prep.py | 53 ++++++++++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index ac5dc809..1d371932 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -409,14 +409,22 @@ class PrepData(Frame):
     def __init__(self):
         self.stoplist = set()
         self.label = Label("change this")
-        self.summaryHigh = Text(label="Number of word frequency:", name="summaryHighFreq")
-        self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent")
+        # self.summaryHigh = Text(label="Words:", name="summaryHighFreq")
+        self.summaryHighText = Label("Words:", align=">")
+        self.summaryHigh = Text(label="")
+        # self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent")
+        self.summaryHighPercentText = Label("Percent:", align=">")
+        self.summaryHighPercent = Text(label="")
         self.high = Text("High frequency word filter (#):", "highFreq")
         self.highPercent = Text("High ferquency word filter (%):", "highPercent")
         self.highLabel = Label("high label", height=35)
         self.highCandidates = []
-        self.summaryLow = Text("Number of word frequency:", "summaryLowFreq")
-        self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent")
+        # self.summaryLow = Text("Number of word frequency:", "summaryLowFreq")
+        self.summaryLowText = Label("Words:", align=">")
+        self.summaryLow = Text(label="")
+        # self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent")
+        self.summaryLowPercentText = Label("Percent:", align=">")
+        self.summaryLowPercent = Text(label="")
         self.low = Text("Low frequency word filter (#):", "lowFreq")
         self.lowPercent = Text("Low frequency word filter (%):", "lowPercent")
         self.lowLabel = Label("low label", height=35)
@@ -440,35 +448,48 @@ def __init__(self, screen):
         global data
 
         highTitle = Layout([100])
-        highOptions = Layout([1, 1])
         self.add_layout(highTitle)
-        self.add_layout(highOptions)
-        lowTitle = Layout([100])
-        lowOptions = Layout([1, 1])
-        self.add_layout(lowTitle)
-        self.add_layout(lowOptions)
-        layout = Layout([100], fill_frame=True)
-        self.add_layout(layout)
-
         highTitle.add_widget(Divider(height=1, line_char=" "))
         highTitle.add_widget(Label("High Frequency Word Filter", align="^"))
-        highOptions.add_widget(data.summaryHigh, 0)
+
+        highOptions = Layout([10, 1, 9])
+        self.add_layout(highOptions)
+        highOptions.add_widget(data.summaryHighText, 0)
+        highOptions.add_widget(data.summaryHigh, 1)
+        highOptions.add_widget(Label(""), 2)
+        highOptions.add_widget(data.summaryHighPercentText, 0)
         highOptions.add_widget(data.summaryHighPercent, 1)
+        highOptions.add_widget(Label(""), 2)
         highOptions.add_widget(Divider(height=1, line_char="-"), 0)
         highOptions.add_widget(Divider(height=1, line_char="-"), 1)
-
+        highOptions.add_widget(Divider(height=1, line_char="-"), 2)
+        
+        lowTitle = Layout([100])
+        self.add_layout(lowTitle)
         lowTitle.add_widget(Label("Low Frequency Word Filter", align="^"))
-        lowOptions.add_widget(data.summaryLow, 0)
+
+        lowOptions = Layout([10, 1, 9])
+        self.add_layout(lowOptions)
+        lowOptions.add_widget(data.summaryLowText, 0)
+        lowOptions.add_widget(data.summaryLow, 1)
+        lowOptions.add_widget(Label(""), 2)
+        lowOptions.add_widget(data.summaryLowPercentText, 0)
         lowOptions.add_widget(data.summaryLowPercent, 1)
+        lowOptions.add_widget(Label(""), 2)
         lowOptions.add_widget(Divider(height=1, line_char="-"), 0)
         lowOptions.add_widget(Divider(height=1, line_char="-"), 1)
+        lowOptions.add_widget(Divider(height=1, line_char="-"), 2)
         lowOptions.add_widget(Divider(height=1, line_char=" "), 0)
         lowOptions.add_widget(Divider(height=1, line_char=" "), 1)
+        lowOptions.add_widget(Divider(height=1, line_char=" "), 2)
 
+        layout = Layout([100], fill_frame=True)
+        self.add_layout(layout)
         layout.add_widget(data.english)
         layout.add_widget(data.minWord)
         layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^"))
         layout.add_widget(data.prepSize)
+
         layout2 = Layout([1, 1, 1, 1])
         self.add_layout(layout2)
         layout2.add_widget(Button("prep", self._prep), 0)

From 0cc848d3a9cb2741ac27f0eaeff6c04836d54739 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Tue, 20 Nov 2018 21:26:05 -0500
Subject: [PATCH 08/21] need to handle file selection in file browser

---
 topicexplorer/prep.py | 202 +++++++++++++++++++++++++++++++-----------
 1 file changed, 149 insertions(+), 53 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 1d371932..0372fb0d 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -131,7 +131,7 @@
 from topicexplorer.lib.util import isint, is_valid_configfile, bool_prompt
 
 from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \
-    Button, TextBox, Widget, Label, PopUpDialog, PopupMenu, CheckBox
+    Button, TextBox, Widget, Label, PopUpDialog, PopupMenu, CheckBox, FileBrowser, KeyboardEvent
 from asciimatics.scene import Scene
 from asciimatics.screen import Screen
 from asciimatics.exceptions import ResizeScreenError, NextScene, StopApplication
@@ -409,27 +409,27 @@ class PrepData(Frame):
     def __init__(self):
         self.stoplist = set()
         self.label = Label("change this")
-        # self.summaryHigh = Text(label="Words:", name="summaryHighFreq")
-        self.summaryHighText = Label("Words:", align=">")
-        self.summaryHigh = Text(label="")
-        # self.summaryHighPercent = Text("Percent of words:", "summaryHighPercent")
-        self.summaryHighPercentText = Label("Percent:", align=">")
-        self.summaryHighPercent = Text(label="")
-        self.high = Text("High frequency word filter (#):", "highFreq")
-        self.highPercent = Text("High ferquency word filter (%):", "highPercent")
+        self.summaryHigh = Text(label="  Words:", name="summaryHighFreq", max_length=5)
+        # self.summaryHighText = Label("Words:", align=">")
+        # self.summaryHigh = Text(label="")
+        self.summaryHighPercent = Text("Percent:", "summaryHighPercent", max_length=5)
+        # self.summaryHighPercentText = Label("Percent:", align=">")
+        # self.summaryHighPercent = Text(label="")
+        self.high = Text("High frequency word filter (#):", "highFreq", max_length=5)
+        self.highPercent = Text("High ferquency word filter (%):", "highPercent", max_length=5)
         self.highLabel = Label("high label", height=35)
         self.highCandidates = []
-        # self.summaryLow = Text("Number of word frequency:", "summaryLowFreq")
-        self.summaryLowText = Label("Words:", align=">")
-        self.summaryLow = Text(label="")
-        # self.summaryLowPercent = Text("Percent of words:", "summaryLowPercent")
-        self.summaryLowPercentText = Label("Percent:", align=">")
-        self.summaryLowPercent = Text(label="")
-        self.low = Text("Low frequency word filter (#):", "lowFreq")
-        self.lowPercent = Text("Low frequency word filter (%):", "lowPercent")
+        self.summaryLow = Text("  Words:", "summaryLowFreq", max_length=5)
+        # self.summaryLowText = Label("Words:", align=">")
+        # self.summaryLow = Text(label="")
+        self.summaryLowPercent = Text("Percent:", "summaryLowPercent", max_length=5)
+        # self.summaryLowPercentText = Label("Percent:", align=">")
+        # self.summaryLowPercent = Text(label="")
+        self.low = Text("Low frequency word filter (#):", "lowFreq", max_length=5)
+        self.lowPercent = Text("Low frequency word filter (%):", "lowPercent", max_length=5)
         self.lowLabel = Label("low label", height=35)
         self.lowCandidates = []
-        self.minWord = Text("Minimum word length: ", "length")
+        self.minWord = Text("Minimum word length:", "length", max_length=5)
         self.counter = 0
         self.error = Label("Error message")
         self.switch = 0
@@ -437,65 +437,91 @@ def __init__(self):
         self.english = CheckBox("Yes", label="Apply English stopwords")
         self.englishCandidates = []
         self.prepSize = Label("need to update length", align="^")
+        self.stopwordFile = Label("Current stopworded file: <None>", align="^")
 
 # Initial landing scene
 class Summary(Frame):
     # Makes the layout of the scene
     def __init__(self, screen):
-        super(Summary, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
+        super(Summary, self).__init__(screen, screen.height, screen.width, hover_focus=True,
                                         title="Summary", reduce_cpu=True)
 
         global data
 
+        # super().set_theme("green")
+
         highTitle = Layout([100])
         self.add_layout(highTitle)
         highTitle.add_widget(Divider(height=1, line_char=" "))
-        highTitle.add_widget(Label("High Frequency Word Filter", align="^"))
+        highTitle.add_widget(Label("High Frequency Word Filter\n--------------------------", align="^", height=2))
 
-        highOptions = Layout([10, 1, 9])
+        highOptions = Layout([7, 2, 6])
         self.add_layout(highOptions)
-        highOptions.add_widget(data.summaryHighText, 0)
+        # highOptions.add_widget(data.summaryHighText, 0)
+        # highOptions.add_widget(data.summaryHigh, 1)
         highOptions.add_widget(data.summaryHigh, 1)
-        highOptions.add_widget(Label(""), 2)
-        highOptions.add_widget(data.summaryHighPercentText, 0)
+        # highOptions.add_widget(Label(""), 2)
+        # highOptions.add_widget(data.summaryHighPercentText, 0)
+        # highOptions.add_widget(data.summaryHighPercent, 1)
         highOptions.add_widget(data.summaryHighPercent, 1)
-        highOptions.add_widget(Label(""), 2)
-        highOptions.add_widget(Divider(height=1, line_char="-"), 0)
-        highOptions.add_widget(Divider(height=1, line_char="-"), 1)
-        highOptions.add_widget(Divider(height=1, line_char="-"), 2)
+        # highOptions.add_widget(Label(""), 2)
+
+        highButton = Layout([1])
+        self.add_layout(highButton)
+        highButton.add_widget(Divider(height=1, line_char=" "), 0)
+        highButton.add_widget(Button("High frequency wizard", self._high), 0)
+        highButton.add_widget(Divider(height=2, line_char="-"), 0)
+        highButton.add_widget(Divider(height=1, line_char=" "), 0)
         
         lowTitle = Layout([100])
         self.add_layout(lowTitle)
-        lowTitle.add_widget(Label("Low Frequency Word Filter", align="^"))
+        lowTitle.add_widget(Label("Low Frequency Word Filter\n-------------------------", align="^", height=2))
 
-        lowOptions = Layout([10, 1, 9])
+        lowOptions = Layout([7, 2, 6])
         self.add_layout(lowOptions)
-        lowOptions.add_widget(data.summaryLowText, 0)
+        # lowOptions.add_widget(data.summaryLowText, 0)
+        # lowOptions.add_widget(data.summaryLow, 1)
         lowOptions.add_widget(data.summaryLow, 1)
-        lowOptions.add_widget(Label(""), 2)
-        lowOptions.add_widget(data.summaryLowPercentText, 0)
+        # lowOptions.add_widget(Label(""), 2)
+        # lowOptions.add_widget(data.summaryLowPercentText, 0)
+        # lowOptions.add_widget(data.summaryLowPercent, 1)
         lowOptions.add_widget(data.summaryLowPercent, 1)
-        lowOptions.add_widget(Label(""), 2)
-        lowOptions.add_widget(Divider(height=1, line_char="-"), 0)
-        lowOptions.add_widget(Divider(height=1, line_char="-"), 1)
-        lowOptions.add_widget(Divider(height=1, line_char="-"), 2)
-        lowOptions.add_widget(Divider(height=1, line_char=" "), 0)
-        lowOptions.add_widget(Divider(height=1, line_char=" "), 1)
-        lowOptions.add_widget(Divider(height=1, line_char=" "), 2)
-
-        layout = Layout([100], fill_frame=True)
-        self.add_layout(layout)
-        layout.add_widget(data.english)
-        layout.add_widget(data.minWord)
-        layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^"))
-        layout.add_widget(data.prepSize)
+        # lowOptions.add_widget(Label(""), 2)
+
+        lowButton = Layout([1])
+        self.add_layout(lowButton)
+        lowButton.add_widget(Divider(height=1, line_char=" "), 0)
+        lowButton.add_widget(Button("Low frequency wizard", self._low), 0)
+        lowButton.add_widget(Divider(height=2, line_char="-"), 0)
+        lowButton.add_widget(Divider(height=1, line_char=" "), 0)
+
+        stopwordHeader = Layout([1])
+        self.add_layout(stopwordHeader)
+        stopwordHeader.add_widget(Label("Stopwords\n---------", align="^", height=2), 0)
+
+        stopwords = Layout([8, 6, 4])
+        self.add_layout(stopwords)
+        stopwords.add_widget(data.english, 1)
+        
+        stopMinWords = Layout([8, 5, 5])
+        self.add_layout(stopMinWords)
+        stopMinWords.add_widget(data.minWord, 1)
+
+        stopwordFileLayout = Layout([1])
+        self.add_layout(stopwordFileLayout)
+        stopwordFileLayout.add_widget(data.stopwordFile, 0)
+        stopwordFileLayout.add_widget(Divider(height=1, line_char=" "), 0)
+        stopwordFileLayout.add_widget(Button("Select new file", self._chooseFile), 0)
+        stopwordFileLayout.add_widget(Divider(height=2, line_char=" "), 0)
+        # layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^"))
+        # layout.add_widget(data.prepSize)
 
-        layout2 = Layout([1, 1, 1, 1])
+        layout2 = Layout([1, 1])
         self.add_layout(layout2)
         layout2.add_widget(Button("prep", self._prep), 0)
-        layout2.add_widget(Button("high", self._high), 1)
-        layout2.add_widget(Button("low", self._low), 2)
-        layout2.add_widget(Button("exit", self._exit), 3)
+        # layout2.add_widget(Button("high", self._high), 1)
+        # layout2.add_widget(Button("low", self._low), 2)
+        layout2.add_widget(Button("exit", self._exit), 1)
         self.fix()
     
     # Preps the corpus
@@ -658,6 +684,9 @@ def _popupLow(selection):
         data.lowLabel.text += filtered
         raise NextScene("Low Freq")
 
+    def _chooseFile(self):
+        raise NextScene("File Browser")
+
     # Exits without prepping
     @staticmethod
     def _exit():
@@ -898,7 +927,73 @@ def _popupChange(selection):
                                                                 num=low)
         data.lowLabel.text += filtered
 
-# Determin if the values for the num and percent fields are valid
+# Taken from: https://github.com/peterbrittain/asciimatics/blob/master/samples/treeview.py
+class Files(Frame):
+    def __init__(self, screen):
+        super(Files, self).__init__(
+            screen, screen.height, screen.width, has_border=False)
+
+        # Create the (very simple) form layout...
+        layout = Layout([1], fill_frame=True)
+        self.add_layout(layout)
+
+        # Now populate it with the widgets we want to use.
+        self._details = Text()
+        self._details.disabled = True
+        self._details.custom_colour = "field"
+        regex = "((?:\w+)(?:.)?(?:txt))|(\w+)$"
+        self._list = FileBrowser(Widget.FILL_FRAME,
+                                 os.path.abspath("."),
+                                 name="mc_list",
+                                 on_select=self.popup,
+                                 on_change=self.details,
+                                 file_filter=regex)
+        layout.add_widget(Label("Local disk browser sample"))
+        layout.add_widget(Divider())
+        layout.add_widget(self._list)
+        layout.add_widget(Divider())
+        layout.add_widget(self._details)
+        layout.add_widget(Label("Press Enter to select or `q` to quit."))
+
+        # Prepare the Frame for use.
+        self.fix()
+
+    def popup(self):
+        # Just confirm whenever the user actually selects something.
+        if not self._list.value.endswith(".txt") and "." in self._list.value:
+            self._scene.add_effect(PopUpDialog(self._screen, "Please pick a valid file (a .txt file or a file with no extension)", ["OK"]))
+        else:
+            data.stopwordFile.text = "Current stopworded file: " + self._list.value
+            raise NextScene("Summary")
+            # self._scene.add_effect(PopUpDialog(self._screen, "You selected: {}".format(self._list.value), ["OK"]))
+
+    def details(self):
+        # If python magic is installed, provide a little more detail of the current file.
+        if self._list.value:
+            if os.path.isdir(self._list.value):
+                self._details.value = "Directory"
+            elif os.path.isfile(self._list.value):
+                try:
+                    self._details.value = magic.from_file(self._list.value)
+                except NameError:
+                    self._details.value = "File (run 'pip install python-magic' for more details)"
+        else:
+            self._details.value = "--"
+
+    def process_event(self, event):
+        # Do the key handling for this Frame.
+        global data
+        if isinstance(event, KeyboardEvent):
+            if event.key_code in [ord('q'), ord('Q'), Screen.ctrl("c")]:
+                raise NextScene("Summary")
+            elif event.key_code in [ord('c'), ord('C')]:
+                data.stopwordFile.text = "hello"
+                raise NextScene("Summary")
+
+        # Now pass on to lower levels for normal handling of the event.
+        return super(Files, self).process_event(event)
+
+# Determine if the values for the num and percent fields are valid
 def validate(num, percent, numPair, percentPair, iden, rev):
     defaults = {"high": "30%", "low": "20%"}
     if num.value == "" and percent.value == "":
@@ -1063,7 +1158,8 @@ def gui(screen, scene):
         scenes = [
             Scene([Summary(screen)], -1, name="Summary"),
             Scene([HighFreq(screen)], -1, name="High Freq"),
-            Scene([LowFreq(screen)], -1, name="Low Freq")
+            Scene([LowFreq(screen)], -1, name="Low Freq"),
+            Scene([Files(screen)], -1, name="File Browser")
         ]
         global data
         data.wholeScreen = screen

From fe80ac13bca133ebd834489d52da9bf13bbf2523 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Tue, 27 Nov 2018 15:16:18 -0500
Subject: [PATCH 09/21] handling invalid values for percentages

---
 topicexplorer/prep.py | 150 +++++++++++++++++++++++++++++++-----------
 1 file changed, 113 insertions(+), 37 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 0372fb0d..bcb6dedb 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -324,6 +324,7 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
 def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     input_filter = num
+    valid = True
     try:
         candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts)
         places = np.in1d(c.words, candidates)
@@ -337,12 +338,13 @@ def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
         filtered += u' '.join(candidates)
 
         if len(candidates) == len(c.words):
-            filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus."
-            filtered += "Please choose a different filter."
+            valid = False
+            # filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus."
+            # filtered += "Please choose a different filter."
 
     except ValueError:
         input_filter = 0
-    return (candidates, filtered)
+    return (candidates, filtered, valid)
 
 
 def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
@@ -381,8 +383,8 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
 def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     input_filter = num
+    valid = True
     try:
-
         candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts)
         places = np.in1d(c.words, candidates)
         places = dict(zip(candidates, np.where(places)[0]))
@@ -396,13 +398,14 @@ def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
 
 
         if len(candidates) == len(c.words):
-            filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus."
-            filtered += "Please choose a different filter."
+            valid = False
+            # filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus."
+            # filtered += "Please choose a different filter."
 
     except ValueError:
         input_filter = 0
 
-    return (candidates, filtered)
+    return (candidates, filtered, valid)
 
 # Stores all of the variables for the labels
 class PrepData(Frame):
@@ -417,7 +420,7 @@ def __init__(self):
         # self.summaryHighPercent = Text(label="")
         self.high = Text("High frequency word filter (#):", "highFreq", max_length=5)
         self.highPercent = Text("High ferquency word filter (%):", "highPercent", max_length=5)
-        self.highLabel = Label("high label", height=35)
+        self.highLabel = Label("high label", height=58)
         self.highCandidates = []
         self.summaryLow = Text("  Words:", "summaryLowFreq", max_length=5)
         # self.summaryLowText = Label("Words:", align=">")
@@ -427,7 +430,7 @@ def __init__(self):
         # self.summaryLowPercent = Text(label="")
         self.low = Text("Low frequency word filter (#):", "lowFreq", max_length=5)
         self.lowPercent = Text("Low frequency word filter (%):", "lowPercent", max_length=5)
-        self.lowLabel = Label("low label", height=35)
+        self.lowLabel = Label("low label", height=58)
         self.lowCandidates = []
         self.minWord = Text("Minimum word length:", "length", max_length=5)
         self.counter = 0
@@ -437,7 +440,9 @@ def __init__(self):
         self.english = CheckBox("Yes", label="Apply English stopwords")
         self.englishCandidates = []
         self.prepSize = Label("need to update length", align="^")
+        self.fileName = "<None>"
         self.stopwordFile = Label("Current stopworded file: <None>", align="^")
+        self.fileCandidates = []
 
 # Initial landing scene
 class Summary(Frame):
@@ -512,16 +517,26 @@ def __init__(self, screen):
         stopwordFileLayout.add_widget(data.stopwordFile, 0)
         stopwordFileLayout.add_widget(Divider(height=1, line_char=" "), 0)
         stopwordFileLayout.add_widget(Button("Select new file", self._chooseFile), 0)
-        stopwordFileLayout.add_widget(Divider(height=2, line_char=" "), 0)
+        stopwordFileLayout.add_widget(Divider(height=2, line_char="-"), 0)
+        stopwordFileLayout.add_widget(Divider(height=1, line_char=" "), 0)
         # layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^"))
         # layout.add_widget(data.prepSize)
 
+        corpusLenLayout = Layout([1])
+        self.add_layout(corpusLenLayout)
+        corpusLenLayout.add_widget(Label("Corpus Length\n-------------", align="^", height=2), 0)
+        corpusLenLayout.add_widget(Label("Original corpus unique works: " + str(data.c.original_length), align="^"))
+        corpusLenLayout.add_widget(data.prepSize)
+        corpusLenLayout.add_widget(Divider(height=1, line_char=" "), 0)
+
         layout2 = Layout([1, 1])
         self.add_layout(layout2)
         layout2.add_widget(Button("prep", self._prep), 0)
+        # layout2.add_widget(Divider(height=1, line_char="-"), 4)
         # layout2.add_widget(Button("high", self._high), 1)
         # layout2.add_widget(Button("low", self._low), 2)
         layout2.add_widget(Button("exit", self._exit), 1)
+        # layout2.add_widget(Divider(height=1, line_char="-"), 5)
         self.fix()
     
     # Preps the corpus
@@ -551,10 +566,28 @@ def _prep(self):
         # Apply English stopwords if the checkbox is selected
         if data.english.value:
             data.englishCandidates = stop_language(data.c, "english")
-        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        else:
+            data.englishCandidates = []
+        # Get the stopwords from a file
+        if data.fileName != "<None>":
+            with open(data.fileName, encoding='utf8') as swf:
+                    data.fileCandidates = [word.strip() for word in swf]
+
+                    if len(data.fileCandidates):
+                        print("Applying custom stopword file to remove {} word{}.".format(
+                            len(data.fileCandidates), 's' if len(data.fileCandidates) > 1 else ''))
+        else:
+            data.fileCandidates = []
+        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
-        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        if not valid:
+            self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
+            return
+        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
+        if not valid:
+            self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
+            return
         data.stopCandidates = get_small_words(data.c, minNum)
         raise StopApplication("Quitting")
 
@@ -595,6 +628,7 @@ def _prepLow(selection):
     def _high(self):
         self.save()
         global data
+        
         # Determine if one of the high values are valid
         try:
             high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
@@ -604,8 +638,12 @@ def _high(self):
             else:
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
-        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        
+        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
+        if not valid:
+            self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
+            return
         
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
@@ -628,7 +666,7 @@ def _popupHigh(selection):
             confirm()
             return
         high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
-        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
@@ -641,6 +679,7 @@ def _popupHigh(selection):
     def _low(self):
         self.save()
         global data
+        
         # Determine if one of the low values are valid
         try:
             low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
@@ -651,8 +690,11 @@ def _low(self):
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
             
-        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
+        if not valid:
+            self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
+            return
 
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
@@ -675,7 +717,7 @@ def _popupLow(selection):
             confirm()
             return
         low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
-        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
@@ -697,7 +739,7 @@ def _exit():
 class HighFreq(Frame):
     # Loads in the scene layout
     def __init__(self, screen):
-        super(HighFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
+        super(HighFreq, self).__init__(screen, screen.height, screen.width, hover_focus=True,
                                         title="High Frequency Word Filter", reduce_cpu=True)
 
         global data
@@ -728,8 +770,11 @@ def _ok(self):
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
 
-        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
+        if not valid:
+            self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
+            return
         
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
@@ -753,7 +798,7 @@ def _popup(selection):
             confirm()
             return
         high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
-        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.highCandidates, filtered, value = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
@@ -778,8 +823,11 @@ def _change(self):
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
 
-        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
+        if not valid:
+            self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
+            return
         
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
@@ -801,7 +849,7 @@ def _popupChange(selection):
             confirm()
             return
         high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
-        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
@@ -813,7 +861,7 @@ def _popupChange(selection):
 class LowFreq(Frame):
     # Loads in the scene layout
     def __init__(self, screen):
-        super(LowFreq, self).__init__(screen, screen.height * 2 // 3, screen.width * 2 // 3, hover_focus=True,
+        super(LowFreq, self).__init__(screen, screen.height, screen.width, hover_focus=True,
                                         title="Low Frequency Word Filter", reduce_cpu=True)
 
         global data
@@ -844,8 +892,11 @@ def _ok(self):
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
             
-        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
+        if not valid:
+            self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
+            return
 
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
@@ -870,7 +921,7 @@ def _popup(selection):
             return
         low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
 
-        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
@@ -895,8 +946,11 @@ def _change(self):
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
             
-        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
+        if not valid:
+            self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
+            return
 
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
@@ -919,7 +973,7 @@ def _popupChange(selection):
             confirm()
             return
         low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
-        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
@@ -941,7 +995,7 @@ def __init__(self, screen):
         self._details = Text()
         self._details.disabled = True
         self._details.custom_colour = "field"
-        regex = "((?:\w+)(?:.)?(?:txt))|(\w+)$"
+        regex = "^([\w+\- ]*)(.txt)$"
         self._list = FileBrowser(Widget.FILL_FRAME,
                                  os.path.abspath("."),
                                  name="mc_list",
@@ -953,7 +1007,7 @@ def __init__(self, screen):
         layout.add_widget(self._list)
         layout.add_widget(Divider())
         layout.add_widget(self._details)
-        layout.add_widget(Label("Press Enter to select or `q` to quit."))
+        layout.add_widget(Label("Press Enter to select, 'q' to quit without making changes, or 'c' to clear file selection."))
 
         # Prepare the Frame for use.
         self.fix()
@@ -964,6 +1018,7 @@ def popup(self):
             self._scene.add_effect(PopUpDialog(self._screen, "Please pick a valid file (a .txt file or a file with no extension)", ["OK"]))
         else:
             data.stopwordFile.text = "Current stopworded file: " + self._list.value
+            data.fileName = self._list.value
             raise NextScene("Summary")
             # self._scene.add_effect(PopUpDialog(self._screen, "You selected: {}".format(self._list.value), ["OK"]))
 
@@ -987,7 +1042,8 @@ def process_event(self, event):
             if event.key_code in [ord('q'), ord('Q'), Screen.ctrl("c")]:
                 raise NextScene("Summary")
             elif event.key_code in [ord('c'), ord('C')]:
-                data.stopwordFile.text = "hello"
+                data.stopwordFile.text = "Current stopword file: <None>"
+                data.fileName = "<None>"
                 raise NextScene("Summary")
 
         # Now pass on to lower levels for normal handling of the event.
@@ -1026,6 +1082,22 @@ def updatePreppedLength():
     if data.english.value:
         data.englishCandidates = stop_language(tempC, "english")
         temp.update(data.englishCandidates)
+    if data.fileName != "<None>":
+        with open(data.fileName, encoding='utf8') as swf:
+                data.fileCandidates = [word.strip() for word in swf]
+
+                if len(data.fileCandidates):
+                    print("Applying custom stopword file to remove {} word{}.".format(
+                        len(data.fileCandidates), 's' if len(data.fileCandidates) > 1 else ''))
+                    temp.update(data.fileCandidates)
+    minNum = 3
+    if data.minWord.value != "":
+        try:
+            minNum = int(data.minWord.value)
+        except Exception:
+            minNum = 3
+    data.stopCandidates = get_small_words(tempC, minNum)
+    temp.update(data.stopCandidates)
     temp.update(data.lowCandidates)
     temp.update(data.highCandidates)
     tempC.in_place_stoplist(temp)
@@ -1107,21 +1179,23 @@ def main(args):
     
     # Apply custom stopwords file
     if args.stopword_file:
-        with open(args.stopword_file, encoding='utf8') as swf:
-            #candidates = [unidecode(word.strip()) for word in swf]
-            candidates = [word.strip() for word in swf]
+        data.fileName = args.stopword_file
+        data.stopwordFile.text = "Current stopworded file: " + args.stopword_file
+        # with open(args.stopword_file, encoding='utf8') as swf:
+        #     candidates = [unidecode(word.strip()) for word in swf]
+        #     data.fileCandidates = [word.strip() for word in swf]
 
-            if len(candidates):
-                print("Applying custom stopword file to remove {} word{}.".format(
-                    len(candidates), 's' if len(candidates) > 1 else ''))
-                data.stoplist.update(candidates)
+        #     if len(data.fileCandidates):
+        #         print("Applying custom stopword file to remove {} word{}.".format(
+        #             len(data.fileCandidates), 's' if len(data.fileCandidates) > 1 else ''))
+        #         data.stoplist.update(candidates)
 
     if args.min_word_len:
         candidates = get_small_words(data.c, args.min_word_len)
         if len(candidates):
             print("Filtering {} small word{} with less than {} characters.".format(
                 len(candidates), 's' if len(candidates) > 1 else '', args.min_word_len))
-            data.stoplist.update(candidates)
+            # data.stoplist.update(candidates)
 
     # cache item counts
     data.items, data.counts = get_corpus_counts(data.c)
@@ -1179,6 +1253,8 @@ def gui(screen, scene):
     data.stoplist.update(data.highCandidates)
     data.stoplist.update(data.lowCandidates)
     data.stoplist.update(data.stopCandidates)
+    data.stoplist.update(data.englishCandidates)
+    data.stoplist.update(data.fileCandidates)
 
     if not data.stoplist:
         print("No stopwords applied.\n\n")

From d3964bc77ed4614ea65e0d584aef964a6aa45aa7 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Wed, 28 Nov 2018 14:37:49 -0500
Subject: [PATCH 10/21] adding comments

---
 topicexplorer/prep.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index bcb6dedb..f5ad19f7 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -580,11 +580,13 @@ def _prep(self):
             data.fileCandidates = []
         data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
+        # Checks to see if the value entered will filter the whole corpus out
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
             return
         data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
+        # Checks to see if the value entered will filter the whole corpus out
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
@@ -641,6 +643,7 @@ def _high(self):
         
         data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
+        # Checks to see if the value entered will filter the whole corpus out
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
             return
@@ -692,6 +695,7 @@ def _low(self):
             
         data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
+        # Checks to see if the value entered will filter the whole corpus out
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
@@ -772,6 +776,7 @@ def _ok(self):
 
         data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
+        # Checks to see if the value entered will filter the whole corpus out
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
             return
@@ -798,7 +803,7 @@ def _popup(selection):
             confirm()
             return
         high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
-        data.highCandidates, filtered, value = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
@@ -825,6 +830,7 @@ def _change(self):
 
         data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
+        # Checks to see if the value entered will filter the whole corpus out
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
             return
@@ -894,6 +900,7 @@ def _ok(self):
             
         data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
+        # Checks to see if the value entered will filter the whole corpus out
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
@@ -948,6 +955,7 @@ def _change(self):
             
         data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
+        # Checks to see if the value entered will filter the whole corpus out
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
@@ -1019,6 +1027,7 @@ def popup(self):
         else:
             data.stopwordFile.text = "Current stopworded file: " + self._list.value
             data.fileName = self._list.value
+            updatePreppedLength()
             raise NextScene("Summary")
             # self._scene.add_effect(PopUpDialog(self._screen, "You selected: {}".format(self._list.value), ["OK"]))
 
@@ -1040,10 +1049,12 @@ def process_event(self, event):
         global data
         if isinstance(event, KeyboardEvent):
             if event.key_code in [ord('q'), ord('Q'), Screen.ctrl("c")]:
+                updatePreppedLength()
                 raise NextScene("Summary")
             elif event.key_code in [ord('c'), ord('C')]:
                 data.stopwordFile.text = "Current stopword file: <None>"
                 data.fileName = "<None>"
+                updatePreppedLength()
                 raise NextScene("Summary")
 
         # Now pass on to lower levels for normal handling of the event.
@@ -1085,7 +1096,6 @@ def updatePreppedLength():
     if data.fileName != "<None>":
         with open(data.fileName, encoding='utf8') as swf:
                 data.fileCandidates = [word.strip() for word in swf]
-
                 if len(data.fileCandidates):
                     print("Applying custom stopword file to remove {} word{}.".format(
                         len(data.fileCandidates), 's' if len(data.fileCandidates) > 1 else ''))

From 3c7f4937affba0e54e8a8d3cfda96771c4987851 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Mon, 3 Dec 2018 18:34:43 -0500
Subject: [PATCH 11/21] file preview, rearranging wizard screens

---
 topicexplorer/prep.py | 267 +++++++++++++++++++++++++-----------------
 1 file changed, 162 insertions(+), 105 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index f5ad19f7..53239797 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -300,26 +300,26 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
     bins = sorted(set(bins))
     bins.append(max(counts))
 
-    ret = ""
+    chart = ""
 
     bin_counts, bins = np.histogram(counts, bins=bins)
-    ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', "# words", "Rate") + "\n"
+    chart += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Top', '% of corpus', "# words", "Rate") + "\n"
     last_row = 0
     for bin, count in zip(bins[-2::-1], np.cumsum(bin_counts[::-1])):
         filtered_counts = counts[get_mask(c, words)]
         if (filtered_counts >= bin).sum() > last_row:
             percentage = 1. - (old_div(counts[counts < bin].sum(), float(c.original_length)))
-            ret += "{0:>5.0f}x".format(bin).rjust(8)
-            ret += '{0:2.1f}% '.format(percentage * 100).rjust(10)
-            ret += (u'\u2588' * int(percentage * 36)).ljust(36)
-            ret += "{0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(15)
-            ret += " >={0:>5.0f}x".format(bin).ljust(8) + "\n"
+            chart += "{0:>5.0f}x".format(bin).rjust(8)
+            chart += '{0:2.1f}% '.format(percentage * 100).rjust(10)
+            chart += (u'\u2588' * int(percentage * 36)).ljust(36)
+            chart += "{0:0.0f} words".format((filtered_counts >= bin).sum()).rjust(15)
+            chart += " >={0:>5.0f}x".format(bin).ljust(8) + "\n"
 
         last_row = (filtered_counts >= bin).sum()
 
-    ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37)
-    ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n'
-    return ret
+    chart += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(34)
+    chart += '{} words total'.format(get_mask(c, words).sum()).rjust(20)
+    return chart
 
 def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
@@ -358,27 +358,27 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
     bins = sorted(set(bins))
     bins.append(max(counts))
 
-    ret = ""
+    chart = ""
 
     bin_counts, bins = np.histogram(counts[counts.argsort()[::-1]], bins=bins)
-    ret += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n"
+    chart += "{0:>8s} {1:>8s} {2:<36s} {3:>14s} {4:>8s}".format("Rate", 'Bottom', '% of corpus', "# words", "Rate") + "\n"
     last_row = 0
     for bin, count in zip(bins, np.cumsum(bin_counts)):
         filtered_counts = counts[get_mask(c, words)]
         if last_row < (filtered_counts < bin).sum() <= len(filtered_counts):
             percentage = (old_div(counts[counts <= bin].sum(), float(c.original_length)))
-            ret += "{0:>5.0f}x".format(bin).rjust(8)
-            ret += '{0:2.1f}%'.format(percentage * 100).rjust(9)
-            ret += " " + (u'\u2588' * int(percentage * 36)).ljust(36)
-            ret += "{0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(15)
-            ret += " <={0:>5.0f}x".format(bin).ljust(8) + "\n"
+            chart += "{0:>5.0f}x".format(bin).rjust(8)
+            chart += '{0:2.1f}%'.format(percentage * 100).rjust(9)
+            chart += " " + (u'\u2588' * int(percentage * 36)).ljust(36)
+            chart += "{0:0.0f} words".format((filtered_counts <= bin).sum()).rjust(15)
+            chart += " <={0:>5.0f}x".format(bin).ljust(8) + "\n"
             if (filtered_counts < bin).sum() == len(filtered_counts):
                 break
         last_row = (filtered_counts >= bin).sum()
 
-    ret += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(37)
-    ret += '{} words total'.format(get_mask(c, words).sum()).rjust(20) + '\n'
-    return ret
+    chart += (' ' * 18) + "{} total occurrences".format(counts.sum()).ljust(34)
+    chart += '{} words total'.format(get_mask(c, words).sum()).rjust(20)
+    return chart
 
 def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
@@ -413,24 +413,19 @@ def __init__(self):
         self.stoplist = set()
         self.label = Label("change this")
         self.summaryHigh = Text(label="  Words:", name="summaryHighFreq", max_length=5)
-        # self.summaryHighText = Label("Words:", align=">")
-        # self.summaryHigh = Text(label="")
         self.summaryHighPercent = Text("Percent:", "summaryHighPercent", max_length=5)
-        # self.summaryHighPercentText = Label("Percent:", align=">")
-        # self.summaryHighPercent = Text(label="")
-        self.high = Text("High frequency word filter (#):", "highFreq", max_length=5)
-        self.highPercent = Text("High ferquency word filter (%):", "highPercent", max_length=5)
-        self.highLabel = Label("high label", height=58)
+        self.high = Text("  Words:", "highFreq", max_length=5)
+        self.highPercent = Text("Percent:", "highPercent", max_length=5)
+        self.highChart = Label("high label", align="^")
+        self.highStop = Label("high stop", align="^")
+        self.highStop.text = "hello"
         self.highCandidates = []
         self.summaryLow = Text("  Words:", "summaryLowFreq", max_length=5)
-        # self.summaryLowText = Label("Words:", align=">")
-        # self.summaryLow = Text(label="")
         self.summaryLowPercent = Text("Percent:", "summaryLowPercent", max_length=5)
-        # self.summaryLowPercentText = Label("Percent:", align=">")
-        # self.summaryLowPercent = Text(label="")
-        self.low = Text("Low frequency word filter (#):", "lowFreq", max_length=5)
-        self.lowPercent = Text("Low frequency word filter (%):", "lowPercent", max_length=5)
-        self.lowLabel = Label("low label", height=58)
+        self.low = Text("  Words:", "lowFreq", max_length=5)
+        self.lowPercent = Text("Percent:", "lowPercent", max_length=5)
+        self.lowChart = Label("low label", align="^")
+        self.lowStop = Label("low stop", align="^")
         self.lowCandidates = []
         self.minWord = Text("Minimum word length:", "length", max_length=5)
         self.counter = 0
@@ -462,14 +457,8 @@ def __init__(self, screen):
 
         highOptions = Layout([7, 2, 6])
         self.add_layout(highOptions)
-        # highOptions.add_widget(data.summaryHighText, 0)
-        # highOptions.add_widget(data.summaryHigh, 1)
         highOptions.add_widget(data.summaryHigh, 1)
-        # highOptions.add_widget(Label(""), 2)
-        # highOptions.add_widget(data.summaryHighPercentText, 0)
-        # highOptions.add_widget(data.summaryHighPercent, 1)
         highOptions.add_widget(data.summaryHighPercent, 1)
-        # highOptions.add_widget(Label(""), 2)
 
         highButton = Layout([1])
         self.add_layout(highButton)
@@ -484,14 +473,8 @@ def __init__(self, screen):
 
         lowOptions = Layout([7, 2, 6])
         self.add_layout(lowOptions)
-        # lowOptions.add_widget(data.summaryLowText, 0)
-        # lowOptions.add_widget(data.summaryLow, 1)
         lowOptions.add_widget(data.summaryLow, 1)
-        # lowOptions.add_widget(Label(""), 2)
-        # lowOptions.add_widget(data.summaryLowPercentText, 0)
-        # lowOptions.add_widget(data.summaryLowPercent, 1)
         lowOptions.add_widget(data.summaryLowPercent, 1)
-        # lowOptions.add_widget(Label(""), 2)
 
         lowButton = Layout([1])
         self.add_layout(lowButton)
@@ -519,24 +502,18 @@ def __init__(self, screen):
         stopwordFileLayout.add_widget(Button("Select new file", self._chooseFile), 0)
         stopwordFileLayout.add_widget(Divider(height=2, line_char="-"), 0)
         stopwordFileLayout.add_widget(Divider(height=1, line_char=" "), 0)
-        # layout.add_widget(Label("Original corpus unique words: " + str(data.c.original_length), align="^"))
-        # layout.add_widget(data.prepSize)
 
         corpusLenLayout = Layout([1])
         self.add_layout(corpusLenLayout)
         corpusLenLayout.add_widget(Label("Corpus Length\n-------------", align="^", height=2), 0)
-        corpusLenLayout.add_widget(Label("Original corpus unique works: " + str(data.c.original_length), align="^"))
+        corpusLenLayout.add_widget(Label("Original corpus length: " + str(data.c.original_length), align="^"))
         corpusLenLayout.add_widget(data.prepSize)
         corpusLenLayout.add_widget(Divider(height=1, line_char=" "), 0)
 
         layout2 = Layout([1, 1])
         self.add_layout(layout2)
         layout2.add_widget(Button("prep", self._prep), 0)
-        # layout2.add_widget(Divider(height=1, line_char="-"), 4)
-        # layout2.add_widget(Button("high", self._high), 1)
-        # layout2.add_widget(Button("low", self._low), 2)
         layout2.add_widget(Button("exit", self._exit), 1)
-        # layout2.add_widget(Divider(height=1, line_char="-"), 5)
         self.fix()
     
     # Preps the corpus
@@ -650,9 +627,14 @@ def _high(self):
         
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
-        data.highLabel.text += filtered
+        (columns, line) = os.get_terminal_size()
+        data.highChart.text = chart
+        data.highChart._required_height = chart.count('\n') + 1
+        data.highStop.text = filtered
+        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highFreqScene.fix()
         raise NextScene("High Freq")
 
     # Handle button clicks for high popup
@@ -673,9 +655,14 @@ def _popupHigh(selection):
                                                                 num=high)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
-        data.highLabel.text += filtered
+        (columns, line) = os.get_terminal_size()
+        data.highChart.text = chart
+        data.highChart._required_height = chart.count('\n') + 1
+        data.highStop.text = filtered
+        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highFreqScene.fix()
         raise NextScene("High Freq")
     
     # Handle button click of the <low> button on the Summary scene
@@ -699,12 +686,16 @@ def _low(self):
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
-
+        (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
-        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                             num=low)
-        data.lowLabel.text += filtered
+        data.lowChart.text = chart
+        data.lowChart._required_height = chart.count('\n') + 1
+        data.lowStop.text = filtered
+        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowFreqScene.fix()
         raise NextScene("Low Freq")
     
     # Handle button clicks for low popup
@@ -725,9 +716,14 @@ def _popupLow(selection):
                                                                 num=low)
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
-        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=low)
-        data.lowLabel.text += filtered
+        (columns, line) = os.get_terminal_size()
+        data.lowChart.text = chart
+        data.lowChart._required_height = chart.count('\n') + 1
+        data.lowStop.text = filtered
+        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowFreqScene.fix()
         raise NextScene("Low Freq")
 
     def _chooseFile(self):
@@ -747,12 +743,18 @@ def __init__(self, screen):
                                         title="High Frequency Word Filter", reduce_cpu=True)
 
         global data
+        data.highFreqScene = self
         
-        layout = Layout([100], fill_frame=True)
-        self.add_layout(layout)
-        layout.add_widget(data.highLabel)
-        layout.add_widget(data.high)
-        layout.add_widget(data.highPercent)
+        chartLayout = Layout([1])
+        self.add_layout(chartLayout)
+        chartLayout.add_widget(data.highChart, 0)
+        fieldsLayout = Layout([7, 2, 6])
+        self.add_layout(fieldsLayout)
+        fieldsLayout.add_widget(data.high, 1)
+        fieldsLayout.add_widget(data.highPercent, 1)
+        stopLayout = Layout([1])
+        self.add_layout(stopLayout)
+        stopLayout.add_widget(data.highStop)
         layout2 = Layout([1, 1])
         self.add_layout(layout2)
         layout2.add_widget(Button("Ok", self._ok), 0)
@@ -780,12 +782,16 @@ def _ok(self):
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
             return
-        
+        (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
-        data.highLabel.text += filtered
+        data.highChart.text = chart
+        data.highChart._required_height = chart.count('\n') + 1
+        data.highStop.text = filtered
+        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highFreqScene.fix()
         updatePreppedLength()
         raise NextScene("Summary")
 
@@ -805,12 +811,16 @@ def _popup(selection):
         high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
         data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
+        (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
-        data.highLabel.text += filtered
-
+        data.highChart.text = chart
+        data.highChart._required_height = chart.count('\n') + 1
+        data.highStop.text = filtered
+        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highFreqScene.fix()
         raise NextScene("Summary")
     
     # Handle button click for Update
@@ -835,11 +845,16 @@ def _change(self):
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
             return
         
+        (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
-        data.highLabel.text += filtered
+        data.highChart.text = chart
+        data.highChart._required_height = chart.count('\n') + 1
+        data.highStop.text = filtered
+        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highFreqScene.fix()
     
     # Handle button click for popup after clicking change
     @staticmethod
@@ -857,11 +872,16 @@ def _popupChange(selection):
         high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
         data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
+        (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
-        data.highLabel.text = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_high_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=high)
-        data.highLabel.text += filtered
+        data.highChart.text = chart
+        data.highChart._required_height = chart.count('\n') + 1
+        data.highStop.text = filtered
+        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highFreqScene.fix()
 
 # Low frequency scene
 class LowFreq(Frame):
@@ -871,12 +891,18 @@ def __init__(self, screen):
                                         title="Low Frequency Word Filter", reduce_cpu=True)
 
         global data
+        data.lowFreqScene = self
         
-        layout = Layout([100], fill_frame=True)
-        self.add_layout(layout)
-        layout.add_widget(data.lowLabel)
-        layout.add_widget(data.low)
-        layout.add_widget(data.lowPercent)
+        chartLayout = Layout([1])
+        self.add_layout(chartLayout)
+        chartLayout.add_widget(data.lowChart, 0)
+        fieldsLayout = Layout([7, 2, 6])
+        self.add_layout(fieldsLayout)
+        fieldsLayout.add_widget(data.low, 1)
+        fieldsLayout.add_widget(data.lowPercent, 1)
+        stopLayout = Layout([1])
+        self.add_layout(stopLayout)
+        stopLayout.add_widget(data.lowStop)
         layout2 = Layout([1, 1])
         self.add_layout(layout2)
         layout2.add_widget(Button("Ok", self._ok), 0)
@@ -904,12 +930,16 @@ def _ok(self):
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
-
+        (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
-        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                             num=low)
-        data.lowLabel.text += filtered
+        data.lowChart.text = chart
+        data.lowChart._required_height = chart.count('\n') + 1
+        data.lowStop.text = filtered
+        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowFreqScene.fix()
         updatePreppedLength()
         raise NextScene("Summary")
     
@@ -930,12 +960,16 @@ def _popup(selection):
 
         data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
+        (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
-        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        (chart, text) = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                             num=low)
-        data.lowLabel.text += filtered
-        
+        data.lowChart.text = chart
+        data.lowChart._required_height = chart.count('\n') + 1
+        data.lowStop.text = filtered
+        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowFreqScene.fix()
         raise NextScene("Summary")
     
     # Handle button click for Update
@@ -959,13 +993,16 @@ def _change(self):
         if not valid:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
-
+        (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
-        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                             num=low)
-        data.lowLabel.text += filtered
-
+        data.lowChart.text = chart
+        data.lowChart._required_height = chart.count('\n') + 1
+        data.lowStop.text = filtered
+        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowFreqScene.fix()
     
     # Handle button click for popup after clicking change
     @staticmethod
@@ -983,11 +1020,16 @@ def _popupChange(selection):
         low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
         data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
+        (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
-        data.lowLabel.text = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
+        chart = get_low_filter_chart(data.c, words=temp, items=data.items, counts=data.counts,
                                                                 num=low)
-        data.lowLabel.text += filtered
+        data.lowChart.text = chart
+        data.lowChart._required_height = chart.count('\n') + 1
+        data.lowStop.text = filtered
+        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowFreqScene.fix()
 
 # Taken from: https://github.com/peterbrittain/asciimatics/blob/master/samples/treeview.py
 class Files(Frame):
@@ -1025,11 +1067,20 @@ def popup(self):
         if not self._list.value.endswith(".txt") and "." in self._list.value:
             self._scene.add_effect(PopUpDialog(self._screen, "Please pick a valid file (a .txt file or a file with no extension)", ["OK"]))
         else:
-            data.stopwordFile.text = "Current stopworded file: " + self._list.value
-            data.fileName = self._list.value
+            global data
+            f = open(self._list.value, "r")
+            text = f.read()
+            data.tempFileName = self._list.value
+            self._scene.add_effect(PopUpDialog(self._screen, "Use the selected file with the following text?\n" + text, ["Yes", "No"], on_close=self.handlePopup))
+
+    @staticmethod
+    def handlePopup(selection):
+        if str(selection) == "0":
+            global data
+            data.stopwordFile.text = "Current stopworded file: " + data.tempFileName
+            data.fileName = data.tempFileName
             updatePreppedLength()
             raise NextScene("Summary")
-            # self._scene.add_effect(PopUpDialog(self._screen, "You selected: {}".format(self._list.value), ["OK"]))
 
     def details(self):
         # If python magic is installed, provide a little more detail of the current file.
@@ -1111,7 +1162,7 @@ def updatePreppedLength():
     temp.update(data.lowCandidates)
     temp.update(data.highCandidates)
     tempC.in_place_stoplist(temp)
-    data.prepSize.text = str("Prepared corpus unique words: " + str(len(tempC)))
+    data.prepSize.text = str("Prepared corpus length: " + str(len(tempC)))
 
 # Highlight the necessary fields
 def confirm():
@@ -1203,8 +1254,10 @@ def main(args):
     if args.min_word_len:
         candidates = get_small_words(data.c, args.min_word_len)
         if len(candidates):
-            print("Filtering {} small word{} with less than {} characters.".format(
-                len(candidates), 's' if len(candidates) > 1 else '', args.min_word_len))
+            data.lowCandidates = candidates
+            data.minWord._value = args.min_word_len
+            # print("Filtering {} small word{} with less than {} characters.".format(
+            #     len(candidates), 's' if len(candidates) > 1 else '', args.min_word_len))
             # data.stoplist.update(candidates)
 
     # cache item counts
@@ -1215,13 +1268,14 @@ def main(args):
         candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
             data.highCandidates = candidates
-            data.highLabel._value = args.high_filter
+            data.summaryHigh._value = args.high_filter
     elif args.high_percent:
         args.high_filter = get_closest_bin(data.c, 1 - (args.high_percent / 100.), counts=data.counts)
         print(args.high_filter)
         candidates = get_candidate_words(data.c, args.high_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
-            data.stoplist.update(candidates)
+            data.highCandidates = candidates
+            data.summaryHighPercent._value = args.high_percent
     
 
     if args.low_filter is None and args.low_percent is None and args.quiet:
@@ -1230,13 +1284,14 @@ def main(args):
         candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
             data.lowCandidates = candidates
-            data.lowLabel._value = args.low_filter
+            data.summaryLow._value = args.low_filter
     elif args.low_percent:
         args.low_filter = get_closest_bin(data.c, 1 - (args.low_percent / 100.), reverse=True, counts=data.counts)
         print(args.low_filter)
         candidates = get_candidate_words(data.c, -1 * args.low_filter, sort=False, items=data.items, counts=data.counts)
         if len(candidates):
-            data.stoplist.update(candidates)
+            data.lowCandidates = candidates
+            data.summaryLowPercent._value = args.low_percent
 
     def gui(screen, scene):
         scenes = [
@@ -1249,7 +1304,9 @@ def gui(screen, scene):
         data.wholeScreen = screen
         screen.play(scenes, stop_on_resize=True, start_scene=scene)
 
-    data.prepSize.text = str("Prepared corpus unique words: " + str(len(data.c)))
+    data.prepSize.text = str("Prepared corpus length: " + str(len(data.c)))
+
+    updatePreppedLength()
 
     last_scene = None
     while True:

From 6af97af3b91219777a374309cdff1550b2a7cec0 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Mon, 3 Dec 2018 19:36:33 -0500
Subject: [PATCH 12/21] adding asciimatics to the requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index a93e3805..884dcfd6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+-e git+https://github.com/peterbrittain/asciimatics.git@fcedb4947933de7e1507ec0dee8ca7a3f466928a#egg=asciimatics
 bottle>=0.12.0
 brewer2mpl>=1.4.0,<1.5.0
 decorator>=4.0.5

From 2ffed42a8af3142b783c9a885aecde1b959c544c Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Mon, 3 Dec 2018 22:08:57 -0500
Subject: [PATCH 13/21] demo works now

---
 topicexplorer/prep.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 53239797..382b6377 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -339,8 +339,6 @@ def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
 
         if len(candidates) == len(c.words):
             valid = False
-            # filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus."
-            # filtered += "Please choose a different filter."
 
     except ValueError:
         input_filter = 0
@@ -1309,7 +1307,7 @@ def gui(screen, scene):
     updatePreppedLength()
 
     last_scene = None
-    while True:
+    while not args.quiet:
         try:
             Screen.wrapper(gui, catch_interrupt=True, arguments=[last_scene])
             break

From bb3867194cd72e8f4b61da679e037a3aa5723ff4 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Tue, 4 Dec 2018 09:54:45 -0500
Subject: [PATCH 14/21] working on tests

---
 tests/test_prep.py    | 13 +++++++++++--
 topicexplorer/prep.py | 17 ++++++++++++++++-
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/tests/test_prep.py b/tests/test_prep.py
index dcb2a1f3..a14653f3 100644
--- a/tests/test_prep.py
+++ b/tests/test_prep.py
@@ -20,6 +20,7 @@
                     dtype=[('idx', '<i8'), ('sentence_label', '|S6')])]
 
 corpus = Corpus(text, context_data=ctx_data, context_types=['sentence'])
+print(str(corpus.words))
 
 def test_get_corpus_counts():
     items, counts = topicexplorer.prep.get_corpus_counts(corpus)
@@ -31,6 +32,7 @@ def test_get_small_words():
     assert topicexplorer.prep.get_small_words(corpus, 1) == []
 
 def test_get_closest_bin():
+    print(str(topicexplorer.prep.get_closest_bin(corpus, 0)))
     assert topicexplorer.prep.get_closest_bin(corpus, 0) == 0 
     assert topicexplorer.prep.get_closest_bin(corpus, 0.2) == 1 
     assert topicexplorer.prep.get_closest_bin(corpus, 0.5) == 1 
@@ -59,8 +61,8 @@ def test_get_candidate_words():
 @patch('topicexplorer.prep.input')
 def test_get_high_filter(input_mock):
     input_mock.side_effect = ['3', 'y']
-    high_filter, candidates = topicexplorer.prep.get_high_filter(corpus)
-    assert high_filter == 3
+    candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus)
+    assert high_filter == len(corpus.words) - len(candidates)
     assert candidates == ['I']
     
     # Test selection of all words
@@ -105,3 +107,10 @@ def test_get_low_filter(input_mock):
     low_filter, candidates = topicexplorer.prep.get_low_filter(corpus)
     assert low_filter == 1
     assert all(w in candidates for w in ['came', 'saw', 'conquered'])
+
+test_get_corpus_counts()
+test_get_small_words()
+# test_get_closest_bin()
+test_get_candidate_words()
+test_get_high_filter()
+test_get_low_filter()
diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 382b6377..d8294041 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -325,6 +325,21 @@ def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     input_filter = num
     valid = True
+    f = open("test.txt", "w+")
+    f.write("c" + "\n")
+    f.write(str(c) + "\n")
+    f.write("c words" + "\n")
+    f.write(str(c.words) + "\n")
+    f.write("words" + "\n")
+    f.write(str(words) + "\n")
+    f.write("items" + "\n")
+    f.write(str(items) + "\n")
+    f.write("counts" + "\n")
+    f.write(str(counts) + "\n")
+    f.write("num" + "\n")
+    f.write(str(num) + "\n")
+    f.write("lengths" + "\n")
+    f.write(str(len(c.words)) + " " + str(len(items)) + " " + str(len(counts)))
     try:
         candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts)
         places = np.in1d(c.words, candidates)
@@ -1405,4 +1420,4 @@ def populate_parser(parser):
 
     main(args)
 
-data = ""
\ No newline at end of file
+data = ""

From 8618c3ba9dd522751789caef63ceda72b27849b3 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Tue, 4 Dec 2018 15:24:03 -0500
Subject: [PATCH 15/21] modifying test cases

---
 tests/test_prep.py    | 91 ++++++++++++++++++++-----------------------
 topicexplorer/prep.py |  2 +-
 2 files changed, 44 insertions(+), 49 deletions(-)

diff --git a/tests/test_prep.py b/tests/test_prep.py
index a14653f3..22edf65f 100644
--- a/tests/test_prep.py
+++ b/tests/test_prep.py
@@ -20,7 +20,6 @@
                     dtype=[('idx', '<i8'), ('sentence_label', '|S6')])]
 
 corpus = Corpus(text, context_data=ctx_data, context_types=['sentence'])
-print(str(corpus.words))
 
 def test_get_corpus_counts():
     items, counts = topicexplorer.prep.get_corpus_counts(corpus)
@@ -32,7 +31,6 @@ def test_get_small_words():
     assert topicexplorer.prep.get_small_words(corpus, 1) == []
 
 def test_get_closest_bin():
-    print(str(topicexplorer.prep.get_closest_bin(corpus, 0)))
     assert topicexplorer.prep.get_closest_bin(corpus, 0) == 0 
     assert topicexplorer.prep.get_closest_bin(corpus, 0.2) == 1 
     assert topicexplorer.prep.get_closest_bin(corpus, 0.5) == 1 
@@ -60,57 +58,54 @@ def test_get_candidate_words():
 
 @patch('topicexplorer.prep.input')
 def test_get_high_filter(input_mock):
-    input_mock.side_effect = ['3', 'y']
-    candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus)
-    assert high_filter == len(corpus.words) - len(candidates)
+    # Test with high filter of 3
+    items, counts = topicexplorer.prep.get_corpus_counts(corpus)
+    candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=3)
+    assert len(corpus.words) - len(candidates) == 3
     assert candidates == ['I']
+    assert valid == True
     
-    # Test selection of all words
-    input_mock.side_effect = ['3', '1', '3', 'y']
-    high_filter, candidates = topicexplorer.prep.get_high_filter(corpus)
-    assert high_filter == 3
-    assert candidates == ['I']
+    # Test with high filter of 0
+    candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=0)
+    assert len(corpus.words) - len(candidates) == 4
+    assert candidates == []
+    assert valid == True
     
-    # Test not accept
-    input_mock.side_effect = ['3', 'n', '3', 'y']
-    high_filter, candidates = topicexplorer.prep.get_high_filter(corpus)
-    assert high_filter == 3
-    assert candidates == ['I']
-
-    # Test invalid action
-    input_mock.side_effect = ['blahhhh', '3', 'y']
-    high_filter, candidates = topicexplorer.prep.get_high_filter(corpus)
-    assert high_filter == 3
-    assert candidates == ['I']
+    # Test with high filter of 1, should return invalid
+    candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=1)
+    assert len(corpus.words) - len(candidates) == 0
+    assert candidates == ['I', 'came', 'conquered', 'saw']
+    assert valid == False
+
+    # Test with high filter of 100
+    candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=100)
+    assert len(corpus.words) - len(candidates) == 4
+    assert candidates == []
+    assert valid == True
 
 @patch('topicexplorer.prep.input')
 def test_get_low_filter(input_mock):
-    input_mock.side_effect = ['1', 'y']
-    low_filter, candidates = topicexplorer.prep.get_low_filter(corpus)
-    assert low_filter == 1 
+    # Test with low filter of 1
+    items, counts = topicexplorer.prep.get_corpus_counts(corpus)
+    candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=1)
+    assert len(corpus.words) - len(candidates) == 1 
     assert all(w in candidates for w in ['came', 'saw', 'conquered'])
+    assert valid == True
    
-    # Test selection of all words
-    input_mock.side_effect = ['1', '3', '1', 'y']
-    low_filter, candidates = topicexplorer.prep.get_low_filter(corpus)
-    assert low_filter == 1
-    assert all(w in candidates for w in ['came', 'saw', 'conquered'])
-
-    # Test not accept
-    input_mock.side_effect = ['1', 'n', '1', 'y']
-    low_filter, candidates = topicexplorer.prep.get_low_filter(corpus)
-    assert low_filter == 1
-    assert all(w in candidates for w in ['came', 'saw', 'conquered'])
-
-    # Test invalid action
-    input_mock.side_effect = ['blahhhh', '1', 'y']
-    low_filter, candidates = topicexplorer.prep.get_low_filter(corpus)
-    assert low_filter == 1
-    assert all(w in candidates for w in ['came', 'saw', 'conquered'])
-
-test_get_corpus_counts()
-test_get_small_words()
-# test_get_closest_bin()
-test_get_candidate_words()
-test_get_high_filter()
-test_get_low_filter()
+    # Test with low filter of 3
+    candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=3)
+    assert len(corpus.words) - len(candidates) == 0
+    assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I'])
+    assert valid == False
+
+    # Test with low filter of 0
+    candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=0)
+    assert len(corpus.words) - len(candidates) == 4
+    assert all(w in candidates for w in [])
+    assert valid == True
+
+    # Test with low filter of 100
+    candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=100)
+    assert len(corpus.words) - len(candidates) == 0
+    assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I'])
+    assert valid == False
diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index d8294041..8a5d34e9 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -277,7 +277,7 @@ def get_closest_bin(c, thresh, reverse=False, counts=None):
     if thresh == 0 and reverse:
         return max(counts) + 1
     elif thresh == 0 and not reverse:
-        return 1
+        return 0
     else:
         # sort counts
         counts = counts[counts.argsort()]

From 7dcde536ce8c8c618ee98249f54f87ec5bb1f022 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Wed, 5 Dec 2018 14:34:38 -0500
Subject: [PATCH 16/21] adding asciimatics install to windows build

---
 appveyor.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/appveyor.yml b/appveyor.yml
index 2e78203b..de8d7fb9 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -17,6 +17,7 @@ install:
   - "%PYTHON%\\python.exe -m conda install -q --yes cython scikit-learn pandas" # for vsm 
   - "%PYTHON%\\python.exe -c \"import nltk; nltk.download('stopwords'); nltk.download('punkt')\""
   - "%PYTHON%\\python.exe -m pip install unittest2 nose wget"
+  - "%PYTHON%\\python.exe -m pip install -e git+https://github.com/peterbrittain/asciimatics.git@fcedb4947933de7e1507ec0dee8ca7a3f466928a#egg=asciimatics"
   - "%PYTHON%\\python.exe -m pip install ."
 
 build: off

From 6d43236ec7c9e6325c6693aba67c4bc768ced144 Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Thu, 6 Dec 2018 12:25:34 -0500
Subject: [PATCH 17/21] adding dividers for wizard screens

---
 topicexplorer/prep.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 8a5d34e9..88449016 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -646,7 +646,7 @@ def _high(self):
         data.highChart.text = chart
         data.highChart._required_height = chart.count('\n') + 1
         data.highStop.text = filtered
-        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highStop._required_height = line - data.highChart._required_height - 7
         data.highFreqScene.fix()
         raise NextScene("High Freq")
 
@@ -674,7 +674,7 @@ def _popupHigh(selection):
         data.highChart.text = chart
         data.highChart._required_height = chart.count('\n') + 1
         data.highStop.text = filtered
-        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highStop._required_height = line - data.highChart._required_height - 7
         data.highFreqScene.fix()
         raise NextScene("High Freq")
     
@@ -707,7 +707,7 @@ def _low(self):
         data.lowChart.text = chart
         data.lowChart._required_height = chart.count('\n') + 1
         data.lowStop.text = filtered
-        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowStop._required_height = line - data.lowChart._required_height - 7
         data.lowFreqScene.fix()
         raise NextScene("Low Freq")
     
@@ -735,7 +735,7 @@ def _popupLow(selection):
         data.lowChart.text = chart
         data.lowChart._required_height = chart.count('\n') + 1
         data.lowStop.text = filtered
-        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowStop._required_height = line - data.lowChart._required_height - 7
         data.lowFreqScene.fix()
         raise NextScene("Low Freq")
 
@@ -761,12 +761,14 @@ def __init__(self, screen):
         chartLayout = Layout([1])
         self.add_layout(chartLayout)
         chartLayout.add_widget(data.highChart, 0)
+        chartLayout.add_widget(Divider())
         fieldsLayout = Layout([7, 2, 6])
         self.add_layout(fieldsLayout)
         fieldsLayout.add_widget(data.high, 1)
         fieldsLayout.add_widget(data.highPercent, 1)
         stopLayout = Layout([1])
         self.add_layout(stopLayout)
+        stopLayout.add_widget(Divider())
         stopLayout.add_widget(data.highStop)
         layout2 = Layout([1, 1])
         self.add_layout(layout2)
@@ -803,7 +805,7 @@ def _ok(self):
         data.highChart.text = chart
         data.highChart._required_height = chart.count('\n') + 1
         data.highStop.text = filtered
-        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highStop._required_height = line - data.highChart._required_height - 7
         data.highFreqScene.fix()
         updatePreppedLength()
         raise NextScene("Summary")
@@ -832,7 +834,7 @@ def _popup(selection):
         data.highChart.text = chart
         data.highChart._required_height = chart.count('\n') + 1
         data.highStop.text = filtered
-        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highStop._required_height = line - data.highChart._required_height - 7
         data.highFreqScene.fix()
         raise NextScene("Summary")
     
@@ -866,7 +868,7 @@ def _change(self):
         data.highChart.text = chart
         data.highChart._required_height = chart.count('\n') + 1
         data.highStop.text = filtered
-        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highStop._required_height = line - data.highChart._required_height - 7
         data.highFreqScene.fix()
     
     # Handle button click for popup after clicking change
@@ -893,7 +895,7 @@ def _popupChange(selection):
         data.highChart.text = chart
         data.highChart._required_height = chart.count('\n') + 1
         data.highStop.text = filtered
-        data.highStop._required_height = line - data.highChart._required_height - 5
+        data.highStop._required_height = line - data.highChart._required_height - 7
         data.highFreqScene.fix()
 
 # Low frequency scene
@@ -909,12 +911,14 @@ def __init__(self, screen):
         chartLayout = Layout([1])
         self.add_layout(chartLayout)
         chartLayout.add_widget(data.lowChart, 0)
+        chartLayout.add_widget(Divider())
         fieldsLayout = Layout([7, 2, 6])
         self.add_layout(fieldsLayout)
         fieldsLayout.add_widget(data.low, 1)
         fieldsLayout.add_widget(data.lowPercent, 1)
         stopLayout = Layout([1])
         self.add_layout(stopLayout)
+        stopLayout.add_widget(Divider())
         stopLayout.add_widget(data.lowStop)
         layout2 = Layout([1, 1])
         self.add_layout(layout2)
@@ -951,7 +955,7 @@ def _ok(self):
         data.lowChart.text = chart
         data.lowChart._required_height = chart.count('\n') + 1
         data.lowStop.text = filtered
-        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowStop._required_height = line - data.lowChart._required_height - 7
         data.lowFreqScene.fix()
         updatePreppedLength()
         raise NextScene("Summary")
@@ -981,7 +985,7 @@ def _popup(selection):
         data.lowChart.text = chart
         data.lowChart._required_height = chart.count('\n') + 1
         data.lowStop.text = filtered
-        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowStop._required_height = line - data.lowChart._required_height - 7
         data.lowFreqScene.fix()
         raise NextScene("Summary")
     
@@ -1014,7 +1018,7 @@ def _change(self):
         data.lowChart.text = chart
         data.lowChart._required_height = chart.count('\n') + 1
         data.lowStop.text = filtered
-        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowStop._required_height = line - data.lowChart._required_height - 7
         data.lowFreqScene.fix()
     
     # Handle button click for popup after clicking change
@@ -1041,7 +1045,7 @@ def _popupChange(selection):
         data.lowChart.text = chart
         data.lowChart._required_height = chart.count('\n') + 1
         data.lowStop.text = filtered
-        data.lowStop._required_height = line - data.lowChart._required_height - 5
+        data.lowStop._required_height = line - data.lowChart._required_height - 7
         data.lowFreqScene.fix()
 
 # Taken from: https://github.com/peterbrittain/asciimatics/blob/master/samples/treeview.py

From 7b7ede8cbeb770e0d7185c9f38c200ae5d9b4a5f Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Tue, 18 Dec 2018 16:28:26 -0500
Subject: [PATCH 18/21] raising ValueError now

---
 tests/test_prep.py    |  39 +++++------
 topicexplorer/prep.py | 156 ++++++++++++++++++------------------------
 2 files changed, 82 insertions(+), 113 deletions(-)

diff --git a/tests/test_prep.py b/tests/test_prep.py
index 22edf65f..da3ca61a 100644
--- a/tests/test_prep.py
+++ b/tests/test_prep.py
@@ -60,52 +60,47 @@ def test_get_candidate_words():
 def test_get_high_filter(input_mock):
     # Test with high filter of 3
     items, counts = topicexplorer.prep.get_corpus_counts(corpus)
-    candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=3)
+    candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=3)
     assert len(corpus.words) - len(candidates) == 3
     assert candidates == ['I']
-    assert valid == True
     
     # Test with high filter of 0
-    candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=0)
+    candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=0)
     assert len(corpus.words) - len(candidates) == 4
     assert candidates == []
-    assert valid == True
     
     # Test with high filter of 1, should return invalid
-    candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=1)
-    assert len(corpus.words) - len(candidates) == 0
-    assert candidates == ['I', 'came', 'conquered', 'saw']
-    assert valid == False
+    with unittest.TestCase.assertRaises(unittest.TestCase, ValueError):
+        candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=1)
+        assert len(corpus.words) - len(candidates) == 0
+        assert candidates == ['I', 'came', 'conquered', 'saw']
 
     # Test with high filter of 100
-    candidates, filtered, valid = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=100)
+    candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=100)
     assert len(corpus.words) - len(candidates) == 4
     assert candidates == []
-    assert valid == True
 
 @patch('topicexplorer.prep.input')
 def test_get_low_filter(input_mock):
     # Test with low filter of 1
     items, counts = topicexplorer.prep.get_corpus_counts(corpus)
-    candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=1)
+    candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=1)
     assert len(corpus.words) - len(candidates) == 1 
     assert all(w in candidates for w in ['came', 'saw', 'conquered'])
-    assert valid == True
    
     # Test with low filter of 3
-    candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=3)
-    assert len(corpus.words) - len(candidates) == 0
-    assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I'])
-    assert valid == False
+    with unittest.TestCase.assertRaises(unittest.TestCase, ValueError):
+        candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=3)
+        assert len(corpus.words) - len(candidates) == 0
+        assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I'])  
 
     # Test with low filter of 0
-    candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=0)
+    candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=0)
     assert len(corpus.words) - len(candidates) == 4
     assert all(w in candidates for w in [])
-    assert valid == True
 
     # Test with low filter of 100
-    candidates, filtered, valid = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=100)
-    assert len(corpus.words) - len(candidates) == 0
-    assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I'])
-    assert valid == False
+    with unittest.TestCase.assertRaises(unittest.TestCase, ValueError):
+        candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=100)
+        assert len(corpus.words) - len(candidates) == 0
+        assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I'])
diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 88449016..e28c729f 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -324,40 +324,22 @@ def get_high_filter_chart(c, words=None, items=None, counts=None, num=None):
 def get_high_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     input_filter = num
-    valid = True
-    f = open("test.txt", "w+")
-    f.write("c" + "\n")
-    f.write(str(c) + "\n")
-    f.write("c words" + "\n")
-    f.write(str(c.words) + "\n")
-    f.write("words" + "\n")
-    f.write(str(words) + "\n")
-    f.write("items" + "\n")
-    f.write(str(items) + "\n")
-    f.write("counts" + "\n")
-    f.write(str(counts) + "\n")
-    f.write("num" + "\n")
-    f.write(str(num) + "\n")
-    f.write("lengths" + "\n")
-    f.write(str(len(c.words)) + " " + str(len(items)) + " " + str(len(counts)))
-    try:
-        candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts)
-        places = np.in1d(c.words, candidates)
-        places = dict(zip(candidates, np.where(places)[0]))
-        candidates = sorted(candidates, key=lambda x: counts[places[x]], reverse=True)
-        filtered_counts = counts[get_mask(c, words)]
 
-        filtered = ""
-        filtered += "Filter will remove " + str(filtered_counts[filtered_counts >= input_filter].sum())
-        filtered += " occurrences " + "of these " + str(len(filtered_counts[filtered_counts >= input_filter])) + " words: "
-        filtered += u' '.join(candidates)
+    candidates = get_candidate_words(c, input_filter, words=words, items=items, counts=counts)
+    places = np.in1d(c.words, candidates)
+    places = dict(zip(candidates, np.where(places)[0]))
+    candidates = sorted(candidates, key=lambda x: counts[places[x]], reverse=True)
+    filtered_counts = counts[get_mask(c, words)]
 
-        if len(candidates) == len(c.words):
-            valid = False
+    filtered = ""
+    filtered += "Filter will remove " + str(filtered_counts[filtered_counts >= input_filter].sum())
+    filtered += " occurrences " + "of these " + str(len(filtered_counts[filtered_counts >= input_filter])) + " words: "
+    filtered += u' '.join(candidates)
 
-    except ValueError:
-        input_filter = 0
-    return (candidates, filtered, valid)
+    if len(candidates) == len(c.words):
+        raise ValueError
+
+    return (candidates, filtered)
 
 
 def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
@@ -396,29 +378,21 @@ def get_low_filter_chart(c, words=None, items=None, counts=None, num=None):
 def get_low_filter_stops(c, words=None, items=None, counts=None, num=None):
     import numpy as np
     input_filter = num
-    valid = True
-    try:
-        candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts)
-        places = np.in1d(c.words, candidates)
-        places = dict(zip(candidates, np.where(places)[0]))
-        candidates = sorted(candidates, key=lambda x: counts[places[x]])
-        filtered_counts = counts[get_mask(c, words)]
-
-        filtered = ""
-        filtered += "Filter will remove " + str(filtered_counts[filtered_counts <= input_filter].sum()) + " tokens"
-        filtered += "of these " + str(len(filtered_counts[filtered_counts <= input_filter])) + " words: "
-        filtered += u' '.join(candidates)
-
+    candidates = get_candidate_words(c, -input_filter, words=words, items=items, counts=counts)
+    places = np.in1d(c.words, candidates)
+    places = dict(zip(candidates, np.where(places)[0]))
+    candidates = sorted(candidates, key=lambda x: counts[places[x]])
+    filtered_counts = counts[get_mask(c, words)]
 
-        if len(candidates) == len(c.words):
-            valid = False
-            # filtered += "\n\nChoice of" + str(input_filter) + "will remove ALL words from the corpus."
-            # filtered += "Please choose a different filter."
+    filtered = ""
+    filtered += "Filter will remove " + str(filtered_counts[filtered_counts <= input_filter].sum()) + " tokens"
+    filtered += "of these " + str(len(filtered_counts[filtered_counts <= input_filter])) + " words: "
+    filtered += u' '.join(candidates)
 
-    except ValueError:
-        input_filter = 0
+    if len(candidates) == len(c.words):
+        raise ValueError
 
-    return (candidates, filtered, valid)
+    return (candidates, filtered)
 
 # Stores all of the variables for the labels
 class PrepData(Frame):
@@ -568,16 +542,16 @@ def _prep(self):
                             len(data.fileCandidates), 's' if len(data.fileCandidates) > 1 else ''))
         else:
             data.fileCandidates = []
-        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=high)
-        # Checks to see if the value entered with filter the whole corpus out
-        if not valid:
+        try:
+            data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                    num=high)
+        except ValueError:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
             return
-        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=low)
-        # Checks to see if the value entered with filter the whole corpus out
-        if not valid:
+        try:
+            data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                    num=low)
+        except ValueError:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
         data.stopCandidates = get_small_words(data.c, minNum)
@@ -631,10 +605,10 @@ def _high(self):
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
         
-        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=high)
-        # Checks to see if the value entered with filter the whole corpus out
-        if not valid:
+        try:
+            data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                    num=high)
+        except ValueError:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
             return
         
@@ -664,7 +638,7 @@ def _popupHigh(selection):
             confirm()
             return
         high = validate(data.summaryHigh, data.summaryHighPercent, data.high, data.highPercent, "high", False)
-        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
         temp = deepcopy(data.stoplist)
         temp.update(data.highCandidates)
@@ -693,10 +667,10 @@ def _low(self):
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
             
-        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=low)
-        # Checks to see if the value entered with filter the whole corpus out
-        if not valid:
+        try:
+            data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                    num=low)
+        except ValueError:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
         (columns, line) = os.get_terminal_size()
@@ -725,7 +699,7 @@ def _popupLow(selection):
             confirm()
             return
         low = validate(data.summaryLow, data.summaryLowPercent, data.low, data.lowPercent, "low", True)
-        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
         temp = deepcopy(data.stoplist)
         temp.update(data.lowCandidates)
@@ -791,10 +765,10 @@ def _ok(self):
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
 
-        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=high)
-        # Checks to see if the value entered with filter the whole corpus out
-        if not valid:
+        try:
+            data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                    num=high)
+        except ValueError:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
             return
         (columns, line) = os.get_terminal_size()
@@ -824,7 +798,7 @@ def _popup(selection):
             confirm()
             return
         high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
-        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
         (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
@@ -853,10 +827,10 @@ def _change(self):
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
 
-        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=high)
-        # Checks to see if the value entered with filter the whole corpus out
-        if not valid:
+        try:
+            data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                    num=high)
+        except ValueError:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for high will remove all values, please choose a different filter", ["OK"]))
             return
         
@@ -885,7 +859,7 @@ def _popupChange(selection):
             confirm()
             return
         high = validate(data.high, data.highPercent, data.summaryHigh, data.summaryHighPercent, "high", False)
-        data.highCandidates, filtered, valid = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.highCandidates, filtered = get_high_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=high)
         (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
@@ -940,11 +914,11 @@ def _ok(self):
             else:
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
-            
-        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=low)
-        # Checks to see if the value entered with filter the whole corpus out
-        if not valid:
+
+        try:    
+            data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                    num=low)
+        except ValueError:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
         (columns, line) = os.get_terminal_size()
@@ -975,7 +949,7 @@ def _popup(selection):
             return
         low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
 
-        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
         (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)
@@ -1003,11 +977,11 @@ def _change(self):
             else:
                 self._scene.add_effect(PopUpDialog(self._screen, e.args[0], e.args[1]))
             return
-            
-        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
-                                                                num=low)
-        # Checks to see if the value entered with filter the whole corpus out
-        if not valid:
+
+        try:   
+            data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+                                                                    num=low)
+        except ValueError:
             self._scene.add_effect(PopUpDialog(self._screen, "Current filter for low will remove all values, please choose a different filter", ["OK"]))
             return
         (columns, line) = os.get_terminal_size()
@@ -1035,7 +1009,7 @@ def _popupChange(selection):
             confirm()
             return
         low = validate(data.low, data.lowPercent, data.summaryLow, data.summaryLowPercent, "low", True)
-        data.lowCandidates, filtered, valid = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
+        data.lowCandidates, filtered = get_low_filter_stops(data.c, words=data.stoplist, items=data.items, counts=data.counts,
                                                                 num=low)
         (columns, line) = os.get_terminal_size()
         temp = deepcopy(data.stoplist)

From af695edae4364cfd4f8839db0566fbd4b7cea57d Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Sun, 23 Dec 2018 14:14:30 -0600
Subject: [PATCH 19/21] using instance of unittest.TestCase

---
 tests/test_prep.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/test_prep.py b/tests/test_prep.py
index da3ca61a..04aff21e 100644
--- a/tests/test_prep.py
+++ b/tests/test_prep.py
@@ -56,8 +56,7 @@ def test_get_candidate_words():
         corpus, -low_freq, words=low_words)
     assert len(mask_words) == 0
 
-@patch('topicexplorer.prep.input')
-def test_get_high_filter(input_mock):
+def test_get_high_filter():
     # Test with high filter of 3
     items, counts = topicexplorer.prep.get_corpus_counts(corpus)
     candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=3)
@@ -68,9 +67,10 @@ def test_get_high_filter(input_mock):
     candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=0)
     assert len(corpus.words) - len(candidates) == 4
     assert candidates == []
-    
+
     # Test with high filter of 1, should return invalid
-    with unittest.TestCase.assertRaises(unittest.TestCase, ValueError):
+    t = unittest.TestCase('run')
+    with t.assertRaises(ValueError):
         candidates, filtered = topicexplorer.prep.get_high_filter_stops(corpus, words=set(), items=items, counts=counts, num=1)
         assert len(corpus.words) - len(candidates) == 0
         assert candidates == ['I', 'came', 'conquered', 'saw']
@@ -80,8 +80,7 @@ def test_get_high_filter(input_mock):
     assert len(corpus.words) - len(candidates) == 4
     assert candidates == []
 
-@patch('topicexplorer.prep.input')
-def test_get_low_filter(input_mock):
+def test_get_low_filter():
     # Test with low filter of 1
     items, counts = topicexplorer.prep.get_corpus_counts(corpus)
     candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=1)
@@ -89,7 +88,8 @@ def test_get_low_filter(input_mock):
     assert all(w in candidates for w in ['came', 'saw', 'conquered'])
    
     # Test with low filter of 3
-    with unittest.TestCase.assertRaises(unittest.TestCase, ValueError):
+    t = unittest.TestCase('run')
+    with t.assertRaises(ValueError):
         candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=3)
         assert len(corpus.words) - len(candidates) == 0
         assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I'])  
@@ -100,7 +100,7 @@ def test_get_low_filter(input_mock):
     assert all(w in candidates for w in [])
 
     # Test with low filter of 100
-    with unittest.TestCase.assertRaises(unittest.TestCase, ValueError):
+    with t.assertRaises(ValueError):
         candidates, filtered = topicexplorer.prep.get_low_filter_stops(corpus, words=set(), items=items, counts=counts, num=100)
         assert len(corpus.words) - len(candidates) == 0
         assert all(w in candidates for w in ['came', 'saw', 'conquered', 'I'])

From 7751cfeda8b2c77f253bbc45ca914589e9c04acb Mon Sep 17 00:00:00 2001
From: Jaimie Murdock <jaimie.murdock@gmail.com>
Date: Tue, 28 Apr 2020 22:57:23 -0600
Subject: [PATCH 20/21] updating asciimatics reqs

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 61fbf24e..a20d43d2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
--e git+https://github.com/peterbrittain/asciimatics.git@fcedb4947933de7e1507ec0dee8ca7a3f466928a#egg=asciimatics
+asciimatics>=1.11.0
 bottle>=0.12.0
 brewer2mpl>=1.4.0,<1.5.0
 decorator>=4.0.5

From d470a71aed3f1895a96c8696398099c51a92f2fc Mon Sep 17 00:00:00 2001
From: Kirtan Sakariya <kirtansakariya@gmail.com>
Date: Tue, 5 May 2020 16:17:58 -0400
Subject: [PATCH 21/21] interrupts enabled

---
 topicexplorer/prep.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
index 9af80eb2..283a3be8 100644
--- a/topicexplorer/prep.py
+++ b/topicexplorer/prep.py
@@ -1301,7 +1301,7 @@ def gui(screen, scene):
     last_scene = None
     while not args.quiet:
         try:
-            Screen.wrapper(gui, catch_interrupt=True, arguments=[last_scene])
+            Screen.wrapper(gui, catch_interrupt=False, arguments=[last_scene])
             break
             # sys.exit(0)
         except ResizeScreenError as e: