From b191494dcfcf2b5d1e3e0d8098f317e3f93df659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Maciejewski?= Date: Mon, 25 Jul 2016 12:58:58 +0200 Subject: [PATCH 1/3] refactor probability calculation for categories into separate method --- lib/naive_bayes.js | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/lib/naive_bayes.js b/lib/naive_bayes.js index a258db1..818a4d5 100644 --- a/lib/naive_bayes.js +++ b/lib/naive_bayes.js @@ -177,13 +177,34 @@ Naivebayes.prototype.categorize = function (text) { , maxProbability = -Infinity , chosenCategory = null + var probabilities = self.probabilities(text) + + //iterate thru our categories to find the one with max probability for this text + probabilities + .forEach(function (categoryProbability) { + if (categoryProbability.value > maxProbability) { + maxProbability = categoryProbability.value + chosenCategory = categoryProbability.category + } + }) +} + +/** + * Determine category probabilities for `text`. 
+ * + * @param {String} text + * @return {Array} probabilities + */ +Naivebayes.prototype.probabilities = function(text) { + var self = this; + var tokens = self.tokenizer(text) var frequencyTable = self.frequencyTable(tokens) - //iterate thru our categories to find the one with max probability for this text - Object + //iterate thru our categories to calculate the probability for this text + return Object .keys(self.categories) - .forEach(function (category) { + .map(function (category) { //start by calculating the overall probability of this category //=> out of all documents we've ever looked at, how many were @@ -206,13 +227,11 @@ Naivebayes.prototype.categorize = function (text) { logProbability += frequencyInText * Math.log(tokenProbability) }) - if (logProbability > maxProbability) { - maxProbability = logProbability - chosenCategory = category + return { + category: category, + value: logProbability } }) - - return chosenCategory } /** From a10da46067fbd05a57e7a0f233d76662a6a9d8ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Maciejewski?= Date: Mon, 25 Jul 2016 13:01:19 +0200 Subject: [PATCH 2/3] add documentation for probabilities method --- readme.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/readme.md b/readme.md index f268a5b..1daa933 100644 --- a/readme.md +++ b/readme.md @@ -70,6 +70,10 @@ Teach your classifier what `category` the `text` belongs to. The more you teach Returns the `category` it thinks `text` belongs to. Its judgement is based on what you have taught it with **.learn()**. +###`classifier.probabilities(text)` + +Returns an array of `{ value, category }` objects, one per category, where `value` is the log-probability calculated for that category (a higher value means a more likely category). Its judgement is based on what you have taught it with **.learn()**. + ###`classifier.toJson()` Returns the JSON representation of a classifier. 
From 1ce6d3bf326beaa0e68ec654bbbc4ea76696b9be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Maciejewski?= Date: Mon, 25 Jul 2016 13:22:44 +0200 Subject: [PATCH 3/3] bugfix: restore missing return of chosenCategory in categorize --- lib/naive_bayes.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/naive_bayes.js b/lib/naive_bayes.js index 818a4d5..757e91e 100644 --- a/lib/naive_bayes.js +++ b/lib/naive_bayes.js @@ -187,6 +187,8 @@ Naivebayes.prototype.categorize = function (text) { chosenCategory = categoryProbability.category } }) + + return chosenCategory; } /**