diff --git a/lib/naive_bayes.js b/lib/naive_bayes.js index a258db1..757e91e 100644 --- a/lib/naive_bayes.js +++ b/lib/naive_bayes.js @@ -177,13 +177,36 @@ Naivebayes.prototype.categorize = function (text) { , maxProbability = -Infinity , chosenCategory = null + var probabilities = self.probabilities(text) + + //iterate thru our categories to find the one with max probability for this text + probabilities + .forEach(function (categoryProbability) { + if (categoryProbability.value > maxProbability) { + maxProbability = categoryProbability.value + chosenCategory = categoryProbability.category + } + }) + + return chosenCategory; +} + +/** + * Determine category probabilities for `text`. + * + * @param {String} text + * @return {Array} probabilities + */ +Naivebayes.prototype.probabilities = function(text) { + var self = this; + var tokens = self.tokenizer(text) var frequencyTable = self.frequencyTable(tokens) - //iterate thru our categories to find the one with max probability for this text - Object + //iterate thru our categories to calculate the probability for this text + return Object .keys(self.categories) - .forEach(function (category) { + .map(function (category) { //start by calculating the overall probability of this category //=> out of all documents we've ever looked at, how many were @@ -206,13 +229,11 @@ Naivebayes.prototype.categorize = function (text) { logProbability += frequencyInText * Math.log(tokenProbability) }) - if (logProbability > maxProbability) { - maxProbability = logProbability - chosenCategory = category + return { + category: category, + value: logProbability } }) - - return chosenCategory } /** diff --git a/readme.md b/readme.md index f268a5b..1daa933 100644 --- a/readme.md +++ b/readme.md @@ -70,6 +70,10 @@ Teach your classifier what `category` the `text` belongs to. The more you teach Returns the `category` it thinks `text` belongs to. Its judgement is based on what you have taught it with **.learn()**. 
+###`classifier.probabilities(text)` + +Returns an array of `{ category, value }` objects, one per category, where `value` is the log-probability calculated for `text` in that category (a higher `value` means a more likely category). The values are based on what you have taught the classifier with **.learn()**. + ###`classifier.toJson()` Returns the JSON representation of a classifier.