Skip to content

Commit

Permalink
Change minimum word length
Browse files: browse the repository at this point in the history
  • Loading branch information
tabuna committed May 10, 2024
1 parent 2f2edfe commit 5a89eaf
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/Classifier.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ public function tokenize(string $string): Collection

return Str::of($string)
->lower()
->matchAll('/[[:alpha:]]+/u');
->matchAll('/[[:alpha:]]+/u')
->filter(fn (string $word) => Str::length($word) > 3);
}

/**
Expand Down
13 changes: 13 additions & 0 deletions tests/ClassifierTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace AssistedMindfulness\NaiveBayes\Tests;

use AssistedMindfulness\NaiveBayes\Classifier;
use Illuminate\Support\Str;
use PHPUnit\Framework\TestCase;

class ClassifierTest extends TestCase
Expand All @@ -13,6 +14,12 @@ public function testTokenizeClassifier(): void
{
$classifier = new Classifier();

$classifier->setTokenizer(function (string $string) {
return Str::of($string)
->lower()
->matchAll('/[[:alpha:]]+/u');
});

$this->assertEquals(
['hello', 'how', 'are', 'you'],
$classifier->tokenize('Hello, how are you?')->toArray()
Expand Down Expand Up @@ -198,6 +205,12 @@ public function testSimpleSpam(): void
{
$classifier = new Classifier();

$classifier->setTokenizer(function (string $string) {
return Str::of($string)
->lower()
->matchAll('/[[:alpha:]]+/u');
});

$classifier
->learn('Learn how to grow your business with these proven strategies', 'ham')
->learn('Unlock the secrets of successful investing in our latest guide', 'ham')
Expand Down

0 comments on commit 5a89eaf

Please sign in to comment.