diff --git a/GNUmakefile b/GNUmakefile index 7fcff3d8..5ce3b207 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -65,7 +65,7 @@ tarball_ext = .tar.gz # * KOI8_R_algorithms include algorithms.mk -other_algorithms = german2 kraaij_pohlmann lovins +other_algorithms = kraaij_pohlmann lovins all_algorithms = $(libstemmer_algorithms) $(other_algorithms) diff --git a/algorithms/german.sbl b/algorithms/german.sbl index 61f24ef9..cd303b15 100644 --- a/algorithms/german.sbl +++ b/algorithms/german.sbl @@ -32,16 +32,22 @@ define st_ending s_ending - 'r' define prelude as ( - test repeat ( - ( - ['{ss}'] <- 'ss' - ) or next - ) - - repeat goto ( + test repeat goto ( v [('u'] v <- 'U') or ('y'] v <- 'Y') ) + + repeat ( + [substring] among( + '{ss}' (<- 'ss') + 'ae' (<- '{a"}') + 'oe' (<- '{o"}') + 'ue' (<- '{u"}') + 'qu' () + '' (next) + ) + ) + ) define mark_regions as ( diff --git a/algorithms/german2.sbl b/algorithms/german2.sbl deleted file mode 100644 index cd303b15..00000000 --- a/algorithms/german2.sbl +++ /dev/null @@ -1,145 +0,0 @@ - -/* - Extra rule for -nisse ending added 11 Dec 2009 -*/ - -routines ( - prelude postlude - mark_regions - R1 R2 - standard_suffix -) - -externals ( stem ) - -integers ( p1 p2 x ) - -groupings ( v s_ending st_ending ) - -stringescapes {} - -/* special characters */ - -stringdef a" '{U+00E4}' -stringdef o" '{U+00F6}' -stringdef u" '{U+00FC}' -stringdef ss '{U+00DF}' - -define v 'aeiouy{a"}{o"}{u"}' - -define s_ending 'bdfghklmnrt' -define st_ending s_ending - 'r' - -define prelude as ( - - test repeat goto ( - v [('u'] v <- 'U') or - ('y'] v <- 'Y') - ) - - repeat ( - [substring] among( - '{ss}' (<- 'ss') - 'ae' (<- '{a"}') - 'oe' (<- '{o"}') - 'ue' (<- '{u"}') - 'qu' () - '' (next) - ) - ) - -) - -define mark_regions as ( - - $p1 = limit - $p2 = limit - - test(hop 3 setmark x) - - gopast v gopast non-v setmark p1 - try($p1 < x $p1 = x) // at least 3 - gopast v gopast non-v setmark p2 - -) - -define postlude as repeat ( - - [substring] among( - 'Y' (<- 'y') - 'U' (<- 'u') - '{a"}' (<- 'a') - '{o"}' (<- 'o') - '{u"}' (<- 'u') - '' (next) - ) - -) - -backwardmode ( - - define R1 as $p1 <= cursor - define R2 as $p2 <= cursor - - define standard_suffix as ( - do ( - [substring] R1 among( - 'em' 'ern' 'er' - ( delete - ) - 'e' 'en' 'es' - ( delete - try (['s'] 'nis' delete) - ) - 's' - ( s_ending delete - ) - ) - ) - do ( - [substring] R1 among( - 'en' 'er' 'est' - ( delete - ) - 'st' - ( st_ending hop 3 delete - ) - ) - ) - do ( - [substring] R2 among( - 'end' 'ung' - ( delete - try (['ig'] not 'e' R2 delete) - ) - 'ig' 'ik' 'isch' - ( not 'e' delete - ) - 'lich' 'heit' - ( delete - try ( - ['er' or 'en'] R1 delete - ) - ) - 'keit' - ( delete - try ( - [substring] R2 among( - 'lich' 'ig' - ( delete - ) - ) - ) - ) - ) - ) - ) -) - -define stem as ( - do prelude - do mark_regions - backwards - do standard_suffix - do postlude -) diff --git a/libstemmer/modules.txt b/libstemmer/modules.txt index 61e23417..cd36a219 100644 --- a/libstemmer/modules.txt +++ b/libstemmer/modules.txt @@ -52,9 +52,6 @@ porter UTF_8,ISO_8859_1 porter english # intended for general use, and use of them is is not fully supported. These # algorithms are: # -# german2 - This is a slight modification of the german stemmer. -#german2 UTF_8,ISO_8859_1 german2 german -# # kraaij_pohlmann - This is a different dutch stemmer. #kraaij_pohlmann UTF_8,ISO_8859_1 kraaij_pohlmann dutch #