diff --git a/README.md b/README.md
index c2425c3..3ef2435 100644
--- a/README.md
+++ b/README.md
@@ -8,18 +8,17 @@ HanLP Analyzer for ElasticSearch
此分词器基于[HanLP](http://www.hankcs.com/nlp),提供了HanLP中大部分的分词方式。
-🚩适配Elasticsearch 6.5.2,增加了远程词典的功能,功能类似于medcl大神的[IK分词器插件](https://github.com/medcl/elasticsearch-analysis-ik),因为hanlp有词性的配置,所以远程自定义词典配置稍有不同,需要配置词性和频次。
+🚩适配Elasticsearch 6.5.3,增加了远程词典的功能,功能类似于medcl大神的[IK分词器插件](https://github.com/medcl/elasticsearch-analysis-ik),因为hanlp有词性的配置,所以远程自定义词典配置稍有不同,需要配置词性和频次。
----------
版本对应
----------
-### 1. 下载安装ES对应Plugin Release版本
-
| Plugin version | Elastic version |
| :------------- | :-------------- |
| master | 6.x |
+| 6.5.3 | 6.5.3 |
| 6.5.2 | 6.5.2 |
| 6.5.1 | 6.5.1 |
| 6.5.0 | 6.5.0 |
@@ -33,6 +32,11 @@ HanLP Analyzer for ElasticSearch
| 6.2.2 | 6.2.2 |
| 5.2.2 | 5.2.2 |
+安装步骤
+----------
+
+### 1. 下载安装ES对应Plugin Release版本
+
安装方式:
方式一
@@ -49,7 +53,7 @@ HanLP Analyzer for ElasticSearch
a. 使用elasticsearch插件脚本安装command如下:
- `./bin/elasticsearch-plugin install https://github.com/KennFalcon/elasticsearch-analysis-hanlp/releases/download/v6.5.2/elasticsearch-analysis-hanlp-6.5.2.zip`
+ `./bin/elasticsearch-plugin install https://github.com/KennFalcon/elasticsearch-analysis-hanlp/releases/download/v6.5.3/elasticsearch-analysis-hanlp-6.5.3.zip`
### 2. 安装数据包
@@ -157,6 +161,8 @@ POST http://localhost:9200/twitter2/_analyze
远程词典配置
----------
+配置文件为*ES_HOME*/config/analysis-hanlp/hanlp-remote.xml
+
```xml
<comment>HanLP Analyzer 扩展配置</comment>
@@ -169,7 +175,7 @@ POST http://localhost:9200/twitter2/_analyze
```
-### 远程扩展字典
+### 1. 远程扩展字典
其中words_location为URL或者URL+" "+词性,如:
@@ -181,7 +187,7 @@ POST http://localhost:9200/twitter2/_analyze
第二个样例,配置词典URL,同时配置该词典的默认词性nt,当然词典内部同样遵循[单词] [词性A] [A的频次] [词性B] [B的频次] ... 如果不配置词性,则采用默认词性nt。
-### 远程扩展停止词字典
+### 2. 远程扩展停止词字典
其中stop_words_location为URL,如:
diff --git a/pom.xml b/pom.xml
index d917b39..e7f93d9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,7 +10,7 @@
HanLP Analyzer for ElasticSearch
- 6.5.2
+ 6.5.3
1.8
UTF-8
${project.basedir}/src/main/assemblies/plugin.xml
diff --git a/src/main/java/com/hankcs/lucene/BaseHanLPAnalyzer.java b/src/main/java/com/hankcs/lucene/BaseHanLPAnalyzer.java
index b8749df..f8d71bf 100644
--- a/src/main/java/com/hankcs/lucene/BaseHanLPAnalyzer.java
+++ b/src/main/java/com/hankcs/lucene/BaseHanLPAnalyzer.java
@@ -26,7 +26,7 @@ abstract class BaseHanLPAnalyzer extends Analyzer {
* @param configuration 配置信息
* @return 新segment
*/
- Segment buildSegment(Segment segment, Configuration configuration) {
+ protected Segment buildSegment(Segment segment, Configuration configuration) {
segment.enableIndexMode(configuration.isEnableIndexMode())
.enableNumberQuantifierRecognize(configuration.isEnableNumberQuantifierRecognize())
.enableCustomDictionary(configuration.isEnableCustomDictionary())
@@ -38,13 +38,11 @@ Segment buildSegment(Segment segment, Configuration configuration) {
.enablePartOfSpeechTagging(configuration.isEnablePartOfSpeechTagging());
if (configuration.isEnableTraditionalChineseMode()) {
segment.enableIndexMode(false);
- Segment inner = segment;
- TraditionalChineseTokenizer.SEGMENT = inner;
- segment = new Segment() {
+ TraditionalChineseTokenizer.SEGMENT = segment;
+ return new Segment() {
@Override
protected List<Term> segSentence(char[] sentence) {
- List<Term> termList = TraditionalChineseTokenizer.segment(new String(sentence));
- return termList;
+ return TraditionalChineseTokenizer.segment(new String(sentence));
}
};
}
@@ -57,7 +55,7 @@ protected List segSentence(char[] sentence) {
* @param configuration 配置信息
* @return Tokenizer
*/
- Tokenizer buildBaseTokenizer(Segment segment, Configuration configuration) {
+ protected Tokenizer buildBaseTokenizer(Segment segment, Configuration configuration) {
return AccessController.doPrivileged((PrivilegedAction<Tokenizer>) () -> new HanLPTokenizer(segment, configuration));
}
}
diff --git a/src/main/java/com/hankcs/lucene/HanLPDijkstraAnalyzer.java b/src/main/java/com/hankcs/lucene/HanLPDijkstraAnalyzer.java
index 34ad03a..72535b4 100644
--- a/src/main/java/com/hankcs/lucene/HanLPDijkstraAnalyzer.java
+++ b/src/main/java/com/hankcs/lucene/HanLPDijkstraAnalyzer.java
@@ -4,10 +4,6 @@
import com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment;
import com.hankcs.hanlp.seg.Segment;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Tokenizer;
-
-import java.security.AccessController;
-import java.security.PrivilegedAction;
/**
* @project: elasticsearch-analysis-hanlp
diff --git a/src/main/java/com/hankcs/lucene/HanLPNLPAnalyzer.java b/src/main/java/com/hankcs/lucene/HanLPNLPAnalyzer.java
index be48637..77045ca 100644
--- a/src/main/java/com/hankcs/lucene/HanLPNLPAnalyzer.java
+++ b/src/main/java/com/hankcs/lucene/HanLPNLPAnalyzer.java
@@ -4,10 +4,6 @@
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.Segment;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Tokenizer;
-
-import java.security.AccessController;
-import java.security.PrivilegedAction;
/**
* @project: elasticsearch-analysis-hanlp