Skip to content

Commit

Permalink
refactor : Decrease Title Weight and fix wordFrequencies counting
Browse files: browse the repository at this point in the history
  • Loading branch information
K-Diger committed Mar 4, 2024
1 parent da13ca8 commit e208c6d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@ class LuceneAnalyzerKeywordExtractor : KeywordExtractor {
/**
 * Builds the keyword result for an article from its title and content.
 *
 * Word frequencies are computed separately for [title] (with TITLE_WEIGHT) and
 * [content] (with CONTENT_WEIGHT), merged per word, and handed to formatResult,
 * whose return value is returned unchanged.
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so it shows both
 * the pre-commit and the post-commit declaration of `wordFrequencies`. Only the
 * post-commit version (the mutable-map merge) belongs in the real source file.
 *
 * @param title the article title
 * @param content the article body
 * @return whatever formatResult produces for the merged frequencies
 */
override fun extractKeyword(title: String, content: String): String {
val titleFrequencies = calculateWordFrequencies(title, TITLE_WEIGHT)
val contentFrequencies = calculateWordFrequencies(content, CONTENT_WEIGHT)
// Pre-commit line (removed by this change). Assuming both values are Kotlin Maps,
// `+` keeps only the right-hand value for a key present in both, so a word found in
// the title and the content would lose its title score instead of having it added.
val wordFrequencies = titleFrequencies + contentFrequencies
// Post-commit replacement: start from a mutable copy of the title scores.
val wordFrequencies = titleFrequencies.toMutableMap()

// Add each content score onto the existing entry; words missing from the title
// start from DEFAULT_FREQUENCY.
contentFrequencies.forEach { (key, value) ->
wordFrequencies[key] = wordFrequencies.getOrDefault(key, DEFAULT_FREQUENCY) + value
}

return formatResult(wordFrequencies)
}

Expand Down Expand Up @@ -50,7 +55,7 @@ class LuceneAnalyzerKeywordExtractor : KeywordExtractor {

companion object {
private const val KEYWORD_COUNT = 5
private const val TITLE_WEIGHT = 2.0
private const val TITLE_WEIGHT = 1.5
private const val CONTENT_WEIGHT = 1.0
private const val DEFAULT_FREQUENCY = 0
private const val TOKEN_STREAM_FIELD_NAME_TYPE = "text"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class KeywordExtractorTest(

@Test
fun execute() {
val title = "대구 수성알파시티에 2031년까지 1천개 기업 유치"
val title = "대구 수성알파시티에 2031년까지 1천개 기업 유치 수성알파시티에 수성알파시티에 수성알파시티에 수성알파시티에 수성알파시티에 수성알파시티에 수성알파시티에 수성알파시티에 수성알파시티에 수성알파시티에 수성알파시티에"
val content =
"수원역 수원역 수원역 수원역 수원역 수원역 수원역 수원역 최저임금 최저임금 최저임금 최저임금 최저임금 최저임금 최저임금 최저임금 최저임금 최저임금 (서울=연합뉴스) 강건택 기자 = 정부가 대구 수성알파시티를 2만 명의 디지털 인재가 상주하는 국가 디지털 혁신지구로 본격 조성한다.\n" +
"\n" +
Expand Down

0 comments on commit e208c6d

Please sign in to comment.