Skip to content

Commit

Permalink
refactor : repackaging legacy
Browse files Browse the repository at this point in the history
  • Loading branch information
K-Diger committed Mar 17, 2024
1 parent 86bdebd commit 9a25d93
Show file tree
Hide file tree
Showing 16 changed files with 51 additions and 69 deletions.
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
package com.mashup.shorts.leagcy
package com.mashup.shorts.core.leagcy

import org.jsoup.Jsoup
import org.jsoup.select.Elements
import org.springframework.stereotype.Component
import com.mashup.shorts.domain.category.Category
import com.mashup.shorts.domain.category.CategoryName
import com.mashup.shorts.domain.news.News
import com.mashup.shorts.leagcy.consts.CONTENT_CLASS_NAME
import com.mashup.shorts.leagcy.consts.HEADLINE
import com.mashup.shorts.leagcy.consts.IMAGE_ID_NAME
import com.mashup.shorts.leagcy.consts.NORMAL
import com.mashup.shorts.leagcy.consts.PRESS_CLASS_NAME
import com.mashup.shorts.leagcy.consts.SYMBOLIC_LINK_BASE_URL
import com.mashup.shorts.leagcy.consts.TITLE_CLASS_NAME
import com.mashup.shorts.leagcy.consts.WRITTEN_DATETIME_CLASS_NAME
import com.mashup.shorts.leagcy.consts.detailDocClassNames
import com.mashup.shorts.leagcy.consts.moreHeadLineLinksElements
import com.mashup.shorts.core.leagcy.consts.CONTENT_CLASS_NAME
import com.mashup.shorts.core.leagcy.consts.HEADLINE
import com.mashup.shorts.core.leagcy.consts.IMAGE_ID_NAME
import com.mashup.shorts.core.leagcy.consts.NORMAL
import com.mashup.shorts.core.leagcy.consts.PRESS_CLASS_NAME
import com.mashup.shorts.core.leagcy.consts.SYMBOLIC_LINK_BASE_URL
import com.mashup.shorts.core.leagcy.consts.TITLE_CLASS_NAME
import com.mashup.shorts.core.leagcy.consts.WRITTEN_DATETIME_CLASS_NAME
import com.mashup.shorts.core.leagcy.consts.detailDocClassNames
import com.mashup.shorts.core.leagcy.consts.moreHeadLineLinksElements

@Deprecated("Deprecated By Changed DOM")
@Component("DeprecatedCrawlerBase")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.leagcy
package com.mashup.shorts.core.leagcy

import java.time.LocalDateTime
import java.time.format.DateTimeFormatter.ofPattern
Expand All @@ -10,7 +10,7 @@ import org.springframework.transaction.annotation.Transactional
import com.mashup.shorts.common.exception.ShortsBaseException
import com.mashup.shorts.common.exception.ShortsErrorCode
import com.mashup.shorts.common.util.Slf4j2KotlinLogging.log
import com.mashup.shorts.core.keywordextractor.KeywordExtractor
import com.mashup.shorts.core.v2.keywordextractor.KeywordExtractor
import com.mashup.shorts.domain.category.CategoryName.CULTURE
import com.mashup.shorts.domain.category.CategoryName.ECONOMIC
import com.mashup.shorts.domain.category.CategoryName.POLITICS
Expand All @@ -25,7 +25,7 @@ import com.mashup.shorts.domain.news.NewsBulkInsertRepository
import com.mashup.shorts.domain.news.NewsRepository
import com.mashup.shorts.domain.newscard.NewsCard
import com.mashup.shorts.domain.newscard.NewsCardBulkInsertRepository
import com.mashup.shorts.leagcy.consts.categoryToUrl
import com.mashup.shorts.core.leagcy.consts.categoryToUrl

@Deprecated("Deprecated By Changed DOM")
@Component("DeprecatedCrawlerCore")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.leagcy.consts
package com.mashup.shorts.core.leagcy.consts

import com.mashup.shorts.domain.category.CategoryName

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.leagcy.consts
package com.mashup.shorts.core.leagcy.consts

import com.mashup.shorts.domain.category.CategoryName

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.leagcy.consts
package com.mashup.shorts.core.leagcy.consts

import com.mashup.shorts.domain.category.CategoryName

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package com.mashup.shorts.leagcy.keywordextractor
package com.mashup.shorts.core.leagcy.keywordextractor

import java.util.*
import org.springframework.beans.factory.annotation.Qualifier
import org.springframework.stereotype.Component
import com.mashup.shorts.core.keywordextractor.KeywordExtractor
import com.mashup.shorts.core.v2.keywordextractor.KeywordExtractor
import kr.co.shineware.nlp.komoran.constant.DEFAULT_MODEL
import kr.co.shineware.nlp.komoran.core.Komoran

Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
package com.mashup.shorts.core
package com.mashup.shorts.core.v2

import org.jsoup.Jsoup
import org.jsoup.select.Elements
import org.springframework.context.annotation.Primary
import org.springframework.stereotype.Component
import com.mashup.shorts.core.consts.NewsDOMClassNameConst.CONTENT_CLASS_NAME
import com.mashup.shorts.core.consts.NewsDOMClassNameConst.HEADLINE
import com.mashup.shorts.core.consts.NewsDOMClassNameConst.IMAGE_ID_NAME
import com.mashup.shorts.core.consts.NewsDOMClassNameConst.NORMAL
import com.mashup.shorts.core.consts.NewsDOMClassNameConst.PRESS_CLASS_NAME
import com.mashup.shorts.core.consts.NewsDOMClassNameConst.TITLE_CLASS_NAME
import com.mashup.shorts.core.consts.NewsDOMClassNameConst.WRITTEN_DATETIME_CLASS_NAME
import com.mashup.shorts.core.consts.NewsDOMClassNameConst.detailDocClassNames
import com.mashup.shorts.core.consts.NewsLinkElementConst.moreHeadLineLinksElements
import com.mashup.shorts.core.consts.SYMBOLIC_LINK_BASE_URL
import com.mashup.shorts.core.v2.consts.NewsDOMClassNameConst.CONTENT_CLASS_NAME
import com.mashup.shorts.core.v2.consts.NewsDOMClassNameConst.HEADLINE
import com.mashup.shorts.core.v2.consts.NewsDOMClassNameConst.IMAGE_ID_NAME
import com.mashup.shorts.core.v2.consts.NewsDOMClassNameConst.NORMAL
import com.mashup.shorts.core.v2.consts.NewsDOMClassNameConst.PRESS_CLASS_NAME
import com.mashup.shorts.core.v2.consts.NewsDOMClassNameConst.TITLE_CLASS_NAME
import com.mashup.shorts.core.v2.consts.NewsDOMClassNameConst.WRITTEN_DATETIME_CLASS_NAME
import com.mashup.shorts.core.v2.consts.NewsDOMClassNameConst.detailDocClassNames
import com.mashup.shorts.core.v2.consts.NewsLinkElementConst.moreHeadLineLinksElements
import com.mashup.shorts.core.v2.consts.SYMBOLIC_LINK_BASE_URL
import com.mashup.shorts.domain.category.Category
import com.mashup.shorts.domain.category.CategoryName
import com.mashup.shorts.domain.news.News
Expand Down
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
package com.mashup.shorts.core
package com.mashup.shorts.core.v2

import java.time.LocalDateTime
import java.time.format.DateTimeFormatter.ofPattern
import org.jsoup.select.Elements
import org.springframework.beans.factory.annotation.Qualifier
import org.springframework.context.annotation.Primary
import org.springframework.retry.annotation.Recover
import org.springframework.retry.annotation.Retryable
import org.springframework.scheduling.annotation.Scheduled
import org.springframework.stereotype.Component
import org.springframework.transaction.annotation.Transactional
import com.mashup.shorts.common.exception.ShortsBaseException
import com.mashup.shorts.common.exception.ShortsErrorCode
import com.mashup.shorts.common.util.Slf4j2KotlinLogging.log
import com.mashup.shorts.core.consts.CATEGORY_WEIGHT_ONE
import com.mashup.shorts.core.consts.CATEGORY_WEIGHT_ONE_HALF
import com.mashup.shorts.core.consts.CATEGORY_WEIGHT_ONE_QUARTER
import com.mashup.shorts.core.consts.CATEGORY_WEIGHT_TWO_HALF_QUARTER
import com.mashup.shorts.core.consts.categoryToUrl
import com.mashup.shorts.core.keywordextractor.KeywordExtractor
import com.mashup.shorts.core.rank.RankingGenerator
import com.mashup.shorts.core.v2.consts.CATEGORY_WEIGHT_ONE
import com.mashup.shorts.core.v2.consts.CATEGORY_WEIGHT_ONE_HALF
import com.mashup.shorts.core.v2.consts.CATEGORY_WEIGHT_ONE_QUARTER
import com.mashup.shorts.core.v2.consts.CATEGORY_WEIGHT_TWO_HALF_QUARTER
import com.mashup.shorts.core.v2.consts.categoryToUrl
import com.mashup.shorts.core.v2.keywordextractor.KeywordExtractor
import com.mashup.shorts.core.v2.rank.RankingGenerator
import com.mashup.shorts.domain.category.Category
import com.mashup.shorts.domain.category.CategoryName
import com.mashup.shorts.domain.category.CategoryName.CULTURE
Expand Down Expand Up @@ -47,10 +41,7 @@ class CrawlerCore(
private val rankingGenerator: RankingGenerator,
) {

@Retryable(value = [Exception::class], maxAttempts = 3)
@Transactional(rollbackFor = [Exception::class])
@Scheduled(cron = "0 0 * * * *")
internal fun executeCrawling() {
internal fun executeCrawling(): LocalDateTime {
val crawledDateTime = LocalDateTime.now()
val keywordsCountingPair = mutableMapOf<String, Double>()
val persistenceTargetNewsCards = mutableListOf<NewsCard>()
Expand Down Expand Up @@ -128,22 +119,13 @@ class CrawlerCore(
rankingGenerator.saveKeywordRanking(keywordsCountingPair.mapValues { it.value.toInt() })

log.info("$crawledDateTime - all crawling done")

return crawledDateTime
}

/**
 * Reports whether the buffer of pending news cards has reached the
 * bulk-insert threshold of 100 entries.
 *
 * @param persistenceTargetNewsCards news cards collected so far and not yet persisted
 * @return `true` when the buffer holds 100 or more cards, `false` otherwise
 */
private fun isBulkInserTiming(persistenceTargetNewsCards: MutableList<NewsCard>): Boolean {
    val pendingCount = persistenceTargetNewsCards.size
    return pendingCount >= 100
}

/**
 * Spring Retry recovery handler (marked with `@Recover`).
 *
 * Writes three error log entries (a fixed failure message, the exception's
 * localized message, and the exception's cause) and then always throws a
 * [ShortsBaseException] built with [ShortsErrorCode.E500_INTERNAL_SERVER_ERROR].
 *
 * NOTE(review): the entry labelled "ExceptionStackTrace" only contains
 * `localizedMessage`, not a stack trace — confirm whether that is intended.
 *
 * @param exception the exception handed to this recovery handler
 * @throws ShortsBaseException always
 */
@Recover
fun recover(exception: Exception) {
    // The same text is used for the first log entry and for the thrown exception.
    val failureMessage = "크롤링 중 예외가 발생하여 총 3회를 시도했으나 작업이 실패했습니다."

    log.error { failureMessage }
    // Exception details stay inside the lambdas so they are only read when the logger evaluates them.
    log.error { "ExceptionStackTrace : ${exception.localizedMessage}" }
    log.error { "ExceptionCause : ${exception.cause}" }

    throw ShortsBaseException.from(
        shortsErrorCode = ShortsErrorCode.E500_INTERNAL_SERVER_ERROR,
        resultErrorMessage = failureMessage
    )
}

private fun bulkInsertNewsCard(
persistenceTargetNewsCards: MutableList<NewsCard>,
crawledDateTime: LocalDateTime,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.core.consts
package com.mashup.shorts.core.v2.consts

internal const val CATEGORY_WEIGHT_ONE = 1.0
internal const val CATEGORY_WEIGHT_ONE_QUARTER = 1.25
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.core.consts
package com.mashup.shorts.core.v2.consts

import com.mashup.shorts.domain.category.CategoryName

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.core.consts
package com.mashup.shorts.core.v2.consts

import com.mashup.shorts.domain.category.CategoryName

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.core.consts
package com.mashup.shorts.core.v2.consts

import com.mashup.shorts.domain.category.CategoryName

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.core.keywordextractor
package com.mashup.shorts.core.v2.keywordextractor

import org.springframework.stereotype.Component

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.core.keywordextractor
package com.mashup.shorts.core.v2.keywordextractor

import java.io.StringReader
import org.apache.lucene.analysis.Analyzer
Expand All @@ -8,8 +8,8 @@ import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
import org.springframework.beans.factory.annotation.Qualifier
import org.springframework.stereotype.Component
import com.mashup.shorts.core.consts.CONTENT_WEIGHT
import com.mashup.shorts.core.consts.TITLE_WEIGHT
import com.mashup.shorts.core.v2.consts.CONTENT_WEIGHT
import com.mashup.shorts.core.v2.consts.TITLE_WEIGHT

@Component
@Qualifier("LuceneAnalyzerKeywordExtractor")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.core.keywordextractor
package com.mashup.shorts.core.v2.keywordextractor

/**
불용어 목록 출처 : https://www.kci.go.kr/kciportal/ci/sereArticleSearch/ciSereArtiView.kci?sereArticleSearchBean.artiId=ART002390885
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.mashup.shorts.core.rank
package com.mashup.shorts.core.v2.rank

import org.springframework.stereotype.Component
import org.springframework.transaction.annotation.Transactional
Expand Down

0 comments on commit 9a25d93

Please sign in to comment.