Skip to content

Commit

Permalink
CleanUp Crawler Components
Browse files Browse the repository at this point in the history
  • Loading branch information
K-Diger committed May 30, 2024
1 parent f4d0984 commit e5a180f
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 56 deletions.
80 changes: 40 additions & 40 deletions build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,64 +1,64 @@
import org.jetbrains.kotlin.gradle.tasks.KotlinCompile

plugins {
id("org.springframework.boot") version "3.0.5"
id("io.spring.dependency-management") version "1.1.0"
id("org.asciidoctor.jvm.convert") version "3.3.2"
id("application")
kotlin("jvm") version "1.7.22"
kotlin("plugin.spring") version "1.7.22"
kotlin("plugin.jpa") version "1.7.22"
kotlin("kapt") version "1.6.21"
id("org.springframework.boot") version "3.0.5"
id("io.spring.dependency-management") version "1.1.0"
id("org.asciidoctor.jvm.convert") version "3.3.2"
id("application")
kotlin("jvm") version "1.7.22"
kotlin("plugin.spring") version "1.7.22"
kotlin("plugin.jpa") version "1.7.22"
kotlin("kapt") version "1.6.21"
}

java.sourceCompatibility = JavaVersion.VERSION_17

application {
mainClass.set("com.mashup.shorts.ShortsApiApplicationKt")
mainClass.set("com.mashup.shorts.ShortsApiApplicationKt")
}

allprojects {
group = "com.mashup"
version = "0.0.1-SNAPSHOT"
group = "com.mashup"
version = "0.0.1-SNAPSHOT"

repositories {
mavenCentral()
}
repositories {
mavenCentral()
}
}

subprojects {
apply(plugin = "org.jetbrains.kotlin.jvm")
apply(plugin = "org.jetbrains.kotlin.plugin.spring")
apply(plugin = "org.springframework.boot")
apply(plugin = "kotlin")
apply(plugin = "java-library")
apply(plugin = "kotlin-jpa")
apply(plugin = "io.spring.dependency-management")
apply(plugin = "org.asciidoctor.jvm.convert")
apply(plugin = "kotlin-kapt")
apply(plugin = "application")
apply(plugin = "org.jetbrains.kotlin.jvm")
apply(plugin = "org.jetbrains.kotlin.plugin.spring")
apply(plugin = "org.springframework.boot")
apply(plugin = "kotlin")
apply(plugin = "java-library")
apply(plugin = "kotlin-jpa")
apply(plugin = "io.spring.dependency-management")
apply(plugin = "org.asciidoctor.jvm.convert")
apply(plugin = "kotlin-kapt")
apply(plugin = "application")

dependencies {
implementation("org.springframework.boot:spring-boot-starter-web")
implementation("com.fasterxml.jackson.module:jackson-module-kotlin")
implementation("io.github.microutils:kotlin-logging:2.0.8")
dependencies {
implementation("org.springframework.boot:spring-boot-starter-web")
implementation("com.fasterxml.jackson.module:jackson-module-kotlin")
implementation("io.github.microutils:kotlin-logging:2.0.8")

implementation("org.jetbrains.kotlin:kotlin-reflect")
testImplementation("org.springframework.boot:spring-boot-starter-test")
implementation("org.jetbrains.kotlin:kotlin-reflect")
testImplementation("org.springframework.boot:spring-boot-starter-test")

// SpringMockk
testImplementation("com.ninja-squad:springmockk:3.1.1")
}
}

tasks.withType<KotlinCompile> {
kotlinOptions {
freeCompilerArgs = listOf("-Xjsr305=strict")
jvmTarget = "17"
}
}
tasks.withType<KotlinCompile> {
kotlinOptions {
freeCompilerArgs = listOf("-Xjsr305=strict")
jvmTarget = "17"
}
}

tasks.withType<Test> {
useJUnitPlatform()
}
tasks.withType<Test> {
useJUnitPlatform()
}

}
21 changes: 16 additions & 5 deletions shorts-crawler/src/main/resources/application-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,26 @@ spring:
database: mysql
database-platform: org.hibernate.dialect.MySQL8Dialect
hibernate:
ddl-auto: none
ddl-auto: validate
properties:
hibernate:
show_sql: true
format_sql: true
use_sql_comments: true
datasource:
url: jdbc:mysql://localhost:3306/shorts
username: root
password: 112233
driver-class-name: com.mysql.cj.jdbc.Driver
hikari:
jdbc-url: ENC(byZqEQJmUbRR7bQ/qtfEv6+70zHZH0vmQbM3EGNcDUelNLCB0WH0ZXu8E9W8rBhja5oq4X0qLPqnhdD0LPNdCp0NtQ4fv4qL+y879T3RGUc=)
username: ENC(TG/zdE+OfGqCqOT7P30ZvXzyWDXLqrCvm670gMLmPHeRoMANM7qXGYJOptHBUQfZ)
password: ENC(+Ef+hZlVqHMGPj3xA5FoyXcfKkxVQN40tB8P1YmglUNdO4BqgArsfxCbNK7aBeam)
driver-class-name: com.mysql.cj.jdbc.Driver
maximum-pool-size: 20
minimum-idle: 5
idle-timeout: 10000
pool-name: SpringHikariCP
max-lifetime: 2000000
connection-timeout: 30000

discord:
url:
success: ENC(vNme4VoNC6Zm53u6yV6kuYYl+YZLJITHFBkkFDwOXwttZTMOsalgC7SJrDboTV337uBUoH9AFPJD9NG+cVPUzu8iKacPuYCCUampWtu6g/6p3JCsUIm5p1wqxcmNJfgvlC9ARz+sFo7owKB3l6nNQ6MLYw9ls/trRCD47Zf15akSYeZ9F/b6XCNsTRXbMMcLK/YlGOmOpv5RY4rzPj//TA==)
fail: ENC(iotSK1j3FIcBk1NkPkhepcXg3WanEkt7EHJ7n5F/FhDYHwTJgCQRSoEwqtv1rEd81V4Gj7DN+PqIi6O0CU5MvpVWeJBdnjDwbr07v41W3o1NWYWrX0U4pbhIhjA3tWJv9QXp5I1wBDHI8DJvluuboWZ6iNkpyO5NTe1eY+PIy5K7UBV/ilJFOj7/rETiEG85XQKz+r6RoHi6P2ZvQ7/iaw==)
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import com.mashup.shorts.core.modern.CrawlerCore
@Disabled
@ActiveProfiles("test")
@SpringBootTest(classes = [ShortsCrawlerApplication::class])
class CrawlerCoreTest (
class CrawlerCoreTest(
@Autowired private val crawlerCore: CrawlerCore,
) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ spring:
batch_size: 101
order_inserts: true
order_updates: true
cache:
use_second_level_cache: true
use_query_cache: true
region.factory_class: org.hibernate.cache.jcache.JCacheRegionFactory

datasource:
url: ENC(m/cWWqilVqoIWWZKyTOxLLsTKnV35HECpNON0Hkw0X4k8S/VPOYbRlchcchUBnwkU54flUpjJFJo29Zj+2rsVXbPDP3RxooQAcptBDZJXDz5TT4eX0N2W3NPg/L9fqVgDR76wNMvxNNBDh2xUVmeumlj63Ej/zEHG3qrms2BtCo=)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,19 @@ spring:
format_sql: true
use_sql_comments: true
datasource:
url: ENC(m/cWWqilVqoIWWZKyTOxLLsTKnV35HECpNON0Hkw0X4k8S/VPOYbRlchcchUBnwkU54flUpjJFJo29Zj+2rsVXbPDP3RxooQAcptBDZJXDz5TT4eX0N2W3NPg/L9fqVgDR76wNMvxNNBDh2xUVmeumlj63Ej/zEHG3qrms2BtCo=)
username: ENC(VWvxuZYAz4whqGlh7q636WiTgrkjfRG2LWNmIBu3chYPC1M/S9hBESk+44smnAyY)
password: ENC(XyE1+hgtbCB30SYGXjjc2nAitbDxk88KGPpdNx3dUWwiB5rxI4Lv0NLMUCKWO83f)
url: jdbc:mysql://localhost:3306/shorts
username: root
password: 112233
driver-class-name: com.mysql.cj.jdbc.Driver
hikari:
maximum-pool-size: 20
minimum-idle: 5
idle-timeout: 10000
pool-name: SpringHikariCP
max-lifetime: 2000000
connection-timeout: 30000

discord:
url:
success: ENC(vNme4VoNC6Zm53u6yV6kuYYl+YZLJITHFBkkFDwOXwttZTMOsalgC7SJrDboTV337uBUoH9AFPJD9NG+cVPUzu8iKacPuYCCUampWtu6g/6p3JCsUIm5p1wqxcmNJfgvlC9ARz+sFo7owKB3l6nNQ6MLYw9ls/trRCD47Zf15akSYeZ9F/b6XCNsTRXbMMcLK/YlGOmOpv5RY4rzPj//TA==)
fail: ENC(iotSK1j3FIcBk1NkPkhepcXg3WanEkt7EHJ7n5F/FhDYHwTJgCQRSoEwqtv1rEd81V4Gj7DN+PqIi6O0CU5MvpVWeJBdnjDwbr07v41W3o1NWYWrX0U4pbhIhjA3tWJv9QXp5I1wBDHI8DJvluuboWZ6iNkpyO5NTe1eY+PIy5K7UBV/ilJFOj7/rETiEG85XQKz+r6RoHi6P2ZvQ7/iaw==)
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,15 @@ class LuceneAnalyzerKeywordExtractorV2 : KeywordExtractor {

val wordBM25Scores = mutableMapOf<String, Double>()
wordScores.forEach { (term, freq) ->
val idf = calculateInverseDocumentFrequency(1, 2)
val bm25Score = idf * ((freq * (k1 + 1)) / (freq + k1 * (1 - b + b * (docLength / avgDocLength))))
val idf = calculateIDF(1, 2)
val bm25Score = calculateBM25(idf, freq, docLength, avgDocLength)
wordBM25Scores[term] = bm25Score
}

return wordBM25Scores
}

private fun calculateInverseDocumentFrequency(docFreq: Int, totalDocs: Int): Double {
private fun calculateIDF(docFreq: Int, totalDocs: Int): Double {
// If the document frequency is 0, or the total number of documents is 0, the IDF value is treated as 0. By K-Diger
if (docFreq == 0 || totalDocs == 0) {
return 0.0
Expand All @@ -96,6 +96,13 @@ class LuceneAnalyzerKeywordExtractorV2 : KeywordExtractor {
return ln((totalDocs.toDouble() + 1) / (docFreq + 1)) + 1
}

/**
 * Computes the BM25 score of a single term from its IDF and frequency.
 *
 * The result is `idf * (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * docLength / avgDocLength))`.
 * `k1` and `b` are read from the enclosing scope; their declarations are not
 * part of this function (presumably the usual BM25 tuning constants — confirm
 * at their definition).
 *
 * NOTE(review): there is no guard for `avgDocLength == 0` here; confirm that
 * callers never pass a zero average length.
 *
 * @param idf inverse document frequency of the term
 * @param freq frequency of the term
 * @param docLength length of the document being scored
 * @param avgDocLength average document length used for length normalization
 * @return the BM25 score for the term
 */
private fun calculateBM25(
    idf: Double,
    freq: Double,
    docLength: Double,
    avgDocLength: Double,
): Double {
    // Length of this document relative to the average document.
    val lengthRatio = docLength / avgDocLength
    // Length-normalized part of the denominator.
    val lengthNormalization = k1 * (1 - b + b * lengthRatio)
    val numerator = freq * (k1 + 1)
    val denominator = freq + lengthNormalization
    return idf * (numerator / denominator)
}

private fun mergeFrequencies(
titleFrequencies: Map<String, Double>,
contentFrequencies: Map<String, Double>,
Expand Down

0 comments on commit e5a180f

Please sign in to comment.