Commit 746f753

refactor: update types in dataset context and models, and enhance document detail fetching logic
WTW0313 committed Dec 25, 2024
1 parent 003a480 commit 746f753
Showing 10 changed files with 169 additions and 123 deletions.
77 changes: 39 additions & 38 deletions web/app/components/datasets/documents/detail/completed/index.tsx
@@ -303,20 +303,20 @@ const Completed: FC<ICompletedProps> = ({

     eventEmitter?.emit('update-segment')
     await updateSegment({ datasetId, documentId, segmentId, body: params }, {
-      onSuccess(data) {
+      onSuccess(res) {
         notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
         if (!needRegenerate)
           onCloseSegmentDetail()
         for (const seg of segments) {
           if (seg.id === segmentId) {
-            seg.answer = data.data.answer
-            seg.content = data.data.content
-            seg.keywords = data.data.keywords
-            seg.word_count = data.data.word_count
-            seg.hit_count = data.data.hit_count
-            seg.enabled = data.data.enabled
-            seg.updated_at = data.data.updated_at
-            seg.child_chunks = data.data.child_chunks
+            seg.answer = res.data.answer
+            seg.content = res.data.content
+            seg.keywords = res.data.keywords
+            seg.word_count = res.data.word_count
+            seg.hit_count = res.data.hit_count
+            seg.enabled = res.data.enabled
+            seg.updated_at = res.data.updated_at
+            seg.child_chunks = res.data.child_chunks
           }
         }
         setSegments([...segments])
@@ -477,41 +477,42 @@ const Completed: FC<ICompletedProps> = ({

     params.content = content

-    try {
-      eventEmitter?.emit('update-child-segment')
-      const res = await updateChildSegment({ datasetId, documentId, segmentId, childChunkId, body: params })
-      notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
-      onCloseChildSegmentDetail()
-      if (parentMode === 'paragraph') {
-        for (const seg of segments) {
-          if (seg.id === segmentId) {
-            for (const childSeg of seg.child_chunks!) {
-              if (childSeg.id === childChunkId) {
-                childSeg.content = res.data.content
-                childSeg.type = res.data.type
-                childSeg.word_count = res.data.word_count
-                childSeg.updated_at = res.data.updated_at
-              }
-            }
-          }
-        }
-        setSegments([...segments])
-      }
-      else {
-        for (const childSeg of childSegments) {
-          if (childSeg.id === childChunkId) {
-            childSeg.content = res.data.content
-            childSeg.type = res.data.type
-            childSeg.word_count = res.data.word_count
-            childSeg.updated_at = res.data.updated_at
-          }
-        }
-        setChildSegments([...childSegments])
-      }
-    }
-    finally {
-      eventEmitter?.emit('update-child-segment-done')
-    }
+    eventEmitter?.emit('update-child-segment')
+    await updateChildSegment({ datasetId, documentId, segmentId, childChunkId, body: params }, {
+      onSuccess: (res) => {
+        notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
+        onCloseChildSegmentDetail()
+        if (parentMode === 'paragraph') {
+          for (const seg of segments) {
+            if (seg.id === segmentId) {
+              for (const childSeg of seg.child_chunks!) {
+                if (childSeg.id === childChunkId) {
+                  childSeg.content = res.data.content
+                  childSeg.type = res.data.type
+                  childSeg.word_count = res.data.word_count
+                  childSeg.updated_at = res.data.updated_at
+                }
+              }
+            }
+          }
+          setSegments([...segments])
+        }
+        else {
+          for (const childSeg of childSegments) {
+            if (childSeg.id === childChunkId) {
+              childSeg.content = res.data.content
+              childSeg.type = res.data.type
+              childSeg.word_count = res.data.word_count
+              childSeg.updated_at = res.data.updated_at
+            }
+          }
+          setChildSegments([...childSegments])
+        }
+      },
+      onSettled: () => {
+        eventEmitter?.emit('update-child-segment-done')
+      },
+    })
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [segments, childSegments, datasetId, documentId, parentMode])
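Editor's note on the pattern above: both segment and child-segment updates drop `try`/`finally` in favor of per-call callbacks, giving success handling, error handling, and the "done" event each a dedicated slot. A minimal sketch of how such a service hook is presumably built, assuming it wraps TanStack Query's `useMutation` (the real `updateSegment`/`updateChildSegment` hooks live in `@/service/knowledge/use-segment`, which this diff does not show; the endpoint and types here are illustrative only):

```ts
import { useMutation } from '@tanstack/react-query'

// Illustrative argument shape, not the repo's actual types.
type UpdateChildSegmentArgs = {
  datasetId: string
  documentId: string
  segmentId: string
  childChunkId: string
  body: { content: string }
}

export const useUpdateChildSegment = () => {
  return useMutation({
    mutationFn: async (args: UpdateChildSegmentArgs) => {
      const res = await fetch(
        `/datasets/${args.datasetId}/documents/${args.documentId}/segments/${args.segmentId}/child_chunks/${args.childChunkId}`,
        { method: 'PATCH', body: JSON.stringify(args.body) },
      )
      return res.json()
    },
  })
}

// TanStack Query's mutateAsync accepts { onSuccess, onError, onSettled }
// as a second argument. onSettled fires on both outcomes, which is why it
// can replace the old `finally` block: 'update-child-segment-done' is
// emitted whether the request succeeds or fails.
```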

@@ -83,7 +83,7 @@ const NewChildSegmentModal: FC<NewChildSegmentModalProps> = ({

     setLoading(true)
     await addChildSegment({ datasetId, documentId, segmentId: chunkId, body: params }, {
-      onSuccess(data) {
+      onSuccess(res) {
         notify({
           type: 'success',
           message: t('datasetDocuments.segment.childChunkAdded'),
@@ -98,7 +98,7 @@ const NewChildSegmentModal: FC<NewChildSegmentModalProps> = ({
         }, 3000)
       }
       else {
-        onSave(data.data)
+        onSave(res.data)
       }
     },
     onSettled() {
79 changes: 40 additions & 39 deletions web/app/components/datasets/documents/detail/index.tsx
@@ -1,11 +1,9 @@
 'use client'
 import type { FC } from 'react'
 import React, { useMemo, useState } from 'react'
-import useSWR from 'swr'
 import { createContext, useContext, useContextSelector } from 'use-context-selector'
 import { useTranslation } from 'react-i18next'
 import { useRouter } from 'next/navigation'
-import { omit } from 'lodash-es'
 import { RiArrowLeftLine, RiLayoutRight2Line } from '@remixicon/react'
 import { OperationAction, StatusItem } from '../list'
 import DocumentPicker from '../../common/document-picker'
@@ -18,14 +16,14 @@ import style from './style.module.css'
 import cn from '@/utils/classnames'
 import Divider from '@/app/components/base/divider'
 import Loading from '@/app/components/base/loading'
-import type { MetadataType } from '@/service/datasets'
-import { checkSegmentBatchImportProgress, fetchDocumentDetail, segmentBatchImport } from '@/service/datasets'
 import { ToastContext } from '@/app/components/base/toast'
 import type { ChunkingMode, ParentMode, ProcessMode } from '@/models/datasets'
 import { useDatasetDetailContext } from '@/context/dataset-detail'
 import FloatRightContainer from '@/app/components/base/float-right-container'
 import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
 import { LayoutRight2LineMod } from '@/app/components/base/icons/src/public/knowledge'
+import { useCheckSegmentBatchImportProgress, useSegmentBatchImport } from '@/service/knowledge/use-segment'
+import { useDocumentDetail, useDocumentMetadata } from '@/service/knowledge/use-document'

 type DocumentContextValue = {
   datasetId?: string
@@ -95,49 +93,52 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
   const showBatchModal = () => setBatchModalVisible(true)
   const hideBatchModal = () => setBatchModalVisible(false)
   const resetProcessStatus = () => setImportStatus('')

+  const { mutateAsync: checkSegmentBatchImportProgress } = useCheckSegmentBatchImportProgress()
   const checkProcess = async (jobID: string) => {
-    try {
-      const res = await checkSegmentBatchImportProgress({ jobID })
-      setImportStatus(res.job_status)
-      if (res.job_status === ProcessStatus.WAITING || res.job_status === ProcessStatus.PROCESSING)
-        setTimeout(() => checkProcess(res.job_id), 2500)
-      if (res.job_status === ProcessStatus.ERROR)
-        notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}` })
-    }
-    catch (e: any) {
-      notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}${'message' in e ? `: ${e.message}` : ''}` })
-    }
+    await checkSegmentBatchImportProgress({ jobID }, {
+      onSuccess: (res) => {
+        setImportStatus(res.job_status)
+        if (res.job_status === ProcessStatus.WAITING || res.job_status === ProcessStatus.PROCESSING)
+          setTimeout(() => checkProcess(res.job_id), 2500)
+        if (res.job_status === ProcessStatus.ERROR)
+          notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}` })
+      },
+      onError: (e) => {
+        notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}${'message' in e ? `: ${e.message}` : ''}` })
+      },
+    })
   }

+  const { mutateAsync: segmentBatchImport } = useSegmentBatchImport()
   const runBatch = async (csv: File) => {
     const formData = new FormData()
     formData.append('file', csv)
-    try {
-      const res = await segmentBatchImport({
-        url: `/datasets/${datasetId}/documents/${documentId}/segments/batch_import`,
-        body: formData,
-      })
-      setImportStatus(res.job_status)
-      checkProcess(res.job_id)
-    }
-    catch (e: any) {
-      notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}${'message' in e ? `: ${e.message}` : ''}` })
-    }
+    await segmentBatchImport({
+      url: `/datasets/${datasetId}/documents/${documentId}/segments/batch_import`,
+      body: formData,
+    }, {
+      onSuccess: (res) => {
+        setImportStatus(res.job_status)
+        checkProcess(res.job_id)
+      },
+      onError: (e) => {
+        notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}${'message' in e ? `: ${e.message}` : ''}` })
+      },
+    })
   }

-  const { data: documentDetail, error, mutate: detailMutate } = useSWR({
-    action: 'fetchDocumentDetail',
+  const { data: documentDetail, error, refetch: detailMutate } = useDocumentDetail({
     datasetId,
     documentId,
-    params: { metadata: 'without' as MetadataType },
-  }, apiParams => fetchDocumentDetail(omit(apiParams, 'action')))
+    params: { metadata: 'without' },
+  })

-  const { data: documentMetadata, error: metadataErr, mutate: metadataMutate } = useSWR({
-    action: 'fetchDocumentDetail',
+  const { data: documentMetadata, error: metadataErr, refetch: metadataMutate } = useDocumentMetadata({
     datasetId,
     documentId,
-    params: { metadata: 'only' as MetadataType },
-  }, apiParams => fetchDocumentDetail(omit(apiParams, 'action')),
-  )
+    params: { metadata: 'only' },
+  })

   const backToPrev = () => {
     router.push(`/datasets/${datasetId}/documents`)
@@ -156,12 +157,12 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
   }

   const mode = useMemo(() => {
-    return documentDetail?.dataset_process_rule?.mode
-  }, [documentDetail?.dataset_process_rule])
+    return documentDetail?.document_process_rule?.mode
+  }, [documentDetail?.document_process_rule])

   const parentMode = useMemo(() => {
-    return documentDetail?.dataset_process_rule?.rules?.parent_mode
-  }, [documentDetail?.dataset_process_rule])
+    return documentDetail?.document_process_rule?.rules?.parent_mode
+  }, [documentDetail?.document_process_rule])

   const isFullDocMode = useMemo(() => {
     return mode === 'hierarchical' && parentMode === 'full-doc'
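Editor's note: the SWR pair keyed on `action: 'fetchDocumentDetail'` becomes two purpose-built query hooks, with `refetch` standing in for SWR's `mutate`. A plausible sketch of `useDocumentDetail`, assuming the hooks in `@/service/knowledge/use-document` wrap TanStack Query's `useQuery` (key shape, fetcher, and the metadata values are assumptions, not the repo's actual code):

```ts
import { useQuery } from '@tanstack/react-query'

type UseDocumentDetailParams = {
  datasetId: string
  documentId: string
  params: { metadata: 'without' | 'only' | 'all' } // assumed MetadataType values
}

export const useDocumentDetail = ({ datasetId, documentId, params }: UseDocumentDetailParams) => {
  return useQuery({
    // A structured key like this is what would let useInvalidDocumentDetailKey
    // (used in settings/index.tsx below) target the cached detail entry.
    queryKey: ['document-detail', datasetId, documentId, params.metadata],
    queryFn: () =>
      fetch(`/datasets/${datasetId}/documents/${documentId}?metadata=${params.metadata}`)
        .then(res => res.json()),
  })
}
```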
40 changes: 16 additions & 24 deletions web/app/components/datasets/documents/detail/settings/index.tsx
@@ -1,13 +1,11 @@
 'use client'
-import React, { useEffect, useMemo, useState } from 'react'
+import React, { useMemo } from 'react'
 import { useTranslation } from 'react-i18next'
 import { useBoolean } from 'ahooks'
 import { useContext } from 'use-context-selector'
 import { useRouter } from 'next/navigation'
 import DatasetDetailContext from '@/context/dataset-detail'
-import type { CrawlOptions, CustomFile, FullDocumentDetail } from '@/models/datasets'
-import type { MetadataType } from '@/service/datasets'
-import { fetchDocumentDetail } from '@/service/datasets'
+import type { CrawlOptions, CustomFile } from '@/models/datasets'

 import Loading from '@/app/components/base/loading'
 import StepTwo from '@/app/components/datasets/create/step-two'
@@ -16,6 +14,7 @@ import AppUnavailable from '@/app/components/base/app-unavailable'
 import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
 import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
 import type { NotionPage } from '@/models/common'
+import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'

 type DocumentSettingsProps = {
   datasetId: string
@@ -26,15 +25,23 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
   const { t } = useTranslation()
   const router = useRouter()
   const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIkey }] = useBoolean()
-  const [hasError, setHasError] = useState(false)
   const { indexingTechnique, dataset } = useContext(DatasetDetailContext)
   const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)

-  const saveHandler = () => router.push(`/datasets/${datasetId}/documents/${documentId}`)
+  const invalidDocumentDetail = useInvalidDocumentDetailKey()
+  const saveHandler = () => {
+    invalidDocumentDetail()
+    router.push(`/datasets/${datasetId}/documents/${documentId}`)
+  }

   const cancelHandler = () => router.back()

-  const [documentDetail, setDocumentDetail] = useState<FullDocumentDetail | null>(null)
+  const { data: documentDetail, error } = useDocumentDetail({
+    datasetId,
+    documentId,
+    params: { metadata: 'without' },
+  })

   const currentPage = useMemo(() => {
     return {
       workspace_id: documentDetail?.data_source_info.notion_workspace_id,
@@ -44,23 +51,8 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
       type: documentDetail?.data_source_type,
     }
   }, [documentDetail])
-  useEffect(() => {
-    (async () => {
-      try {
-        const detail = await fetchDocumentDetail({
-          datasetId,
-          documentId,
-          params: { metadata: 'without' as MetadataType },
-        })
-        setDocumentDetail(detail)
-      }
-      catch (e) {
-        setHasError(true)
-      }
-    })()
-  }, [datasetId, documentId])

-  if (hasError)
+  if (error)
     return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

   return (
@@ -85,7 +77,7 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
           websiteCrawlProvider={documentDetail.data_source_info?.provider}
           websiteCrawlJobId={documentDetail.data_source_info?.job_id}
           crawlOptions={documentDetail.data_source_info as unknown as CrawlOptions}
-          indexingType={indexingTechnique || ''}
+          indexingType={indexingTechnique}
           isSetting
           documentDetail={documentDetail}
           files={[documentDetail.data_source_info.upload_file as CustomFile]}
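Editor's note: the settings page drops its hand-rolled `useEffect` fetch (and the `hasError`/`documentDetail` state pair) for the same `useDocumentDetail` hook, and `saveHandler` now invalidates the cached detail before navigating back, so the detail page re-fetches instead of showing stale data. A sketch of what `useInvalidDocumentDetailKey` presumably does, assuming a TanStack Query cache and the key shape sketched earlier (both assumptions):

```ts
import { useQueryClient } from '@tanstack/react-query'

export const useInvalidDocumentDetailKey = () => {
  const queryClient = useQueryClient()
  // Invalidating by key prefix marks every cached document-detail query
  // stale, so the next mount (after router.push) re-fetches it.
  return () =>
    queryClient.invalidateQueries({ queryKey: ['document-detail'] })
}
```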
4 changes: 3 additions & 1 deletion web/context/dataset-detail.ts
@@ -1,7 +1,9 @@
 import { createContext, useContext, useContextSelector } from 'use-context-selector'
 import type { DataSet } from '@/models/datasets'
+import type { IndexingType } from '@/app/components/datasets/create/step-two'

 type DatasetDetailContextValue = {
-  indexingTechnique?: string
+  indexingTechnique?: IndexingType
   dataset?: DataSet
   mutateDatasetRes?: () => void
 }
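Editor's note: narrowing `indexingTechnique` from `string` to `IndexingType` is what lets the settings diff above pass `indexingType={indexingTechnique}` without the old `|| ''` fallback. `IndexingType` is exported from the step-two component; its members are presumably the two indexing modes the UI offers, along these lines (values assumed, not shown in this diff):

```ts
// Assumed shape of the export from step-two.
export enum IndexingType {
  QUALIFIED = 'high_quality',
  ECONOMICAL = 'economy',
}
```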
4 changes: 2 additions & 2 deletions web/hooks/use-metadata.ts
@@ -1,7 +1,7 @@
 'use client'
 import { useTranslation } from 'react-i18next'
 import { formatFileSize, formatNumber, formatTime } from '@/utils/format'
-import type { DocType } from '@/models/datasets'
+import { type DocType, ProcessMode } from '@/models/datasets'
 import useTimestamp from '@/hooks/use-timestamp'

 export type inputType = 'input' | 'select' | 'textarea'
@@ -250,7 +250,7 @@ export const useMetadataMap = (): MetadataMap => {
     subFieldsMap: {
       'dataset_process_rule.mode': {
         label: t(`${fieldPrefix}.technicalParameters.segmentSpecification`),
-        render: value => value === 'automatic' ? (t('datasetDocuments.embedding.automatic') as string) : (t('datasetDocuments.embedding.custom') as string),
+        render: value => value === ProcessMode.general ? (t('datasetDocuments.embedding.custom') as string) : (t('datasetDocuments.embedding.hierarchical') as string),
       },
       'dataset_process_rule.rules.segmentation.max_tokens': {
         label: t(`${fieldPrefix}.technicalParameters.segmentLength`),
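Editor's note: the label logic flips from keying on the legacy `'automatic'` string to keying on the `ProcessMode` enum, so general (custom) segmentation renders the "custom" label and anything else is treated as hierarchical. The enum presumably looks roughly like this (only `ProcessMode.general` is proven by the diff; the members and values are inferred from the comparisons in this commit):

```ts
// Assumed members; see the models diff below, where ProcessRule.mode
// becomes ProcessMode and 'hierarchical' remains a valid mode value.
export enum ProcessMode {
  general = 'custom',
  parentChild = 'hierarchical',
}
```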
8 changes: 7 additions & 1 deletion web/models/datasets.ts
@@ -353,7 +353,7 @@ export type NotionPage = {
 }

 export type ProcessRule = {
-  mode: ChildChunkType | 'hierarchical'
+  mode: ProcessMode
   rules: Rules
 }

@@ -391,6 +391,7 @@ export type FullDocumentDetail = SimpleDocumentDetail & {
   doc_metadata?: DocMetadata | null
   segment_count: number
   dataset_process_rule: PrecessRule
+  document_process_rule: ProcessRule
   [key: string]: any
 }

@@ -656,3 +657,8 @@ export type UpdateDocumentBatchParams = {
   documentId?: string
   documentIds?: string[] | string
 }
+
+export type BatchImportResponse = {
+  job_id: string
+  job_status: string
+}
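Editor's note: `BatchImportResponse` gives the batch-import flow in detail/index.tsx above a concrete shape for what were previously untyped responses. A small usage sketch (the status strings here are assumptions; the page itself compares against a `ProcessStatus` enum):

```ts
import type { BatchImportResponse } from '@/models/datasets'

// With the response typed, job_id / job_status access is compile-time checked.
const scheduleNextCheck = (
  res: BatchImportResponse,
  check: (jobID: string) => void,
) => {
  if (res.job_status === 'waiting' || res.job_status === 'processing')
    setTimeout(() => check(res.job_id), 2500)
}
```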