diff --git a/web/app/components/app-sidebar/dataset-info.tsx b/web/app/components/app-sidebar/dataset-info.tsx
index 723296727b6a4d..5ce51d46e91e11 100644
--- a/web/app/components/app-sidebar/dataset-info.tsx
+++ b/web/app/components/app-sidebar/dataset-info.tsx
@@ -35,7 +35,7 @@ const DatasetInfo: FC = ({
         {name}
         {isExternal ? t('dataset.externalTag') : t('dataset.localDocs')}
-        {description}
+        {description}
       )}
       {extraInfo}
diff --git a/web/app/components/datasets/documents/detail/completed/child-segment-list.tsx b/web/app/components/datasets/documents/detail/completed/child-segment-list.tsx
index ca5cc5b622113f..1615ea98cf045a 100644
--- a/web/app/components/datasets/documents/detail/completed/child-segment-list.tsx
+++ b/web/app/components/datasets/documents/detail/completed/child-segment-list.tsx
@@ -95,7 +95,7 @@ const ChildSegmentList: FC = ({
         (isFullDocMode && isLoading) && 'overflow-y-hidden',
       )}>
       {isFullDocMode ? : null}
-
+
diff --git a/web/app/components/datasets/documents/detail/completed/index.tsx b/web/app/components/datasets/documents/detail/completed/index.tsx
--- a/web/app/components/datasets/documents/detail/completed/index.tsx
+++ b/web/app/components/datasets/documents/detail/completed/index.tsx
@@ ... @@ const Completed: FC = ({
     eventEmitter?.emit('update-segment')
     await updateSegment({ datasetId, documentId, segmentId, body: params }, {
-      onSuccess(data) {
+      onSuccess(res) {
         notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
         if (!needRegenerate)
           onCloseSegmentDetail()
         for (const seg of segments) {
           if (seg.id === segmentId) {
-            seg.answer = data.data.answer
-            seg.content = data.data.content
-            seg.keywords = data.data.keywords
-            seg.word_count = data.data.word_count
-            seg.hit_count = data.data.hit_count
-            seg.enabled = data.data.enabled
-            seg.updated_at = data.data.updated_at
-            seg.child_chunks = data.data.child_chunks
+            seg.answer = res.data.answer
+            seg.content = res.data.content
+            seg.keywords = res.data.keywords
+            seg.word_count = res.data.word_count
+            seg.hit_count = res.data.hit_count
+            seg.enabled = res.data.enabled
+            seg.updated_at = res.data.updated_at
+            seg.child_chunks = res.data.child_chunks
           }
         }
         setSegments([...segments])
@@ -477,41 +477,42 @@ const Completed: FC = ({
     params.content = content
 
-    try {
-      eventEmitter?.emit('update-child-segment')
-      const res = await updateChildSegment({ datasetId, documentId, segmentId, childChunkId, body: params })
-      notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
-      onCloseChildSegmentDetail()
-      if (parentMode === 'paragraph') {
-        for (const seg of segments) {
-          if (seg.id === segmentId) {
-            for (const childSeg of seg.child_chunks!) {
-              if (childSeg.id === childChunkId) {
-                childSeg.content = res.data.content
-                childSeg.type = res.data.type
-                childSeg.word_count = res.data.word_count
-                childSeg.updated_at = res.data.updated_at
+    eventEmitter?.emit('update-child-segment')
+    await updateChildSegment({ datasetId, documentId, segmentId, childChunkId, body: params }, {
+      onSuccess: (res) => {
+        notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
+        onCloseChildSegmentDetail()
+        if (parentMode === 'paragraph') {
+          for (const seg of segments) {
+            if (seg.id === segmentId) {
+              for (const childSeg of seg.child_chunks!) {
+                if (childSeg.id === childChunkId) {
+                  childSeg.content = res.data.content
+                  childSeg.type = res.data.type
+                  childSeg.word_count = res.data.word_count
+                  childSeg.updated_at = res.data.updated_at
+                }
               }
             }
           }
+          setSegments([...segments])
         }
-        setSegments([...segments])
-      }
-      else {
-        for (const childSeg of childSegments) {
-          if (childSeg.id === childChunkId) {
-            childSeg.content = res.data.content
-            childSeg.type = res.data.type
-            childSeg.word_count = res.data.word_count
-            childSeg.updated_at = res.data.updated_at
+        else {
+          for (const childSeg of childSegments) {
+            if (childSeg.id === childChunkId) {
+              childSeg.content = res.data.content
+              childSeg.type = res.data.type
+              childSeg.word_count = res.data.word_count
+              childSeg.updated_at = res.data.updated_at
+            }
           }
+          setChildSegments([...childSegments])
         }
-        setChildSegments([...childSegments])
-      }
-    }
-    finally {
-      eventEmitter?.emit('update-child-segment-done')
-    }
+      },
+      onSettled: () => {
+        eventEmitter?.emit('update-child-segment-done')
+      },
+    })
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [segments, childSegments, datasetId, documentId, parentMode])
diff --git a/web/app/components/datasets/documents/detail/completed/new-child-segment.tsx b/web/app/components/datasets/documents/detail/completed/new-child-segment.tsx
index ef8a1ffd9494ba..55766d8ac4aaab 100644
--- a/web/app/components/datasets/documents/detail/completed/new-child-segment.tsx
+++ b/web/app/components/datasets/documents/detail/completed/new-child-segment.tsx
@@ -83,7 +83,7 @@ const NewChildSegmentModal: FC = ({
     setLoading(true)
     await addChildSegment({ datasetId, documentId, segmentId: chunkId, body: params }, {
-      onSuccess(data) {
+      onSuccess(res) {
         notify({
           type: 'success',
           message: t('datasetDocuments.segment.childChunkAdded'),
@@ -98,7 +98,7 @@ const NewChildSegmentModal: FC = ({
         }, 3000)
       }
       else {
-        onSave(data.data)
+        onSave(res.data)
       }
     },
     onSettled() {
diff --git a/web/app/components/datasets/documents/detail/completed/segment-card.tsx b/web/app/components/datasets/documents/detail/completed/segment-card.tsx
index bddff997d9bdf0..1cadd0cf967ad8 100644
--- a/web/app/components/datasets/documents/detail/completed/segment-card.tsx
+++ b/web/app/components/datasets/documents/detail/completed/segment-card.tsx
@@ -80,6 +80,10 @@ const SegmentCard: FC = ({
     return mode === 'hierarchical'
   }, [mode])
 
+  const isParagraphMode = useMemo(() => {
+    return mode === 'hierarchical' && parentMode === 'paragraph'
+  }, [mode, parentMode])
+
   const isFullDocMode = useMemo(() => {
     return mode === 'hierarchical' && parentMode === 'full-doc'
   }, [mode, parentMode])
@@ -249,7 +253,7 @@ const SegmentCard: FC = ({
           : null
       }
       {
-        child_chunks.length > 0
+        isParagraphMode && child_chunks.length > 0
         &&
diff --git a/web/app/components/datasets/documents/detail/index.tsx b/web/app/components/datasets/documents/detail/index.tsx
--- a/web/app/components/datasets/documents/detail/index.tsx
+++ b/web/app/components/datasets/documents/detail/index.tsx
@@ ... @@ const DocumentDetail: FC = ({ datasetId, documentId }) => {
   const showBatchModal = () => setBatchModalVisible(true)
   const hideBatchModal = () => setBatchModalVisible(false)
   const resetProcessStatus = () => setImportStatus('')
+
+  const { mutateAsync: checkSegmentBatchImportProgress } = useCheckSegmentBatchImportProgress()
   const checkProcess = async (jobID: string) => {
-    try {
-      const res = await checkSegmentBatchImportProgress({ jobID })
-      setImportStatus(res.job_status)
-      if (res.job_status === ProcessStatus.WAITING || res.job_status === ProcessStatus.PROCESSING)
-        setTimeout(() => checkProcess(res.job_id), 2500)
-      if (res.job_status === ProcessStatus.ERROR)
-        notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}` })
-    }
-    catch (e: any) {
-      notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}${'message' in e ? `: ${e.message}` : ''}` })
-    }
+    await checkSegmentBatchImportProgress({ jobID }, {
+      onSuccess: (res) => {
+        setImportStatus(res.job_status)
+        if (res.job_status === ProcessStatus.WAITING || res.job_status === ProcessStatus.PROCESSING)
+          setTimeout(() => checkProcess(res.job_id), 2500)
+        if (res.job_status === ProcessStatus.ERROR)
+          notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}` })
+      },
+      onError: (e) => {
+        notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}${'message' in e ? `: ${e.message}` : ''}` })
+      },
+    })
   }
+
+  const { mutateAsync: segmentBatchImport } = useSegmentBatchImport()
   const runBatch = async (csv: File) => {
     const formData = new FormData()
     formData.append('file', csv)
-    try {
-      const res = await segmentBatchImport({
-        url: `/datasets/${datasetId}/documents/${documentId}/segments/batch_import`,
-        body: formData,
-      })
-      setImportStatus(res.job_status)
-      checkProcess(res.job_id)
-    }
-    catch (e: any) {
-      notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}${'message' in e ? `: ${e.message}` : ''}` })
-    }
+    await segmentBatchImport({
+      url: `/datasets/${datasetId}/documents/${documentId}/segments/batch_import`,
+      body: formData,
+    }, {
+      onSuccess: (res) => {
+        setImportStatus(res.job_status)
+        checkProcess(res.job_id)
+      },
+      onError: (e) => {
+        notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}${'message' in e ? `: ${e.message}` : ''}` })
+      },
+    })
   }
 
-  const { data: documentDetail, error, mutate: detailMutate } = useSWR({
-    action: 'fetchDocumentDetail',
+  const { data: documentDetail, error, refetch: detailMutate } = useDocumentDetail({
     datasetId,
     documentId,
-    params: { metadata: 'without' as MetadataType },
-  }, apiParams => fetchDocumentDetail(omit(apiParams, 'action')))
+    params: { metadata: 'without' },
+  })
 
-  const { data: documentMetadata, error: metadataErr, mutate: metadataMutate } = useSWR({
-    action: 'fetchDocumentDetail',
+  const { data: documentMetadata, error: metadataErr, refetch: metadataMutate } = useDocumentMetadata({
     datasetId,
     documentId,
-    params: { metadata: 'only' as MetadataType },
-  }, apiParams => fetchDocumentDetail(omit(apiParams, 'action')),
-  )
+    params: { metadata: 'only' },
+  })
 
   const backToPrev = () => {
     router.push(`/datasets/${datasetId}/documents`)
@@ -156,12 +157,12 @@ const DocumentDetail: FC = ({ datasetId, documentId }) => {
   }
 
   const mode = useMemo(() => {
-    return documentDetail?.dataset_process_rule?.mode
-  }, [documentDetail?.dataset_process_rule])
+    return documentDetail?.document_process_rule?.mode
+  }, [documentDetail?.document_process_rule])
 
   const parentMode = useMemo(() => {
-    return documentDetail?.dataset_process_rule?.rules?.parent_mode
-  }, [documentDetail?.dataset_process_rule])
+    return documentDetail?.document_process_rule?.rules?.parent_mode
+  }, [documentDetail?.document_process_rule])
 
   const isFullDocMode = useMemo(() => {
     return mode === 'hierarchical' && parentMode === 'full-doc'
@@ -175,7 +176,7 @@ const DocumentDetail: FC = ({ datasetId, documentId }) => {
       mode,
       parentMode,
     }}>
-
+
diff --git a/web/app/components/datasets/documents/detail/settings/index.tsx b/web/app/components/datasets/documents/detail/settings/index.tsx
index b2646654584d20..05c52d4de898b0 100644
--- a/web/app/components/datasets/documents/detail/settings/index.tsx
+++ b/web/app/components/datasets/documents/detail/settings/index.tsx
@@ -1,13 +1,11 @@
 'use client'
-import React, { useEffect, useMemo, useState } from 'react'
+import React, { useMemo } from 'react'
 import { useTranslation } from 'react-i18next'
 import { useBoolean } from 'ahooks'
 import { useContext } from 'use-context-selector'
 import { useRouter } from 'next/navigation'
 import DatasetDetailContext from '@/context/dataset-detail'
-import type { CrawlOptions, CustomFile, FullDocumentDetail } from '@/models/datasets'
-import type { MetadataType } from '@/service/datasets'
-import { fetchDocumentDetail } from '@/service/datasets'
+import type { CrawlOptions, CustomFile } from '@/models/datasets'
 import Loading from '@/app/components/base/loading'
 import StepTwo from '@/app/components/datasets/create/step-two'
@@ -16,6 +14,7 @@ import AppUnavailable from '@/app/components/base/app-unavailable'
 import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
 import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
 import type { NotionPage } from '@/models/common'
+import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'
 
 type DocumentSettingsProps = {
   datasetId: string
@@ -26,15 +25,23 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
   const { t } = useTranslation()
   const router = useRouter()
   const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIkey }] = useBoolean()
-  const [hasError, setHasError] = useState(false)
   const { indexingTechnique, dataset } = useContext(DatasetDetailContext)
   const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
 
-  const saveHandler = () => router.push(`/datasets/${datasetId}/documents/${documentId}`)
+  const invalidDocumentDetail = useInvalidDocumentDetailKey()
+  const saveHandler = () => {
+    invalidDocumentDetail()
+    router.push(`/datasets/${datasetId}/documents/${documentId}`)
+  }
 
   const cancelHandler = () => router.back()
 
-  const [documentDetail, setDocumentDetail] = useState<FullDocumentDetail | null>(null)
+  const { data: documentDetail, error } = useDocumentDetail({
+    datasetId,
+    documentId,
+    params: { metadata: 'without' },
+  })
+
   const currentPage = useMemo(() => {
     return {
       workspace_id: documentDetail?.data_source_info.notion_workspace_id,
@@ -44,23 +51,8 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
       type: documentDetail?.data_source_type,
     }
   }, [documentDetail])
 
-  useEffect(() => {
-    (async () => {
-      try {
-        const detail = await fetchDocumentDetail({
-          datasetId,
-          documentId,
-          params: { metadata: 'without' as MetadataType },
-        })
-        setDocumentDetail(detail)
-      }
-      catch (e) {
-        setHasError(true)
-      }
-    })()
-  }, [datasetId, documentId])
-
-  if (hasError)
+  if (error)
     return
 
   return (
@@ -85,7 +77,7 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
           websiteCrawlProvider={documentDetail.data_source_info?.provider}
           websiteCrawlJobId={documentDetail.data_source_info?.job_id}
           crawlOptions={documentDetail.data_source_info as unknown as CrawlOptions}
-          indexingType={indexingTechnique || ''}
+          indexingType={indexingTechnique}
           isSetting
           documentDetail={documentDetail}
           files={[documentDetail.data_source_info.upload_file as CustomFile]}
diff --git a/web/context/dataset-detail.ts b/web/context/dataset-detail.ts
index b880c97dc1a144..198de381a8b4ef 100644
--- a/web/context/dataset-detail.ts
+++ b/web/context/dataset-detail.ts
@@ -1,7 +1,9 @@
 import { createContext, useContext, useContextSelector } from 'use-context-selector'
 import type { DataSet } from '@/models/datasets'
+import type { IndexingType } from '@/app/components/datasets/create/step-two'
+
 type DatasetDetailContextValue = {
-  indexingTechnique?: string
+  indexingTechnique?: IndexingType
   dataset?: DataSet
   mutateDatasetRes?: () => void
 }
diff --git a/web/hooks/use-metadata.ts b/web/hooks/use-metadata.ts
index 6a4965f2bfc25e..5d1d86c20e7250 100644
--- a/web/hooks/use-metadata.ts
+++ b/web/hooks/use-metadata.ts
@@ -1,7 +1,7 @@
 'use client'
 import { useTranslation } from 'react-i18next'
 import { formatFileSize, formatNumber, formatTime } from '@/utils/format'
-import type { DocType } from '@/models/datasets'
+import { type DocType, ProcessMode } from '@/models/datasets'
 import useTimestamp from '@/hooks/use-timestamp'
 
 export type inputType = 'input' | 'select' | 'textarea'
@@ -250,7 +250,7 @@ export const useMetadataMap = (): MetadataMap => {
       subFieldsMap: {
         'dataset_process_rule.mode': {
           label: t(`${fieldPrefix}.technicalParameters.segmentSpecification`),
-          render: value => value === 'automatic' ? (t('datasetDocuments.embedding.automatic') as string) : (t('datasetDocuments.embedding.custom') as string),
+          render: value => value === ProcessMode.general ? (t('datasetDocuments.embedding.custom') as string) : (t('datasetDocuments.embedding.hierarchical') as string),
         },
         'dataset_process_rule.rules.segmentation.max_tokens': {
           label: t(`${fieldPrefix}.technicalParameters.segmentLength`),
diff --git a/web/models/datasets.ts b/web/models/datasets.ts
index 56a5d51ca17fa4..9d4768b67c2c45 100644
--- a/web/models/datasets.ts
+++ b/web/models/datasets.ts
@@ -353,7 +353,7 @@ export type NotionPage = {
 }
 
 export type ProcessRule = {
-  mode: ChildChunkType | 'hierarchical'
+  mode: ProcessMode
   rules: Rules
 }
@@ -391,6 +391,7 @@ export type FullDocumentDetail = SimpleDocumentDetail & {
   doc_metadata?: DocMetadata | null
   segment_count: number
   dataset_process_rule: PrecessRule
+  document_process_rule: ProcessRule
   [key: string]: any
 }
@@ -656,3 +657,8 @@ export type UpdateDocumentBatchParams = {
   documentId?: string
   documentIds?: string[] | string
 }
+
+export type BatchImportResponse = {
+  job_id: string
+  job_status: string
+}
diff --git a/web/service/datasets.ts b/web/service/datasets.ts
index 2a30cc685a1bd0..87f4e3a63858de 100644
--- a/web/service/datasets.ts
+++ b/web/service/datasets.ts
@@ -5,7 +5,6 @@ import type {
   CreateDocumentReq,
   DataSet,
   DataSetListResponse,
-  DocumentDetailResponse,
   DocumentListResponse,
   ErrorDocsResponse,
   ExternalAPIDeleteResponse,
@@ -149,10 +148,6 @@ export const fetchIndexingStatusBatch: Fetcher
   return get(`/datasets/${datasetId}/batch/${batchId}/indexing-status`, {})
 }
 
-export const fetchDocumentDetail: Fetcher = ({ datasetId, documentId, params }) => {
-  return get(`/datasets/${datasetId}/documents/${documentId}`, { params })
-}
-
 export const renameDocumentName: Fetcher = ({ datasetId, documentId, name }) => {
   return post(`/datasets/${datasetId}/documents/${documentId}/rename`, {
     body: { name },
@@ -175,15 +170,6 @@ export const modifyDocMetadata: Fetcher
   return patch(`/datasets/${datasetId}/documents/${documentId}/metadata`, { body })
 }
 
-// apis for segments in a document
-export const segmentBatchImport: Fetcher<{ job_id: string; job_status: string }, { url: string; body: FormData }> = ({ url, body }) => {
-  return post<{ job_id: string; job_status: string }>(url, { body }, { bodyStringify: false, deleteContentType: true })
-}
-
-export const checkSegmentBatchImportProgress: Fetcher<{ job_id: string; job_status: string }, { jobID: string }> = ({ jobID }) => {
-  return get<{ job_id: string; job_status: string }>(`/datasets/batch_import_status/${jobID}`)
-}
-
 // hit testing
 export const hitTesting: Fetcher = ({ datasetId, queryText, retrieval_model }) => {
   return post(`/datasets/${datasetId}/hit-testing`, { body: { query: queryText, retrieval_model } })
diff --git a/web/service/knowledge/use-document.ts b/web/service/knowledge/use-document.ts
index 6682f557df751a..2b9981f22f32eb 100644
--- a/web/service/knowledge/use-document.ts
+++ b/web/service/knowledge/use-document.ts
@@ -4,7 +4,8 @@ import {
 } from '@tanstack/react-query'
 import { del, get, patch } from '../base'
 import { useInvalid } from '../use-base'
-import type { SimpleDocumentDetail, UpdateDocumentBatchParams } from '@/models/datasets'
+import type { MetadataType } from '../datasets'
+import type { DocumentDetailResponse, SimpleDocumentDetail, UpdateDocumentBatchParams } from '@/models/datasets'
 import { DocumentActionType } from '@/models/datasets'
 import type { CommonResponse } from '@/models/common'
 
@@ -92,3 +93,32 @@ export const useSyncWebsite = () => {
     },
   })
 }
+
+const useDocumentDetailKey = [NAME_SPACE, 'documentDetail']
+export const useDocumentDetail = (payload: {
+  datasetId: string
+  documentId: string
+  params: { metadata: MetadataType }
+}) => {
+  const { datasetId, documentId, params } = payload
+  return useQuery({
+    queryKey: [...useDocumentDetailKey, 'withoutMetaData', datasetId, documentId],
+    queryFn: () => get<DocumentDetailResponse>(`/datasets/${datasetId}/documents/${documentId}`, { params }),
+  })
+}
+
+export const useDocumentMetadata = (payload: {
+  datasetId: string
+  documentId: string
+  params: { metadata: MetadataType }
+}) => {
+  const { datasetId, documentId, params } = payload
+  return useQuery({
+    queryKey: [...useDocumentDetailKey, 'withMetaData', datasetId, documentId],
+    queryFn: () => get<DocumentDetailResponse>(`/datasets/${datasetId}/documents/${documentId}`, { params }),
+  })
+}
+
+export const useInvalidDocumentDetailKey = () => {
+  return useInvalid(useDocumentDetailKey)
+}
diff --git a/web/service/knowledge/use-segment.ts b/web/service/knowledge/use-segment.ts
index 8461135acafd75..749bbf2d28615a 100644
--- a/web/service/knowledge/use-segment.ts
+++ b/web/service/knowledge/use-segment.ts
@@ -1,7 +1,15 @@
 import { useMutation, useQuery } from '@tanstack/react-query'
 import { del, get, patch, post } from '../base'
 import type { CommonResponse } from '@/models/common'
-import type { ChildChunkDetail, ChildSegmentsResponse, ChunkingMode, SegmentDetailModel, SegmentUpdater, SegmentsResponse } from '@/models/datasets'
+import type {
+  BatchImportResponse,
+  ChildChunkDetail,
+  ChildSegmentsResponse,
+  ChunkingMode,
+  SegmentDetailModel,
+  SegmentUpdater,
+  SegmentsResponse,
+} from '@/models/datasets'
 
 const NAME_SPACE = 'segment'
 
@@ -139,3 +147,23 @@ export const useUpdateChildSegment = () => {
     },
   })
 }
+
+export const useSegmentBatchImport = () => {
+  return useMutation({
+    mutationKey: [NAME_SPACE, 'batchImport'],
+    mutationFn: (payload: { url: string; body: FormData }) => {
+      const { url, body } = payload
+      return post<BatchImportResponse>(url, { body }, { bodyStringify: false, deleteContentType: true })
+    },
+  })
+}
+
+export const useCheckSegmentBatchImportProgress = () => {
+  return useMutation({
+    mutationKey: [NAME_SPACE, 'batchImport', 'checkProgress'],
+    mutationFn: (payload: { jobID: string }) => {
+      const { jobID } = payload
+      return get<BatchImportResponse>(`/datasets/batch_import_status/${jobID}`)
+    },
+  })
+}
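
For reviewers, a minimal consumer sketch of the migrated API, not part of the patch itself. The `ExampleConsumer` component, its `uploadCsv` handler, and the invalidate-on-success wiring are illustrative assumptions; only the imported hooks, the `params` shape, and the batch-import URL come from the diff above:

```tsx
import type { FC } from 'react'
import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'
import { useSegmentBatchImport } from '@/service/knowledge/use-segment'

// Hypothetical consumer: shows the query/mutation call shapes after the refactor.
const ExampleConsumer: FC<{ datasetId: string; documentId: string }> = ({ datasetId, documentId }) => {
  // Query hook: `error` replaces the hand-rolled hasError state, and
  // `refetch` replaces SWR's `mutate` for the same endpoint.
  const { data: documentDetail, error, refetch } = useDocumentDetail({
    datasetId,
    documentId,
    params: { metadata: 'without' },
  })

  // Mutation hook: per-call onSuccess/onError/onSettled replace try/catch/finally.
  const { mutateAsync: segmentBatchImport } = useSegmentBatchImport()
  const invalidDocumentDetail = useInvalidDocumentDetailKey()

  const uploadCsv = async (csv: File) => {
    const body = new FormData()
    body.append('file', csv)
    await segmentBatchImport(
      { url: `/datasets/${datasetId}/documents/${documentId}/segments/batch_import`, body },
      {
        // Invalidating the shared detail key makes every mounted consumer refetch.
        onSuccess: () => invalidDocumentDetail(),
        onError: e => console.error(e),
      },
    )
  }

  if (error)
    return <button onClick={() => refetch()}>retry</button>
  return (
    <div>
      <div>{documentDetail?.name}</div>
      <input type='file' accept='.csv' onChange={e => e.target.files && uploadCsv(e.target.files[0])} />
    </div>
  )
}

export default ExampleConsumer
```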