diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index 5432c1ea560b3c..80d64c0b5499e1 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -3,23 +3,39 @@ from datetime import UTC, datetime from typing import cast +from flask import request +from flask_login import current_user # type: ignore +from flask_restful import Resource, fields, marshal, marshal_with, reqparse # type: ignore +from sqlalchemy import asc, desc +from transformers.hf_argparser import string_to_bool # type: ignore +from werkzeug.exceptions import Forbidden, NotFound + import services from controllers.console import api from controllers.console.app.error import ( - ProviderModelCurrentlyNotSupportError, ProviderNotInitializeError, - ProviderQuotaExceededError) -from controllers.console.datasets.error import (ArchivedDocumentImmutableError, - DocumentAlreadyFinishedError, - DocumentIndexingError, - IndexingEstimateError, - InvalidActionError, - InvalidMetadataError) -from controllers.console.wraps import (account_initialization_required, - cloud_edition_billing_resource_check, - setup_required) -from core.errors.error import (LLMBadRequestError, - ModelCurrentlyNotSupportError, - ProviderTokenNotInitError, QuotaExceededError) + ProviderModelCurrentlyNotSupportError, + ProviderNotInitializeError, + ProviderQuotaExceededError, +) +from controllers.console.datasets.error import ( + ArchivedDocumentImmutableError, + DocumentAlreadyFinishedError, + DocumentIndexingError, + IndexingEstimateError, + InvalidActionError, + InvalidMetadataError, +) +from controllers.console.wraps import ( + account_initialization_required, + cloud_edition_billing_resource_check, + setup_required, +) +from core.errors.error import ( + LLMBadRequestError, + ModelCurrentlyNotSupportError, + ProviderTokenNotInitError, + QuotaExceededError, +) from core.indexing_runner import IndexingRunner from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType @@ -27,25 +43,18 @@ from core.rag.extractor.entity.extract_setting import ExtractSetting from extensions.ext_database import db from extensions.ext_redis import redis_client -from fields.document_fields import (dataset_and_document_fields, - document_fields, document_status_fields, - document_with_segments_fields) -from flask import request -from flask_login import current_user # type: ignore -from flask_restful import (Resource, fields, marshal, # type: ignore - marshal_with, reqparse) +from fields.document_fields import ( + dataset_and_document_fields, + document_fields, + document_status_fields, + document_with_segments_fields, +) from libs.login import login_required -from models import (Dataset, DatasetProcessRule, Document, DocumentSegment, - UploadFile) +from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile from services.dataset_service import DatasetService, DocumentService -from services.entities.knowledge_entities.knowledge_entities import \ - KnowledgeConfig -from sqlalchemy import asc, desc +from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig from tasks.add_document_to_index_task import add_document_to_index_task -from tasks.remove_document_from_index_task import \ - remove_document_from_index_task -from transformers.hf_argparser import string_to_bool # type: ignore -from werkzeug.exceptions import Forbidden, NotFound +from tasks.remove_document_from_index_task import remove_document_from_index_task class DocumentResource(Resource): diff --git a/api/controllers/console/workspace/account.py b/api/controllers/console/workspace/account.py index 7b3c4014c9dc4c..2361005304694d 100644 --- a/api/controllers/console/workspace/account.py +++ b/api/controllers/console/workspace/account.py @@ -1,29 +1,29 @@ import datetime import pytz +from flask import request +from flask_login import current_user # type: ignore +from flask_restful import Resource, fields, marshal_with, reqparse # type: ignore + from configs import dify_config from constants.languages import supported_language from controllers.console import api from controllers.console.workspace.error import ( - AccountAlreadyInitedError, CurrentPasswordIncorrectError, - InvalidAccountDeletionCodeError, InvalidInvitationCodeError, - RepeatPasswordNotMatchError) -from controllers.console.wraps import (account_initialization_required, - enterprise_license_required, - setup_required) + AccountAlreadyInitedError, + CurrentPasswordIncorrectError, + InvalidAccountDeletionCodeError, + InvalidInvitationCodeError, + RepeatPasswordNotMatchError, +) +from controllers.console.wraps import account_initialization_required, enterprise_license_required, setup_required from extensions.ext_database import db from fields.member_fields import account_fields -from flask import request -from flask_login import current_user # type: ignore -from flask_restful import (Resource, fields, marshal_with, # type: ignore - reqparse) from libs.helper import TimestampField, timezone from libs.login import login_required from models import AccountIntegrate, InvitationCode from services.account_service import AccountService from services.billing_service import BillingService -from services.errors.account import \ - CurrentPasswordIncorrectError as ServiceCurrentPasswordIncorrectError +from services.errors.account import CurrentPasswordIncorrectError as ServiceCurrentPasswordIncorrectError class AccountInitApi(Resource): diff --git a/api/core/file/file_manager.py b/api/core/file/file_manager.py index 8fe87b4834ec39..32eb204506242f 100644 --- a/api/core/file/file_manager.py +++ b/api/core/file/file_manager.py @@ -4,11 +4,13 @@ from configs import dify_config from core.helper import ssrf_proxy -from core.model_runtime.entities import (AudioPromptMessageContent, - DocumentPromptMessageContent, - ImagePromptMessageContent, - MultiModalPromptMessageContent, - VideoPromptMessageContent) +from core.model_runtime.entities import ( + AudioPromptMessageContent, + DocumentPromptMessageContent, + ImagePromptMessageContent, + MultiModalPromptMessageContent, + VideoPromptMessageContent, +) from extensions.ext_storage import storage from . import helpers diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 1996ffeeaa89c7..83ba848fc9954e 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -8,9 +8,12 @@ import uuid from typing import Any, Optional, cast +from flask import current_app +from flask_login import current_user +from sqlalchemy.orm.exc import ObjectDeletedError + from configs import dify_config -from core.entities.knowledge_entities import (IndexingEstimate, PreviewDetail, - QAPreviewDetail) +from core.entities.knowledge_entities import IndexingEstimate, PreviewDetail, QAPreviewDetail from core.errors.error import ProviderTokenNotInitError from core.model_manager import ModelInstance, ModelManager from core.model_runtime.entities.model_entities import ModelType @@ -20,25 +23,22 @@ from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.index_processor_base import BaseIndexProcessor -from core.rag.index_processor.index_processor_factory import \ - IndexProcessorFactory +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.rag.models.document import ChildDocument, Document from core.rag.splitter.fixed_text_splitter import ( - EnhanceRecursiveCharacterTextSplitter, FixedRecursiveCharacterTextSplitter) + EnhanceRecursiveCharacterTextSplitter, + FixedRecursiveCharacterTextSplitter, +) from core.rag.splitter.text_splitter import TextSplitter from core.tools.utils.web_reader_tool import get_image_upload_file_ids from extensions.ext_database import db from extensions.ext_redis import redis_client from extensions.ext_storage import storage -from flask import current_app -from flask_login import current_user from libs import helper -from models.dataset import ChildChunk, Dataset, DatasetProcessRule +from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment from models.dataset import Document as DatasetDocument -from models.dataset import DocumentSegment from models.model import UploadFile from services.feature_service import FeatureService -from sqlalchemy.orm.exc import ObjectDeletedError class IndexingRunner: @@ -293,15 +293,15 @@ def indexing_estimate( process_rule=processing_rule.to_dict(), tenant_id=current_user.current_tenant_id, doc_language=doc_language, - preview=True + preview=True, ) total_segments += len(documents) for document in documents: if len(preview_texts) < 10: if doc_form and doc_form == "qa_model": - preview_detail = QAPreviewDetail(question=document.page_content, - answer=document.metadata.get("answer") - ) + preview_detail = QAPreviewDetail( + question=document.page_content, answer=document.metadata.get("answer") + ) preview_texts.append(preview_detail) else: preview_detail = PreviewDetail(content=document.page_content) @@ -324,9 +324,7 @@ def indexing_estimate( db.session.delete(image_file) if doc_form and doc_form == "qa_model": - return IndexingEstimate( - total_segments=total_segments * 20, qa_preview=preview_texts, preview=[] - ) + return IndexingEstimate(total_segments=total_segments * 20, qa_preview=preview_texts, preview=[]) return IndexingEstimate(total_segments=total_segments, preview=preview_texts) def _extract( @@ -545,7 +543,7 @@ def _load( with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: futures = [] for i in range(0, len(documents), chunk_size): - chunk_documents = documents[i: i + chunk_size] + chunk_documents = documents[i : i + chunk_size] futures.append( executor.submit( self._process_chunk, diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index f6076b16491be8..568517c0ea6d36 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -1,6 +1,8 @@ import threading from typing import Optional +from flask import Flask, current_app + from core.rag.data_post_processor.data_post_processor import DataPostProcessor from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.datasource.vdb.vector_factory import Vector @@ -10,10 +12,8 @@ from core.rag.rerank.rerank_type import RerankMode from core.rag.retrieval.retrieval_methods import RetrievalMethod from extensions.ext_database import db -from flask import Flask, current_app -from models.dataset import ChildChunk, Dataset +from models.dataset import ChildChunk, Dataset, DocumentSegment from models.dataset import Document as DatasetDocument -from models.dataset import DocumentSegment from services.external_knowledge_service import ExternalDatasetService default_retrieval_model = { diff --git a/api/core/rag/docstore/dataset_docstore.py b/api/core/rag/docstore/dataset_docstore.py index 68e316241e6c4e..60ecdf670ed11f 100644 --- a/api/core/rag/docstore/dataset_docstore.py +++ b/api/core/rag/docstore/dataset_docstore.py @@ -1,12 +1,13 @@ from collections.abc import Sequence from typing import Any, Optional +from sqlalchemy import func + from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType from core.rag.models.document import Document from extensions.ext_database import db from models.dataset import ChildChunk, Dataset, DocumentSegment -from sqlalchemy import func class DatasetDocumentStore: diff --git a/api/core/rag/index_processor/processor/parent_child_index_processor.py b/api/core/rag/index_processor/processor/parent_child_index_processor.py index 6a668421b84fc1..7ff15b9f4c86d5 100644 --- a/api/core/rag/index_processor/processor/parent_child_index_processor.py +++ b/api/core/rag/index_processor/processor/parent_child_index_processor.py @@ -14,8 +14,7 @@ from extensions.ext_database import db from libs import helper from models.dataset import ChildChunk, Dataset, DocumentSegment -from services.entities.knowledge_entities.knowledge_entities import ( - ParentMode, Rule) +from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule class ParentChildIndexProcessor(BaseIndexProcessor): @@ -124,9 +123,7 @@ def clean(self, dataset: Dataset, node_ids: Optional[list[str]], with_keywords: vector.delete() if delete_child_chunks: - db.session.query(ChildChunk).filter( - ChildChunk.dataset_id == dataset.id - ).delete() + db.session.query(ChildChunk).filter(ChildChunk.dataset_id == dataset.id).delete() db.session.commit() def retrieve( diff --git a/api/core/rag/index_processor/processor/qa_index_processor.py b/api/core/rag/index_processor/processor/qa_index_processor.py index d7c64acc6235bd..6535d4626117f8 100644 --- a/api/core/rag/index_processor/processor/qa_index_processor.py +++ b/api/core/rag/index_processor/processor/qa_index_processor.py @@ -7,6 +7,9 @@ from typing import Optional import pandas as pd +from flask import Flask, current_app +from werkzeug.datastructures import FileStorage + from core.llm_generator.llm_generator import LLMGenerator from core.rag.cleaner.clean_processor import CleanProcessor from core.rag.datasource.retrieval_service import RetrievalService @@ -16,11 +19,9 @@ from core.rag.index_processor.index_processor_base import BaseIndexProcessor from core.rag.models.document import Document from core.tools.utils.text_processing_utils import remove_leading_symbols -from flask import Flask, current_app from libs import helper from models.dataset import Dataset from services.entities.knowledge_entities.knowledge_entities import Rule -from werkzeug.datastructures import FileStorage class QAIndexProcessor(BaseIndexProcessor): @@ -69,15 +70,17 @@ def transform(self, documents: list[Document], **kwargs) -> list[Document]: split_documents.append(document_node) all_documents.extend(split_documents) if preview: - self._format_qa_document(current_app._get_current_object(), - kwargs.get("tenant_id"), - all_documents[0], - all_qa_documents, - kwargs.get("doc_language", "English")) + self._format_qa_document( + current_app._get_current_object(), + kwargs.get("tenant_id"), + all_documents[0], + all_qa_documents, + kwargs.get("doc_language", "English"), + ) else: for i in range(0, len(all_documents), 10): threads = [] - sub_documents = all_documents[i: i + 10] + sub_documents = all_documents[i : i + 10] for doc in sub_documents: document_format_thread = threading.Thread( target=self._format_qa_document, diff --git a/api/core/rag/models/document.py b/api/core/rag/models/document.py index 9d9319a0e26ab6..f314dcc3151acd 100644 --- a/api/core/rag/models/document.py +++ b/api/core/rag/models/document.py @@ -2,20 +2,7 @@ from collections.abc import Sequence from typing import Any, Optional -from pydantic import BaseModel - - -class ChildDocument(BaseModel): - """Class for storing a piece of text and associated metadata.""" - - page_content: str - - vector: Optional[list[float]] = None - - """Arbitrary metadata about the page content (e.g., source, relationships to other - documents, etc.). - """ - metadata: dict = {} +from pydantic import BaseModel, Field class ChildDocument(BaseModel): diff --git a/api/libs/helper.py b/api/libs/helper.py index 93004a6a876d1d..eaa4efdb714355 100644 --- a/api/libs/helper.py +++ b/api/libs/helper.py @@ -84,8 +84,7 @@ def timestamp_value(timestamp): raise ValueError return int_timestamp except ValueError: - error = "{timestamp} is not a valid timestamp.".format( - timestamp=timestamp) + error = "{timestamp} is not a valid timestamp.".format(timestamp=timestamp) raise ValueError(error) @@ -154,8 +153,7 @@ def timezone(timezone_string): if timezone_string and timezone_string in available_timezones(): return timezone_string - error = "{timezone_string} is not a valid timezone.".format( - timezone_string=timezone_string) + error = "{timezone_string} is not a valid timezone.".format(timezone_string=timezone_string) raise ValueError(error) @@ -211,31 +209,26 @@ def generate_token( account_email = account.email if account else email if account_id: - old_token = cls._get_current_token_for_account( - account_id, token_type) + old_token = cls._get_current_token_for_account(account_id, token_type) if old_token: if isinstance(old_token, bytes): old_token = old_token.decode("utf-8") cls.revoke_token(old_token, token_type) token = str(uuid.uuid4()) - token_data = {"account_id": account_id, - "email": account_email, "token_type": token_type} + token_data = {"account_id": account_id, "email": account_email, "token_type": token_type} if additional_data: token_data.update(additional_data) - expiry_minutes = dify_config.model_dump().get( - f"{token_type.upper()}_TOKEN_EXPIRY_MINUTES") + expiry_minutes = dify_config.model_dump().get(f"{token_type.upper()}_TOKEN_EXPIRY_MINUTES") if expiry_minutes is None: - raise ValueError( - f"Expiry minutes for {token_type} token is not set") + raise ValueError(f"Expiry minutes for {token_type} token is not set") token_key = cls._get_token_key(token, token_type) expiry_time = int(expiry_minutes * 60) redis_client.setex(token_key, expiry_time, json.dumps(token_data)) if account_id: - cls._set_current_token_for_account( - account_id, token, token_type, expiry_minutes) + cls._set_current_token_for_account(account_id, token, token_type, expiry_minutes) return token @@ -253,8 +246,7 @@ def get_token_data(cls, token: str, token_type: str) -> Optional[dict[str, Any]] key = cls._get_token_key(token, token_type) token_data_json = redis_client.get(key) if token_data_json is None: - logging.warning( - f"{token_type} token {token} not found with key {key}") + logging.warning(f"{token_type} token {token} not found with key {key}") return None token_data: Optional[dict[str, Any]] = json.loads(token_data_json) return token_data diff --git a/api/models/dataset.py b/api/models/dataset.py index a7d0e63fb7f1a9..e309d4002f9de3 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -11,13 +11,13 @@ from json import JSONDecodeError from typing import Any, cast +from sqlalchemy import func +from sqlalchemy.dialects.postgresql import JSONB + from configs import dify_config from core.rag.retrieval.retrieval_methods import RetrievalMethod from extensions.ext_storage import storage -from services.entities.knowledge_entities.knowledge_entities import ( - ParentMode, Rule) -from sqlalchemy import func -from sqlalchemy.dialects.postgresql import JSONB +from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule from .account import Account from .engine import db @@ -622,7 +622,7 @@ def get_sign_content(self): # Reconstruct the text with signed URLs offset = 0 for start, end, signed_url in signed_urls: - text = text[: start + offset] + signed_url + text[end + offset:] + text = text[: start + offset] + signed_url + text[end + offset :] offset += len(signed_url) - (end - start) return text diff --git a/api/models/model.py b/api/models/model.py index 44c379bbc16029..e37f91e806c742 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -18,7 +18,7 @@ from core.file.tool_file_parser import ToolFileParser from libs.helper import generate_string from models.enums import CreatedByRole -from models.workflow import WorkflowRunStatus, Workflow +from models.workflow import Workflow, WorkflowRunStatus from .account import Account, Tenant from .engine import db diff --git a/api/schedule/mail_clean_document_notify_task.py b/api/schedule/mail_clean_document_notify_task.py index 9078d782530e24..abe6887555b1f6 100644 --- a/api/schedule/mail_clean_document_notify_task.py +++ b/api/schedule/mail_clean_document_notify_task.py @@ -3,8 +3,9 @@ import click from celery import shared_task -from extensions.ext_mail import mail from flask import render_template + +from extensions.ext_mail import mail from models.dataset import DatasetAutoDisableLog @@ -18,9 +19,7 @@ def send_document_clean_notify_task(): if not mail.is_inited(): return - logging.info( - click.style("Start send document clean notify mail", fg="green") - ) + logging.info(click.style("Start send document clean notify mail", fg="green")) start_at = time.perf_counter() # send document clean notify mail @@ -33,9 +32,7 @@ def send_document_clean_notify_task(): end_at = time.perf_counter() logging.info( - click.style( - "Send document clean notify mail succeeded: latency: {}".format(end_at - start_at), fg="green" - ) + click.style("Send document clean notify mail succeeded: latency: {}".format(end_at - start_at), fg="green") ) except Exception: logging.exception("Send invite member mail to {} failed".format(to)) diff --git a/api/services/account_service.py b/api/services/account_service.py index f787ef64e2c52f..50aeeccfd76590 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -8,6 +8,10 @@ from hashlib import sha256 from typing import Any, Optional, cast +from pydantic import BaseModel +from sqlalchemy import func +from werkzeug.exceptions import Unauthorized + from configs import dify_config from constants.languages import language_timezone_mapping, languages from events.tenant_event import tenant_was_created @@ -17,34 +21,41 @@ from libs.passport import PassportService from libs.password import compare_password, hash_password, valid_password from libs.rsa import generate_key_pair -from models.account import (Account, AccountIntegrate, AccountStatus, Tenant, - TenantAccountJoin, TenantAccountJoinRole, - TenantAccountRole, TenantStatus) +from models.account import ( + Account, + AccountIntegrate, + AccountStatus, + Tenant, + TenantAccountJoin, + TenantAccountJoinRole, + TenantAccountRole, + TenantStatus, +) from models.model import DifySetup -from pydantic import BaseModel from services.billing_service import BillingService -from services.errors.account import (AccountAlreadyInTenantError, - AccountLoginError, AccountNotFoundError, - AccountNotLinkTenantError, - AccountPasswordError, - AccountRegisterError, - CannotOperateSelfError, - CurrentPasswordIncorrectError, - InvalidActionError, - LinkAccountIntegrateError, - MemberNotInTenantError, NoPermissionError, - RoleAlreadyAssignedError, - TenantNotFoundError) +from services.errors.account import ( + AccountAlreadyInTenantError, + AccountLoginError, + AccountNotFoundError, + AccountNotLinkTenantError, + AccountPasswordError, + AccountRegisterError, + CannotOperateSelfError, + CurrentPasswordIncorrectError, + InvalidActionError, + LinkAccountIntegrateError, + MemberNotInTenantError, + NoPermissionError, + RoleAlreadyAssignedError, + TenantNotFoundError, +) from services.errors.workspace import WorkSpaceNotAllowedCreateError from services.feature_service import FeatureService -from sqlalchemy import func from tasks.delete_account_task import delete_account_task -from tasks.mail_account_deletion_task import \ - send_account_deletion_verification_code +from tasks.mail_account_deletion_task import send_account_deletion_verification_code from tasks.mail_email_code_login import send_email_code_login_mail_task from tasks.mail_invite_member_task import send_invite_member_mail_task from tasks.mail_reset_password_task import send_reset_password_mail_task -from werkzeug.exceptions import Unauthorized class TokenPair(BaseModel): @@ -253,8 +264,7 @@ def generate_account_deletion_verification_code(account: Account) -> tuple[str, def send_account_deletion_verification_email(cls, account: Account, code: str): language, email = account.interface_language, account.email if cls.email_code_account_deletion_rate_limiter.is_rate_limited(email): - from controllers.console.auth.error import \ - EmailCodeAccountDeletionRateLimitExceededError + from controllers.console.auth.error import EmailCodeAccountDeletionRateLimitExceededError raise EmailCodeAccountDeletionRateLimitExceededError() @@ -389,8 +399,7 @@ def send_reset_password_email( raise ValueError("Email must be provided.") if cls.reset_password_rate_limiter.is_rate_limited(account_email): - from controllers.console.auth.error import \ - PasswordResetRateLimitExceededError + from controllers.console.auth.error import PasswordResetRateLimitExceededError raise PasswordResetRateLimitExceededError() @@ -426,8 +435,7 @@ def send_email_code_login_email( if email is None: raise ValueError("Email must be provided.") if cls.email_code_login_rate_limiter.is_rate_limited(email): - from controllers.console.auth.error import \ - EmailCodeLoginRateLimitExceededError + from controllers.console.auth.error import EmailCodeLoginRateLimitExceededError raise EmailCodeLoginRateLimitExceededError() diff --git a/api/services/billing_service.py b/api/services/billing_service.py index d2034643503d2b..0fc619f54287a8 100644 --- a/api/services/billing_service.py +++ b/api/services/billing_service.py @@ -2,10 +2,10 @@ from typing import Optional import httpx +from tenacity import retry, retry_if_exception_type, stop_before_delay, wait_fixed + from extensions.ext_database import db from models.account import TenantAccountJoin, TenantAccountRole -from tenacity import (retry, retry_if_exception_type, stop_before_delay, - wait_fixed) class BillingService: diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 63ddc28e52aa55..b25807ed846af4 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -6,6 +6,10 @@ import uuid from typing import Any, Optional +from flask_login import current_user # type: ignore +from sqlalchemy import func +from werkzeug.exceptions import NotFound + from configs import dify_config from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.model_manager import ModelManager @@ -16,21 +20,32 @@ from events.document_event import document_was_deleted from extensions.ext_database import db from extensions.ext_redis import redis_client -from flask_login import current_user # type: ignore from libs import helper from models.account import Account, TenantAccountRole -from models.dataset import (AppDatasetJoin, ChildChunk, Dataset, - DatasetAutoDisableLog, DatasetCollectionBinding, - DatasetPermission, DatasetPermissionEnum, - DatasetProcessRule, DatasetQuery, Document, - DocumentSegment, ExternalKnowledgeBindings) +from models.dataset import ( + AppDatasetJoin, + ChildChunk, + Dataset, + DatasetAutoDisableLog, + DatasetCollectionBinding, + DatasetPermission, + DatasetPermissionEnum, + DatasetProcessRule, + DatasetQuery, + Document, + DocumentSegment, + ExternalKnowledgeBindings, +) from models.model import UploadFile from models.source import DataSourceOauthBinding from services.entities.knowledge_entities.knowledge_entities import ( - ChildChunkUpdateArgs, KnowledgeConfig, RetrievalModel, SegmentUpdateArgs) + ChildChunkUpdateArgs, + KnowledgeConfig, + RetrievalModel, + SegmentUpdateArgs, +) from services.errors.account import InvalidActionError, NoPermissionError -from services.errors.chunk import (ChildChunkDeleteIndexError, - ChildChunkIndexingError) +from services.errors.chunk import ChildChunkDeleteIndexError, ChildChunkIndexingError from services.errors.dataset import DatasetNameDuplicateError from services.errors.document import DocumentIndexingError from services.errors.file import FileNotExistsError @@ -38,25 +53,19 @@ from services.feature_service import FeatureModel, FeatureService from services.tag_service import TagService from services.vector_service import VectorService -from sqlalchemy import func from tasks.batch_clean_document_task import batch_clean_document_task from tasks.clean_notion_document_task import clean_notion_document_task from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task from tasks.delete_segment_from_index_task import delete_segment_from_index_task -from tasks.disable_segment_from_index_task import \ - disable_segment_from_index_task -from tasks.disable_segments_from_index_task import \ - disable_segments_from_index_task +from tasks.disable_segment_from_index_task import disable_segment_from_index_task +from tasks.disable_segments_from_index_task import disable_segments_from_index_task from tasks.document_indexing_task import document_indexing_task from tasks.document_indexing_update_task import document_indexing_update_task -from tasks.duplicate_document_indexing_task import \ - duplicate_document_indexing_task +from tasks.duplicate_document_indexing_task import duplicate_document_indexing_task from tasks.enable_segments_to_index_task import enable_segments_to_index_task from tasks.recover_document_indexing_task import recover_document_indexing_task from tasks.retry_document_indexing_task import retry_document_indexing_task -from tasks.sync_website_document_indexing_task import \ - sync_website_document_indexing_task -from werkzeug.exceptions import NotFound +from tasks.sync_website_document_indexing_task import sync_website_document_indexing_task class DatasetService: diff --git a/api/services/vector_service.py b/api/services/vector_service.py index 2e6da74a9e8dc4..6698e6e7188223 100644 --- a/api/services/vector_service.py +++ b/api/services/vector_service.py @@ -5,13 +5,11 @@ from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.datasource.vdb.vector_factory import Vector from core.rag.index_processor.constant.index_type import IndexType -from core.rag.index_processor.index_processor_factory import \ - IndexProcessorFactory +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.rag.models.document import Document from extensions.ext_database import db -from models.dataset import ChildChunk, Dataset, DatasetProcessRule +from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment from models.dataset import Document as DatasetDocument -from models.dataset import DocumentSegment from services.entities.knowledge_entities.knowledge_entities import ParentMode diff --git a/api/services/workflow_service.py b/api/services/workflow_service.py index ea22e226232d56..71e1c726af6687 100644 --- a/api/services/workflow_service.py +++ b/api/services/workflow_service.py @@ -5,31 +5,31 @@ from typing import Optional, cast from uuid import uuid4 -from core.app.apps.advanced_chat.app_config_manager import \ - AdvancedChatAppConfigManager +from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfigManager from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager from core.model_runtime.utils.encoders import jsonable_encoder from core.variables import Variable -from core.workflow.entities.node_entities import (NodeRunMetadataKey, - NodeRunResult) +from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeRunResult from core.workflow.errors import WorkflowNodeRunFailedError from core.workflow.nodes import NodeType from core.workflow.nodes.base.entities import BaseNodeData from core.workflow.nodes.base.node import BaseNode from core.workflow.nodes.enums import ErrorStrategy from core.workflow.nodes.event import RunCompletedEvent -from core.workflow.nodes.node_mapping import (LATEST_VERSION, - NODE_TYPE_CLASSES_MAPPING) +from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING from core.workflow.workflow_entry import WorkflowEntry -from events.app_event import (app_draft_workflow_was_synced, - app_published_workflow_was_updated) +from events.app_event import app_draft_workflow_was_synced, app_published_workflow_was_updated from extensions.ext_database import db from models.account import Account from models.enums import CreatedByRole from models.model import App, AppMode -from models.workflow import (Workflow, WorkflowNodeExecution, - WorkflowNodeExecutionStatus, - WorkflowNodeExecutionTriggeredFrom, WorkflowType) +from models.workflow import ( + Workflow, + WorkflowNodeExecution, + WorkflowNodeExecutionStatus, + WorkflowNodeExecutionTriggeredFrom, + WorkflowType, +) from services.errors.app import WorkflowHashNotEqualError from services.workflow.workflow_converter import WorkflowConverter diff --git a/api/tasks/add_document_to_index_task.py b/api/tasks/add_document_to_index_task.py index af724ceb6b6e67..9a172b2d9d8157 100644 --- a/api/tasks/add_document_to_index_task.py +++ b/api/tasks/add_document_to_index_task.py @@ -4,16 +4,15 @@ import click from celery import shared_task # type: ignore +from werkzeug.exceptions import NotFound + from core.rag.index_processor.constant.index_type import IndexType -from core.rag.index_processor.index_processor_factory import \ - IndexProcessorFactory +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.rag.models.document import ChildDocument, Document from extensions.ext_database import db from extensions.ext_redis import redis_client -from models.dataset import DatasetAutoDisableLog +from models.dataset import DatasetAutoDisableLog, DocumentSegment from models.dataset import Document as DatasetDocument -from models.dataset import DocumentSegment -from werkzeug.exceptions import NotFound @shared_task(queue="dataset") diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py index af69a96fa4ef57..9e81fefaa75d0e 100644 --- a/api/tasks/batch_clean_document_task.py +++ b/api/tasks/batch_clean_document_task.py @@ -3,8 +3,8 @@ import click from celery import shared_task -from core.rag.index_processor.index_processor_factory import \ - IndexProcessorFactory + +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.tools.utils.web_reader_tool import get_image_upload_file_ids from extensions.ext_database import db from extensions.ext_storage import storage diff --git a/api/tasks/delete_account_task.py b/api/tasks/delete_account_task.py index 0f4ae8421fb32e..884918dc8b43a5 100644 --- a/api/tasks/delete_account_task.py +++ b/api/tasks/delete_account_task.py @@ -1,6 +1,7 @@ import logging from celery import shared_task # type: ignore + from extensions.ext_database import db from models.account import Account from services.billing_service import BillingService diff --git a/api/tasks/disable_segments_from_index_task.py b/api/tasks/disable_segments_from_index_task.py index c16e520ca1b9e0..cea67382fa643b 100644 --- a/api/tasks/disable_segments_from_index_task.py +++ b/api/tasks/disable_segments_from_index_task.py @@ -3,13 +3,12 @@ import click from celery import shared_task -from core.rag.index_processor.index_processor_factory import \ - IndexProcessorFactory + +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from extensions.ext_database import db from extensions.ext_redis import redis_client -from models.dataset import Dataset +from models.dataset import Dataset, DocumentSegment from models.dataset import Document as DatasetDocument -from models.dataset import DocumentSegment @shared_task(queue="dataset") diff --git a/api/tasks/enable_segments_to_index_task.py b/api/tasks/enable_segments_to_index_task.py index 8311380d1a66e0..ecafc99a94b433 100644 --- a/api/tasks/enable_segments_to_index_task.py +++ b/api/tasks/enable_segments_to_index_task.py @@ -4,15 +4,14 @@ import click from celery import shared_task + from core.rag.index_processor.constant.index_type import IndexType -from core.rag.index_processor.index_processor_factory import \ - IndexProcessorFactory +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.rag.models.document import ChildDocument, Document from extensions.ext_database import db from extensions.ext_redis import redis_client -from models.dataset import Dataset +from models.dataset import Dataset, DocumentSegment from models.dataset import Document as DatasetDocument -from models.dataset import DocumentSegment @shared_task(queue="dataset")