Common: Support importing existing files from object storage. #3677

Open · wants to merge 4 commits into base: main
106 changes: 106 additions & 0 deletions api/apps/storage_app.py
@@ -0,0 +1,106 @@
import os

from flask import request
from flask_login import login_required, current_user

from api.db import FileType
from api.db.services.file_service import FileService
from api.utils import get_uuid
from api.utils.api_utils import get_json_result
from api.utils.api_utils import server_error_response, get_data_error_result
from api.utils.file_utils import filename_type
from rag.utils.storage_factory import STORAGE_IMPL


@manager.route('/list', methods=['GET'])
@login_required
def list_storage_keys():
    dir = request.args.get("dir", "/")
    try:
        def filter_dir_and_exist_file(key):
            # Zero-size keys are pseudo-directory markers, not files.
            if key["size"] == 0:
                return False
            parent_dir = os.path.dirname(key["name"])
            file_name = os.path.basename(key["name"])
            dirs = list(filter(None, parent_dir.split("/")))
            user_root_folder = FileService.get_root_folder(current_user.id)
            pf_id = user_root_folder["id"]
            # Walk the folder chain; if any segment is missing, the key
            # has not been imported yet.
            for folder in dirs:
                exist_file = FileService.get_by_pf_id_name(id=pf_id, name=folder)
                if not exist_file:
                    return True
                pf_id = exist_file.id
            exist_file = FileService.get_by_pf_id_name(id=pf_id, name=file_name)
            return exist_file is None

        files = STORAGE_IMPL.list(None, dir)
        return get_json_result(data=list(filter(filter_dir_and_exist_file, files)))
    except Exception as e:
        return server_error_response(e)


@manager.route('/import', methods=['POST'])
@login_required
def import_storage_keys():
    content = request.json
    keys = content['keys']
    is_dir = content['dir']
    try:
        all_keys = []
        file_res = []
        if is_dir:
            # Expand each key as a directory prefix in the import bucket.
            for key in keys:
                all_keys = all_keys + STORAGE_IMPL.list(bucket=None, dir=key, recursive=True)
        else:
            for key in keys:
                all_keys.append(key)

        for key in all_keys:
            parent_dir = os.path.dirname(key)
            file_name = os.path.basename(key)

            dirs = list(filter(None, parent_dir.split("/")))
            user_root_folder = FileService.get_root_folder(current_user.id)
            pf_id = user_root_folder["id"]
            # Recreate the object-storage folder hierarchy under the user's
            # root folder, reusing folders that already exist.
            for folder in dirs:
                exist_file = FileService.get_by_pf_id_name(id=pf_id, name=folder)
                if exist_file:
                    pf_id = exist_file.id
                    continue
                file = FileService.insert({
                    "id": get_uuid(),
                    "parent_id": pf_id,
                    "tenant_id": current_user.id,
                    "created_by": current_user.id,
                    "name": folder,
                    "location": "",
                    "size": 0,
                    "type": FileType.FOLDER.value
                })
                pf_id = file.id

            e, file = FileService.get_by_id(pf_id)
            if not e:
                return get_data_error_result(
                    retmsg="Can't find this folder!")

            # Skip keys that have already been imported.
            if FileService.get_by_pf_id_name(id=pf_id, name=file_name):
                continue

            filetype = filename_type(file_name)
            location = key
            file = {
                "id": get_uuid(),
                "parent_id": pf_id,
                "tenant_id": current_user.id,
                "created_by": current_user.id,
                "type": filetype,
                "name": file_name,
                "location": location,
                # NOTE: assumes the default import bucket name.
                "size": STORAGE_IMPL.get_properties("bucket", key)["size"],
            }
            file = FileService.insert(file)
            file_res.append(file.to_json())

        # Return the records that were created, not the request payload.
        return get_json_result(data=file_res)
    except Exception as e:
        return server_error_response(e)
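
Together the two routes form a small import API: GET /list reports which objects in the import bucket have not been imported yet, and POST /import mirrors the selected keys into the user's file tree. A minimal sketch of how a client might exercise them — the /v1/storage mount point, the port, and the pre-authenticated session are assumptions for illustration, not part of this diff:

import requests

BASE = "http://localhost:9380/v1/storage"  # assumed mount point and port
session = requests.Session()
# ... log the session in first; the auth endpoint is out of scope here ...

# List not-yet-imported objects under the bucket root.
resp = session.get(f"{BASE}/list", params={"dir": "/"})
print(resp.json())

# Import two keys; "dir": False means the keys are plain files, True tells
# the server to expand each key as a directory prefix.
resp = session.post(f"{BASE}/import",
                    json={"keys": ["docs/a.pdf", "docs/b.pdf"], "dir": False})
print(resp.json())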
1 change: 1 addition & 0 deletions conf/service_conf.yaml
@@ -13,6 +13,7 @@ minio:
  user: 'rag_flow'
  password: 'infini_rag_flow'
  host: 'minio:9000'
  import_bucket: 'bucket'
es:
  hosts: 'http://es01:1200'
  username: 'elastic'
3 changes: 2 additions & 1 deletion docker/service_conf.yaml.template
@@ -13,6 +13,7 @@ minio:
  user: '${MINIO_USER:-rag_flow}'
  password: '${MINIO_PASSWORD:-infini_rag_flow}'
  host: '${MINIO_HOST:-minio}:9000'
  import_bucket: '${MINIO_IMPORT_BUCKET:-bucket}'
es:
  hosts: 'http://${ES_HOST:-es01}:9200'
  username: '${ES_USER:-elastic}'
@@ -22,7 +23,7 @@ infinity:
  db_name: 'default_db'
redis:
  db: 1
  password: '${REDIS_PASSWORD:-infini_rag_flow}'
  host: '${REDIS_HOST:-redis}:6379'

# postgres:
8 changes: 8 additions & 0 deletions rag/utils/azure_sas_conn.py
@@ -35,6 +35,14 @@ def health(self):
        bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
        return self.conn.upload_blob(name=fnm, data=BytesIO(binary), length=len(binary))

    def get_properties(self, bucket, key):
        info = self.conn.get_blob_client(key).get_blob_properties()
        return {"name": info.name, "size": info.size, "etag": info.etag, "owner": "None"}

    def list(self, bucket, dir, recursive=True):
        keys = self.conn.list_blobs(name_starts_with=dir)
        return [{"name": key.name, "size": key.size, "etag": key.etag, "owner": "None"} for key in keys]

    def put(self, bucket, fnm, binary):
        for _ in range(3):
            try:
8 changes: 8 additions & 0 deletions rag/utils/azure_spn_conn.py
@@ -41,6 +41,14 @@ def health(self):
        f.append_data(binary, offset=0, length=len(binary))
        return f.flush_data(len(binary))

    def get_properties(self, bucket, key):
        info = self.conn.get_file_client(self.full_path(key)).get_file_properties()
        return {"name": info.name, "size": info.size, "etag": info.etag, "owner": info.owner}

    def list(self, bucket, dir, recursive=True):
        paths = self.conn.get_paths(path=self.full_path(dir), recursive=recursive, max_results=10000)
        return [{"name": path.name, "size": path.content_length, "etag": path.etag, "owner": path.owner} for path in paths]

    def put(self, bucket, fnm, binary):
        for _ in range(3):
            try:
18 changes: 18 additions & 0 deletions rag/utils/minio_conn.py
@@ -26,10 +26,15 @@ def __open__(self):
                secret_key=settings.MINIO["password"],
                secure=False
            )
            self.import_bucket = settings.MINIO["import_bucket"]
        except Exception:
            logging.exception(
                "Fail to connect %s " % settings.MINIO["host"])

    def __close__(self):
        del self.conn
        self.conn = None
@@ -44,6 +49,19 @@ def health(self):
        )
        return r

    def get_properties(self, bucket, key):
        info = self.conn.stat_object(bucket_name=bucket, object_name=key)
        return {"name": info.object_name, "size": info.size, "etag": info.etag, "owner": info.owner_name}

    def list(self, bucket, dir, recursive=True):
        # Fall back to the configured import bucket when none is given.
        bucket = bucket if bucket else self.import_bucket
        if dir != "/":
            keys = self.conn.list_objects(bucket_name=bucket, prefix=dir, recursive=recursive)
        else:
            keys = self.conn.list_objects(bucket_name=bucket, recursive=recursive)
        data = [{"name": key.object_name, "size": key.size, "etag": key.etag, "owner": key.owner_name} for key in keys]
        return data

    def put(self, bucket, fnm, binary):
        for _ in range(3):
            try:
22 changes: 16 additions & 6 deletions rag/utils/s3_conn.py
@@ -5,16 +5,19 @@
from botocore.client import Config
import time
from io import BytesIO
from rag import settings
from rag.settings import s3_logger

Contributor commented: I'm getting a message here that s3_logger does not exist in the settings.

from rag.utils import singleton

@singleton
class RAGFlowS3(object):
    def __init__(self):
        self.conn = None
        self.import_bucket = settings.S3["import_bucket"]
        self.endpoint = settings.S3["endpoint"]
        self.access_key = settings.S3["access_key"]
        self.secret_key = settings.S3["secret_key"]
        self.region = settings.S3["region"]
        self.__open__()

    def __open__(self):
@@ -69,10 +72,17 @@ def health(self):
        return r

    def get_properties(self, bucket, key):
        # boto3 has no stat_object; head_object returns the object metadata.
        info = self.conn.head_object(Bucket=bucket, Key=key)
        return {"name": key, "size": info["ContentLength"], "etag": info["ETag"], "owner": "None"}

    def list(self, bucket, dir, recursive=True):
        # An S3 prefix listing is inherently recursive; only the first page
        # (up to 1,000 keys) is returned here.
        bucket = bucket if bucket else self.import_bucket
        prefix = "" if dir == "/" else dir
        response = self.conn.list_objects_v2(Bucket=bucket, Prefix=prefix, FetchOwner=True)
        keys = response.get("Contents", [])
        return [{"name": key["Key"], "size": key["Size"], "etag": key["ETag"],
                 "owner": key.get("Owner", {}).get("DisplayName", "None")} for key in keys]

    def put(self, bucket, fnm, binary):
        logging.debug(f"bucket name {bucket}; filename :{fnm}:")
56 changes: 56 additions & 0 deletions web/src/hooks/import-file-hooks.ts
@@ -0,0 +1,56 @@
import { useGetPagination } from '@/hooks/logic-hooks';
import { ResponseType } from '@/interfaces/database/base';
import fileManagerService from '@/services/file-manager-service';
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
import { PaginationProps } from 'antd';

export interface IImportListResult {
  name: string;
  size: string;
  etag: string;
  owner: string;
}

export interface IListResult {
  data: IImportListResult[];
  pagination: PaginationProps;
  setPagination: (pagination: { page: number; pageSize: number }) => void;
  loading: boolean;
}

export const useFetchImportFileList = (): ResponseType<any> & IListResult => {
  const { pagination } = useGetPagination();
  const { data, isFetching: loading } = useQuery({
    queryKey: ['getImportFiles', {}],
    initialData: {},
    gcTime: 0,
    queryFn: async (params: any) => {
      console.info(params);
      const { data } = await fileManagerService.getImportFiles();
      return data;
    },
  });

  return {
    ...data,
    pagination: { ...pagination, total: data?.data?.total },
    loading,
  };
};

export const useImportFiles = () => {
  const queryClient = useQueryClient();
  const {
    data,
    isPending: loading,
    mutateAsync,
  } = useMutation({
    mutationKey: ['importFiles'],
    mutationFn: async (params: { keys: string[]; dir: boolean }) => {
      const { data = {} } = await fileManagerService.importFiles(params);
      return data;
    },
  });

  return { data, loading, importFiles: mutateAsync };
};
8 changes: 8 additions & 0 deletions web/src/locales/en.ts
@@ -77,6 +77,14 @@ export default {
    searchKnowledgePlaceholder: 'Search',
    noMoreData: 'It is all, nothing more',
  },
  knowledgeImport: {
    importFile: 'Import file',
    name: 'Name',
    size: 'Size',
    etag: 'Etag',
    owner: 'Owner',
    action: 'Action',
  },
  knowledgeDetails: {
    dataset: 'Dataset',
    testing: 'Retrieval testing',
8 changes: 8 additions & 0 deletions web/src/locales/zh-traditional.ts
@@ -77,6 +77,14 @@ export default {
    searchKnowledgePlaceholder: '搜索',
    noMoreData: 'It is all, nothing more',
  },
  knowledgeImport: {
    importFile: '導入文件',
    name: '名稱',
    size: '大小',
    etag: 'MD5',
    owner: '創建者',
    action: '操作',
  },
  knowledgeDetails: {
    dataset: '數據集',
    testing: '檢索測試',
8 changes: 8 additions & 0 deletions web/src/locales/zh.ts
@@ -77,6 +77,14 @@ export default {
    searchKnowledgePlaceholder: '搜索',
    noMoreData: '没有更多数据了',
  },
  knowledgeImport: {
    importFile: '导入文件',
    name: '名称',
    size: '大小',
    etag: 'MD5',
    owner: '创建者',
    action: '操作',
  },
  knowledgeDetails: {
    dataset: '数据集',
    testing: '检索测试',
27 changes: 27 additions & 0 deletions web/src/pages/file-manager/file-toolbar.tsx
@@ -5,6 +5,7 @@ import {
  IListResult,
  useFetchParentFolderList,
} from '@/hooks/file-manager-hooks';
import ImportFilesFromStorageModal from '@/pages/file-manager/import-storage-files-modal';
import {
  DownOutlined,
  FileTextOutlined,
@@ -26,6 +27,7 @@ import { useCallback, useMemo } from 'react';
import {
  useHandleBreadcrumbClick,
  useHandleDeleteFile,
  useImportFilesFromStorage,
  useSelectBreadcrumbItems,
} from './hooks';

@@ -50,6 +52,14 @@ const FileToolbar = ({
  showMoveFileModal,
}: IProps) => {
  const { t } = useTranslate('knowledgeDetails');
  const { t: tImportFile } = useTranslate('knowledgeImport');
  const {
    visible,
    hideModal,
    showModal,
    onCreateOk,
    loading: creatingLoading,
  } = useImportFilesFromStorage();
  const breadcrumbItems = useSelectBreadcrumbItems();
  const { handleBreadcrumbClick } = useHandleBreadcrumbClick();
  const parentFolderList = useFetchParentFolderList();
@@ -184,7 +194,24 @@
          </Button>
        </Dropdown>
      )}

      {isKnowledgeBase || (
        <Button
          type="primary"
          icon={<PlusOutlined />}
          onClick={showModal}
          className={styles.topButton}
        >
          {tImportFile('importFile')}
        </Button>
      )}
    </Space>
    <ImportFilesFromStorageModal
      loading={creatingLoading}
      visible={visible}
      hideModal={hideModal}
      onOk={onCreateOk}
    ></ImportFilesFromStorageModal>
  </div>
);
};