diff --git a/.dockerignore b/.dockerignore index bf272fbd..f40ce925 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,4 +3,10 @@ node_modules Dockerfile .dockerignore .vscode -.github \ No newline at end of file +.github + +# Other +*.config.json +*.ignore.me +ignore.me.* +*.ignore.me.* \ No newline at end of file diff --git a/.eslintrc.js b/.eslintrc.js index 38a6ef8b..196a2c6e 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -11,10 +11,10 @@ module.exports = { tsconfigRootDir: __dirname, sourceType: 'module' }, - ignorePatterns: ['**/index.d.ts', '**/global.d.ts', '**/*.test.ts', '*.js', '**/dist/**'], + ignorePatterns: ['**/index.d.ts', '**/global.d.ts', '*.js', '**/dist/**'], plugins: ['eslint-plugin-import', 'eslint-plugin-jsdoc', '@typescript-eslint'], rules: { - '@typescript-eslint/consistent-type-definitions': 'error', + '@typescript-eslint/consistent-type-definitions': ['warn', 'type'], '@typescript-eslint/member-delimiter-style': [ 'error', { @@ -100,6 +100,19 @@ module.exports = { 'no-trailing-spaces': 'error', 'no-var': 'error', 'object-shorthand': 'error', - 'prefer-const': 'warn' + 'prefer-const': 'warn', + '@typescript-eslint/explicit-member-accessibility': [ + 'warn', + { + accessibility: 'explicit', + overrides: { + accessors: 'explicit', + constructors: 'no-public', + methods: 'explicit', + properties: 'off', + parameterProperties: 'explicit' + } + } + ] } } diff --git a/.github/actions/setup-e2e/action.yml b/.github/actions/setup-e2e/action.yml new file mode 100644 index 00000000..cd0a05be --- /dev/null +++ b/.github/actions/setup-e2e/action.yml @@ -0,0 +1,51 @@ +name: Setup E2E +description: Setup E2E NLU Usage + +inputs: + lang_server_dim: + description: 'Dimension in which to run the language server' + required: false + type: 'string' + default: '25' + + lang_server_lang: + description: 'Language to download in the language server' + required: false + type: 'string' + default: 'en' + +runs: + using: 'composite' + steps: + - uses: actions/setup-node@v2 + with: + node-version-file: '.nvmrc' + cache: 'yarn' + - name: Fetch Node Packages + shell: bash + run: yarn + - name: Build + shell: bash + run: yarn build + - name: package + shell: bash + run: yarn package --linux + - name: Rename binary + id: rename_binary + shell: bash + run: | + bin_original_name=$(node -e "console.log(require('./scripts/utils/binary').getFileName())") + echo "Moving ./dist/$bin_original_name to ./nlu ..." + mv ./dist/$bin_original_name ./nlu + - name: Download language models + shell: bash + run: ./nlu lang download --lang ${{ inputs.lang_server_lang }} --dim ${{ inputs.lang_server_dim }} + - name: Start Language Server + shell: bash + run: | + ./nlu lang --dim ${{ inputs.lang_server_dim }} & + echo "Lang Server started on pid $!" 
+ - name: Sleep + uses: jakejarvis/wait-action@master + with: + time: '15s' diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml new file mode 100644 index 00000000..0d1ab3d6 --- /dev/null +++ b/.github/workflows/bench.yml @@ -0,0 +1,34 @@ +name: Bench +on: + workflow_dispatch: {} + pull_request: + types: + - opened + - edited + - reopened + - synchronize +jobs: + benchmark: + name: Run benchmark + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@master + - uses: actions/setup-node@v1 + with: + node-version: '16.13.0' + - name: Fetch Node Packages + run: | + yarn --verbose + - name: Build + run: | + yarn build + - name: Download language models + run: | + yarn start lang download --lang fr --dim 100 + yarn start lang download --lang en --dim 100 + - name: Run Regression Test + run: | + yarn start lang --dim 100 & + sleep 15s && yarn start nlu --doc false --log-level "critical" --ducklingEnabled false --languageURL http://localhost:3100 & + sleep 25s && yarn bench --skip="clinc150" diff --git a/.github/workflows/binary.yml b/.github/workflows/binary.yml deleted file mode 100644 index 0d0a79d4..00000000 --- a/.github/workflows/binary.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: Binary -on: [pull_request] -jobs: - nlu: - name: Try out binary executable file - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@master - - uses: actions/setup-node@v1 - with: - node-version: '12.13.0' - - name: Fetch Node Packages - run: | - yarn --verbose - - name: Build - run: | - yarn build - - name: package - run: | - yarn package --linux - - name: Rename binary - id: rename_binary - run: | - bin_original_name=$(node -e "console.log(require('./scripts/utils/binary').getFileName())") - echo "Moving ./dist/$bin_original_name to ./nlu ..." - mv ./dist/$bin_original_name ./nlu - - name: Download language models - run: | - ./nlu download --lang en --dim 25 - - name: Run Regression Test - run: | - - # Running in dimension 25 as we don't care about regressions here... 
- # We just want to make sure nothing breaks - - ./nlu lang --dim 25 & - sleep 15s && ./nlu --doc false --verbose 0 --ducklingEnabled false --languageURL http://localhost:3100 & - sleep 25s && BITFAN_LANG='en' yarn e2e -t "bpds-intent" -k diff --git a/.github/workflows/bitfan.yml b/.github/workflows/bitfan.yml deleted file mode 100644 index 50c396b1..00000000 --- a/.github/workflows/bitfan.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Bitfan -on: [pull_request] -jobs: - nlu: - name: Run regression benchmark - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@master - - uses: actions/setup-node@v1 - with: - node-version: '12.13.0' - - name: Fetch Node Packages - run: | - yarn --verbose - - name: Build - run: | - yarn build - - name: Download language models - run: | - yarn start download --lang fr --dim 100 - yarn start download --lang en --dim 100 - - name: Run Regression Test - run: | - yarn start lang --dim 100 & - sleep 15s && yarn start nlu --doc false --verbose 0 --ducklingEnabled false --languageURL http://localhost:3100 & - sleep 25s && yarn e2e --skip="clinc150" diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index dab86d71..e35ff921 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -1,5 +1,12 @@ name: Codestyle -on: [pull_request] +on: + workflow_dispatch: {} + pull_request: + types: + - opened + - edited + - reopened + - synchronize jobs: run_prettier: name: Run Prettier on codebase @@ -9,7 +16,7 @@ jobs: uses: actions/checkout@master - uses: actions/setup-node@v1 with: - node-version: '12.13.0' + node-version: '16.13.0' - name: Install Dependencies run: | yarn @@ -24,7 +31,7 @@ jobs: uses: actions/checkout@master - uses: actions/setup-node@v1 with: - node-version: '12.13.0' + node-version: '16.13.0' - name: Install Dependencies run: | yarn diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 00000000..1e832a0b --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,150 @@ +name: E2E +on: + workflow_dispatch: {} + pull_request: + types: + - opened + - edited + - reopened + - synchronize +jobs: + fs: + name: file system + runs-on: ubuntu-latest + services: + duckling: + image: rasa/duckling + ports: + - 8000:8000 + steps: + - name: Checkout code + uses: actions/checkout@master + - name: Setup E2E + uses: ./.github/actions/setup-e2e + - name: Start NLU Server + run: | + ./nlu \ + --log-level "critical" \ + --ducklingURL http://localhost:8000 \ + --languageURL http://localhost:3100 \ + --modelTransferEnabled \ + --port 3200 & + nlu_pid=$! 
+ echo "NLU Server started on pid $nlu_pid" + - name: Sleep + uses: jakejarvis/wait-action@master + with: + time: '15s' + - name: Run Tests + run: | + yarn e2e --nlu-endpoint http://localhost:3200 + db: + name: database + runs-on: ubuntu-latest + services: + duckling: + image: rasa/duckling + ports: + - 8000:8000 + postgres: + # Docker Hub image + image: postgres + env: + POSTGRES_DB: botpress-nlu-1 + POSTGRES_PASSWORD: postgres + POSTGRES_USER: postgres + POSTGRES_PORT: 5432 + ports: + - 5432:5432 + # Set health checks to wait until postgres has started + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - name: Checkout code + uses: actions/checkout@master + - name: Setup E2E + uses: ./.github/actions/setup-e2e + - name: Start NLU Server + run: | + ./nlu \ + --log-level "critical" \ + --ducklingURL http://localhost:8000 \ + --languageURL http://localhost:3100 \ + --modelTransferEnabled \ + --port 3201 \ + --dbURL postgres://postgres:postgres@localhost:5432/botpress-nlu-1 & \ + nlu_pid=$! + echo "NLU Server started on pid $nlu_pid" + - name: Sleep + uses: jakejarvis/wait-action@master + with: + time: '15s' + - name: Run Tests + run: | + yarn e2e --nlu-endpoint http://localhost:3201 + cluster: + name: cluster + runs-on: ubuntu-latest + services: + duckling: + image: rasa/duckling + ports: + - 8000:8000 + postgres: + # Docker Hub image + image: postgres + env: + POSTGRES_DB: botpress-nlu-2 + POSTGRES_PASSWORD: postgres + POSTGRES_USER: postgres + POSTGRES_PORT: 5432 + ports: + - 5432:5432 + # Set health checks to wait until postgres has started + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - name: Checkout code + uses: actions/checkout@master + - name: Setup E2E + uses: ./.github/actions/setup-e2e + - name: Start First NLU Server on port 3202 + run: | + ./nlu \ + --maxTraining 0 \ + --maxLinting 0 \ + --log-level "critical" \ + --ducklingURL http://localhost:8000 \ + --languageURL http://localhost:3100 \ + --modelTransferEnabled \ + --port 3202 \ + --dbURL postgres://postgres:postgres@localhost:5432/botpress-nlu-2 & \ + nlu_pid1=$! + echo "NLU Server started on pid $nlu_pid1" + - name: Sleep + uses: jakejarvis/wait-action@master + with: + time: '5s' + - name: Start Second NLU Server on port 3203 + run: | + ./nlu \ + --log-level "critical" \ + --ducklingURL http://localhost:8000 \ + --languageURL http://localhost:3100 \ + --port 3203 \ + --dbURL postgres://postgres:postgres@localhost:5432/botpress-nlu-2 & \ + nlu_pid2=$! 
+ echo "NLU Server started on pid $nlu_pid2" + - name: Sleep + uses: jakejarvis/wait-action@master + with: + time: '15s' + - name: Run Tests + run: | + yarn e2e --nlu-endpoint http://localhost:3202 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 51a46226..4cc571c9 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -1,5 +1,12 @@ name: Tests -on: [pull_request] +on: + workflow_dispatch: {} + pull_request: + types: + - opened + - edited + - reopened + - synchronize jobs: unit: name: Run unit tests diff --git a/.gitignore b/.gitignore index 6ceba9aa..850cf899 100644 --- a/.gitignore +++ b/.gitignore @@ -72,7 +72,11 @@ jspm_packages/ .env.test .env*.local +# Typescript build output dist/ - -config.json \ No newline at end of file +# Other +*.config.json +*.ignore.me +ignore.me.* +*.ignore.me.* \ No newline at end of file diff --git a/.nvmrc b/.nvmrc index 47c0a98a..58a4133d 100644 --- a/.nvmrc +++ b/.nvmrc @@ -1 +1 @@ -12.13.0 +16.13.0 diff --git a/.vscode/launch.json b/.vscode/launch.json index d37e1f49..57f72e51 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,25 +6,25 @@ "configurations": [ { "type": "pwa-node", - "runtimeVersion": "12.13.0", + "runtimeVersion": "16.13.0", "request": "launch", "name": "Debug NLU Server", "skipFiles": ["/**"], - "program": "${workspaceFolder}/packages/nlu-cli/dist/index.js", - "console": "internalConsole", + "program": "${workspaceFolder}/packages/nlu-bin/dist/index.js", + "console": "integratedTerminal", "sourceMaps": true, "autoAttachChildProcesses": true, - "args": ["--config=./config.json"], + "args": ["--config=./nlu.config.json"], "outFiles": ["${workspaceRoot}/packages/**/*.js", "${workspaceRoot}/node_modules/**/*.js"] }, { "type": "pwa-node", - "runtimeVersion": "12.13.0", + "runtimeVersion": "16.13.0", "request": "launch", "name": "Debug Lang Server", "skipFiles": ["/**"], - "program": "${workspaceFolder}/packages/nlu-cli/dist/index.js", - "console": "internalConsole", + "program": "${workspaceFolder}/packages/nlu-bin/dist/index.js", + "console": "integratedTerminal", "sourceMaps": true, "autoAttachChildProcesses": true, "args": ["lang", "--dim=100"], @@ -32,44 +32,68 @@ }, { "type": "pwa-node", - "runtimeVersion": "12.13.0", + "runtimeVersion": "16.13.0", "request": "launch", "name": "Debug Download Model", "skipFiles": ["/**"], - "program": "${workspaceFolder}/packages/nlu-cli/dist/index.js", - "console": "internalConsole", + "program": "${workspaceFolder}/packages/nlu-bin/dist/index.js", + "console": "integratedTerminal", "sourceMaps": true, "autoAttachChildProcesses": true, "args": ["download", "--lang=en", "--dim=100"], "outFiles": ["${workspaceRoot}/packages/**/*.js", "${workspaceRoot}/node_modules/**/*.js"] }, { - "type": "node", - "runtimeVersion": "12.13.0", + "type": "pwa-node", "request": "launch", - "name": "Debug Jest Current File", + "name": "Debug All Tests", "program": "${workspaceFolder}/node_modules/.bin/jest", - "args": ["${fileBasenameNoExtension}"], - "console": "internalConsole", - "internalConsoleOptions": "neverOpen", - "windows": { - "program": "${workspaceFolder}/node_modules/jest/bin/jest" - } + "console": "integratedTerminal", + "args": ["-c", "${workspaceFolder}/jest.config.js", "--runInBand", "--no-cache", "${input:testFilter}"], + "sourceMaps": true }, { "type": "pwa-node", - "runtimeVersion": "12.13.0", + "runtimeVersion": "16.13.0", + "request": "launch", + "name": "Debug E2E Tests", + "skipFiles": ["/**"], + "program": 
"${workspaceFolder}/packages/nlu-e2e/dist/index.js", + "console": "integratedTerminal", + "sourceMaps": true, + "autoAttachChildProcesses": true, + "args": ["--nlu-endpoint", "${input:nluEndpoint}", "--pattern", "${input:testFilter}"], + "outFiles": ["${workspaceRoot}/packages/**/*.js", "${workspaceRoot}/node_modules/**/*.js"] + }, + { + "name": "Run TS-Node", + "type": "node", "request": "launch", - "name": "Debug App tests", - "cwd": "${workspaceFolder}/packages/nlu-cli", - "console": "internalConsole", - "args": [ - "${workspaceFolder}/packages/nlu-cli/node_modules/.bin/jest", - "--rootDir=${workspaceFolder}/packages/nlu-cli", - "-c=${workspaceFolder}/packages/nlu-cli/jest.config.js", - "--detectOpenHandles", - "src/nlu-server/app.test.ts" - ] + "runtimeExecutable": "node", + "runtimeArgs": ["--nolazy", "-r", "ts-node/register/transpile-only"], + "args": ["${input:tsfile}"], + "cwd": "${workspaceRoot}", + "console": "integratedTerminal", + "skipFiles": ["/**", "node_modules/**"] + } + ], + "inputs": [ + { + "id": "testFilter", + "description": "filter when running tests", + "type": "promptString", + "default": "" + }, + { + "id": "nluEndpoint", + "description": "nlu server endpoint for e2e", + "type": "promptString", + "default": "http://localhost:3200" + }, + { + "id": "tsfile", + "description": "ts file to run with ts-node", + "type": "promptString" } ] } diff --git a/Dockerfile b/Dockerfile index 649e649b..a9749185 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM node:12.13.1 +FROM node:16.13.0 ADD . /nlu diff --git a/gulpfile.js b/gulpfile.js index dbd7b54f..bf081200 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -1,6 +1,5 @@ const gulp = require('gulp') const package = require('./scripts/gulp.package') -const config = require('./scripts/gulp.config') const release = require('./scripts/gulp.release') gulp.task('default', (cb) => { @@ -8,7 +7,6 @@ gulp.task('default', (cb) => { Development Cheat Sheet ================================== yarn cmd package Packages Application in binaries - yarn cmd config Upsert new NLU server config file yarn cmd bump Bump version and update change log yarn cmd changelog Print change log `) @@ -16,6 +14,5 @@ gulp.task('default', (cb) => { }) gulp.task('package', package.package) -gulp.task('config', config.upsertConfigFile) gulp.task('bump', release.bumpVersion) gulp.task('changelog', release.printChangeLog) diff --git a/jest.config.js b/jest.config.js new file mode 100644 index 00000000..7f77978a --- /dev/null +++ b/jest.config.js @@ -0,0 +1,8 @@ +module.exports = { + testEnvironment: 'node', + testPathIgnorePatterns: ['node_modules'], + rootDir: '.', + roots: ['/packages/'], + moduleFileExtensions: ['js'], + collectCoverage: false +} diff --git a/package.json b/package.json index 605482cf..2e2ec22b 100644 --- a/package.json +++ b/package.json @@ -8,10 +8,10 @@ "type": "git", "url": "https://github.com/botpress/nlu.git" }, - "bin": "./packages/nlu-cli/dist/index.js", + "bin": "./packages/nlu-bin/dist/index.js", "pkg": { "scripts": [ - "./packages/nlu-cli/dist/**/*.js", + "./packages/nlu-bin/dist/**/*.js", "node_modules/@botpress/*/dist/**/*.js", "node_modules/pg-format/lib/*.js" ], @@ -22,26 +22,29 @@ ] }, "scripts": { - "postinstall": "yarn cmd config", "cmd": "yarn run gulp", - "dev": "cross-env tsnd --transpile-only --watch ./packages/**/*.ts ./packages/nlu-cli/src/index.ts", - "start": "cross-env node ./packages/nlu-cli/dist/index.js", - "build": "tsc --build", + "dev": "cross-env tsnd --transpile-only --exit-child --watch 
./packages/**/*.ts ./packages/nlu-bin/src/index.ts", + "start": "cross-env node ./packages/nlu-bin/dist/index.js", + "build": "tsc --build ./tsconfig.json", + "clean": "yarn workspaces -p run clean && rimraf ./dist && rimraf ./node_modules", + "rebuild": "yarn clean && yarn && yarn build --force", "postbuild": "yarn workspace @botpress/nlu-server postbuild", "package": "yarn cmd package", - "e2e": "yarn workspace e2e start", - "test": "yarn workspaces run test", + "bench": "yarn workspace @botpress/nlu-bench start", + "e2e": "yarn workspace @botpress/nlu-e2e start", + "test": "jest -c ./jest.config.js --runInBand --detectOpenHandles", "prettier": "cross-env prettier --check './packages/**/*.ts' '!**/*.d.ts'", - "prettier-fix": "cross-env prettier --write './packages/**/*.ts' '!**/*.d.ts'", + "prettier:fix": "cross-env prettier --write './packages/**/*.ts' '!**/*.d.ts'", "eslint": "cross-env eslint packages/ --ext .ts", - "eslint-fix": "cross-env eslint --fix packages/ --ext .ts" + "eslint:fix": "cross-env eslint --fix packages/ --ext .ts" }, "devDependencies": { "conventional-changelog": "^3.1.24", "gulp": "^4.0.0", "gulp-cli": "^2.0.1", - "pkg": "^4.5.1", + "pkg": "~5.2.0", "prepend-file": "^2.0.0", + "rimraf": "^3.0.2", "semver": "^7.3.5", "ts-node": "10.4.0", "ts-node-dev": "^1.1.6", @@ -51,4 +54,4 @@ "workspaces": [ "packages/*" ] -} +} \ No newline at end of file diff --git a/packages/bitfan/babel.config.js b/packages/bitfan/babel.config.js deleted file mode 100644 index cdf925d0..00000000 --- a/packages/bitfan/babel.config.js +++ /dev/null @@ -1,14 +0,0 @@ -module.exports = { - presets: [ - [ - "@babel/preset-env", - { - targets: { - node: "current", - }, - }, - ], - "@babel/preset-typescript", - ], - plugins: ["@babel/plugin-proposal-optional-chaining"], -}; diff --git a/packages/bitfan/datasets/slot/bpds/F-train.en.ds.json b/packages/bitfan/datasets/slot/bpds/F-train.en.ds.json index 9bfdc5ad..02a51e27 100644 --- a/packages/bitfan/datasets/slot/bpds/F-train.en.ds.json +++ b/packages/bitfan/datasets/slot/bpds/F-train.en.ds.json @@ -5,15 +5,11 @@ "variables": [ { "name": "flight-number", - "types": [ - "city" - ] + "types": ["city"] }, { "name": "arrival-city", - "types": [ - "city" - ] + "types": ["city"] } ], "enums": [ @@ -22,9 +18,7 @@ "values": [ { "name": "Paris", - "synonyms": [ - "city of lights" - ] + "synonyms": ["city of lights"] }, { "name": "Sydney", @@ -551,7 +545,7 @@ "synonyms": [] }, { - "name": "La Paz ", + "name": "La Paz", "synonyms": [] }, { @@ -583,7 +577,7 @@ "synonyms": [] }, { - "name": "Porto-Novo ", + "name": "Porto-Novo", "synonyms": [] }, { @@ -599,11 +593,11 @@ "synonyms": [] }, { - "name": "Pretoria ", + "name": "Pretoria", "synonyms": [] }, { - "name": "Putrajaya ", + "name": "Putrajaya", "synonyms": [] }, { @@ -941,4 +935,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/packages/bitfan/datasets/slot/bpds/G-train.en.ds.json b/packages/bitfan/datasets/slot/bpds/G-train.en.ds.json index 791e8561..82edd2df 100644 --- a/packages/bitfan/datasets/slot/bpds/G-train.en.ds.json +++ b/packages/bitfan/datasets/slot/bpds/G-train.en.ds.json @@ -5,15 +5,11 @@ "variables": [ { "name": "from_city", - "types": [ - "city" - ] + "types": ["city"] }, { "name": "to_city", - "types": [ - "city" - ] + "types": ["city"] } ], "enums": [ @@ -22,9 +18,7 @@ "values": [ { "name": "Paris", - "synonyms": [ - "city of lights" - ] + "synonyms": ["city of lights"] }, { "name": "Sydney", @@ -551,7 +545,7 @@ "synonyms": [] }, { - "name": "La Paz ", + "name": "La Paz", 
"synonyms": [] }, { @@ -583,7 +577,7 @@ "synonyms": [] }, { - "name": "Porto-Novo ", + "name": "Porto-Novo", "synonyms": [] }, { @@ -599,11 +593,11 @@ "synonyms": [] }, { - "name": "Pretoria ", + "name": "Pretoria", "synonyms": [] }, { - "name": "Putrajaya ", + "name": "Putrajaya", "synonyms": [] }, { @@ -950,4 +944,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/packages/bitfan/jest.config.js b/packages/bitfan/jest.config.js deleted file mode 100644 index 129961bf..00000000 --- a/packages/bitfan/jest.config.js +++ /dev/null @@ -1,10 +0,0 @@ -module.exports = { - preset: "ts-jest", - globals: { - "ts-jest": { - babelConfig: true, - diagnostics: false, - }, - }, - testEnvironment: "node", -}; diff --git a/packages/bitfan/package.json b/packages/bitfan/package.json index 0e9b480c..755dd893 100644 --- a/packages/bitfan/package.json +++ b/packages/bitfan/package.json @@ -9,7 +9,8 @@ "types": "./src/bitfan.d.ts", "scripts": { "build": "tsc --build", - "test": "jest" + "test": "jest --roots ./dist", + "clean": "rimraf ./dist && rimraf ./node_modules" }, "publishConfig": { "registry": "https://registry.npmjs.org" @@ -36,12 +37,11 @@ "@types/fs-extra": "^9.0.1", "@types/jest": "^26.0.14", "@types/lodash": "^4.14.161", - "@types/node": "^12.13.0", + "@types/node": "^16.11.10", "@types/recursive-readdir": "^2.2.0", "@types/seedrandom": "^2.4.28", "babel-jest": "^24.0.0", "jest": "^24.0.0", - "ts-jest": "^24.3.0", - "typescript": "^3.9.10" + "typescript": "^5.0.4" } } diff --git a/packages/bitfan/src/bitfan.d.ts b/packages/bitfan/src/bitfan.d.ts index 2595796e..f5d88b82 100644 --- a/packages/bitfan/src/bitfan.d.ts +++ b/packages/bitfan/src/bitfan.d.ts @@ -158,14 +158,14 @@ export namespace tables { /** * @description Collection of problems with an engine to solve them */ -export interface Solution { +export type Solution = { name: string problems: Problem[] engine: Engine cb?: ResultViewer } -export interface UnsupervisedSolution { +export type UnsupervisedSolution = { name: string problems: UnsupervisedProblem[] engine: UnsupervisedEngine @@ -181,7 +181,7 @@ export type MultiLabel = 'multi-intent' | 'multi-intent-topic' */ export type ProblemType = SingleLabel | MultiLabel | 'spell' | 'lang' | 'slot' -interface Dic { +type Dic = { [key: string]: T } @@ -199,14 +199,14 @@ export type Label = T extends SingleLabel ? { name: string; start: number; end: number }[] : string -export interface Candidate { +export type Candidate = { elected: Elected confidence: number } export type Elected = T extends 'slot' ? { name: string; start: number; end: number } : string -interface BaseProblem { +type BaseProblem = { name: string type: ProblemType testSet: DataSet @@ -227,7 +227,7 @@ export type UnsupervisedProblem = BaseProblem & { export type ProgressCb = (p: number) => void -interface Predictor { +type Predictor = { predict: (testSet: DataSet, progress: ProgressCb) => Promise[]> } @@ -242,7 +242,7 @@ export type UnsupervisedEngine = Predictor & { train: (corpus: Document[], seed: number, progress: ProgressCb) => Promise } -export interface Prediction { +export type Prediction = { text: string candidates: Candidate[] label: Label @@ -253,7 +253,7 @@ export interface Prediction { * @returns A number between 0 and 1 where 0 means that the test has failed. * For multi-class problems, this number will often be, neither 1 or 0, but a fraction. 
*/ -export interface Criteria { +export type Criteria = { name: string eval(res: Prediction): number } @@ -272,21 +272,21 @@ export type ResultViewer = ( export type DatasetViewer = (...datasets: DataSet[]) => void -export interface ScoreInfo { +export type ScoreInfo = { metric: string seed: number problem: string score: number } -export interface PerformanceReport { +export type PerformanceReport = { generatedOn: Date scores: ScoreInfo[] } export type RegressionStatus = 'success' | 'regression' | 'tolerated-regression' -export interface RegressionReason { +export type RegressionReason = { status: RegressionStatus metric: string problem: string @@ -296,12 +296,12 @@ export interface RegressionReason { allowedRegression: number } -export interface ComparisonReport { +export type ComparisonReport = { status: RegressionStatus reasons: RegressionReason[] } -export interface CompareOptions { +export type CompareOptions = { toleranceByMetric: Dic } @@ -309,7 +309,7 @@ export interface CompareOptions { * @description Function that compute a performance score given the whole results. * @returns A performance score between 0 and 1. */ -export interface Metric { +export type Metric = { name: string eval: (res: Result[]) => number } @@ -321,13 +321,13 @@ export type DataSet = { samples: Sample[] } & (T extends 'slot' ? VariablesDef : {}) -export interface VariablesDef { +export type VariablesDef = { variables: Variable[] patterns: Pattern[] enums: Enum[] } -export interface Document { +export type Document = { name: string type: ProblemType lang: string @@ -335,7 +335,7 @@ export interface Document { } export type FileType = 'document' | 'dataset' -interface FileDef { +type FileDef = { name: string type: T fileType: F @@ -345,24 +345,24 @@ interface FileDef { export type DataSetDef = FileDef export type DocumentDef = FileDef -interface Variable { +type Variable = { name: string types: string[] } -interface Enum { +type Enum = { name: string values: { name: string; synonyms: string[] }[] fuzzy: number } -interface Pattern { +type Pattern = { name: string regex: string case_sensitive: boolean } -interface Sample { +type Sample = { text: string label: Label } diff --git a/packages/bitfan/src/builtin/engines/intent.ts b/packages/bitfan/src/builtin/engines/intent.ts index 419ca11d..b753b7be 100644 --- a/packages/bitfan/src/builtin/engines/intent.ts +++ b/packages/bitfan/src/builtin/engines/intent.ts @@ -16,7 +16,7 @@ export class BpIntentEngine implements sdk.Engine<'intent'> { this._stanProvider = new StanProvider(bpEndpoint) } - train(trainSet: sdk.DataSet<'intent'>, seed: number, progress: sdk.ProgressCb) { + public train(trainSet: sdk.DataSet<'intent'>, seed: number, progress: sdk.ProgressCb) { const allLabels = _(trainSet.samples) .flatMap((r) => r.label) .uniq() @@ -55,7 +55,7 @@ export class BpIntentEngine implements sdk.Engine<'intent'> { return candidates } - async predict(testSet: sdk.DataSet<'intent'>, progress: sdk.ProgressCb) { + public async predict(testSet: sdk.DataSet<'intent'>, progress: sdk.ProgressCb) { const results: sdk.Prediction<'intent'>[] = [] let done = 0 diff --git a/packages/bitfan/src/builtin/engines/slot.ts b/packages/bitfan/src/builtin/engines/slot.ts index 1cbd72d2..89641a83 100644 --- a/packages/bitfan/src/builtin/engines/slot.ts +++ b/packages/bitfan/src/builtin/engines/slot.ts @@ -16,7 +16,7 @@ export class BpSlotEngine implements sdk.Engine<'slot'> { this._stanProvider = new StanProvider(bpEndpoint) } - train(trainSet: sdk.DataSet<'slot'>, seed: number, progress: 
sdk.ProgressCb) { + public train(trainSet: sdk.DataSet<'slot'>, seed: number, progress: sdk.ProgressCb) { const { enums, patterns, lang, samples, variables } = trainSet const utterances = samples.map((r) => { @@ -85,7 +85,7 @@ export class BpSlotEngine implements sdk.Engine<'slot'> { } } - async predict(testSet: sdk.DataSet<'slot'>, progress: sdk.ProgressCb) { + public async predict(testSet: sdk.DataSet<'slot'>, progress: sdk.ProgressCb) { const results: sdk.Prediction<'slot'>[] = [] let done = 0 diff --git a/packages/bitfan/src/builtin/engines/spell.ts b/packages/bitfan/src/builtin/engines/spell.ts index b09950b4..d50654de 100644 --- a/packages/bitfan/src/builtin/engines/spell.ts +++ b/packages/bitfan/src/builtin/engines/spell.ts @@ -16,7 +16,7 @@ export class BpSpellingEngine implements sdk.UnsupervisedEngine<'spell'> { this._stanProvider = new StanProvider(bpEndpoint) } - train(corpus: sdk.Document[], seed: number, progress: sdk.ProgressCb) { + public train(corpus: sdk.Document[], seed: number, progress: sdk.ProgressCb) { if (!corpus.length) { throw new Error('Botpress Spelling Engine needs at least one document for training.') } @@ -43,7 +43,7 @@ export class BpSpellingEngine implements sdk.UnsupervisedEngine<'spell'> { }) } - async predict(testSet: sdk.DataSet<'spell'>, progress: sdk.ProgressCb) { + public async predict(testSet: sdk.DataSet<'spell'>, progress: sdk.ProgressCb) { const results: sdk.Prediction<'spell'>[] = [] let done = 0 diff --git a/packages/bitfan/src/builtin/engines/topic.ts b/packages/bitfan/src/builtin/engines/topic.ts index e622e617..0dca0860 100644 --- a/packages/bitfan/src/builtin/engines/topic.ts +++ b/packages/bitfan/src/builtin/engines/topic.ts @@ -14,7 +14,7 @@ export class BpTopicEngine implements sdk.Engine<'topic'> { this._stanProvider = new StanProvider(bpEndpoint) } - train(trainSet: sdk.DataSet<'topic'>, seed: number, progress: sdk.ProgressCb) { + public train(trainSet: sdk.DataSet<'topic'>, seed: number, progress: sdk.ProgressCb) { const samples = trainSet.samples const allTopics = _(samples) @@ -47,7 +47,7 @@ export class BpTopicEngine implements sdk.Engine<'topic'> { }) } - async predict(testSet: sdk.DataSet<'topic'>, progress: sdk.ProgressCb) { + public async predict(testSet: sdk.DataSet<'topic'>, progress: sdk.ProgressCb) { const results: sdk.Prediction<'topic'>[] = [] let done = 0 diff --git a/packages/bitfan/src/builtin/metrics/oos.ts b/packages/bitfan/src/builtin/metrics/oos.ts index ce887c56..3ccd9428 100644 --- a/packages/bitfan/src/builtin/metrics/oos.ts +++ b/packages/bitfan/src/builtin/metrics/oos.ts @@ -4,14 +4,14 @@ import { isOOS } from '../../builtin/labels' import { mostConfident } from '../election/mostConfident' -interface ConfusionMatrix { +type ConfusionMatrix = { truePos: number falsePos: number trueNeg: number falseNeg: number } -interface OOSPerformance { +type OOSPerformance = { oosAccuracy: number oosPrecision: number oosRecall: number diff --git a/packages/bitfan/src/services/bp-provider/stan-provider.ts b/packages/bitfan/src/services/bp-provider/stan-provider.ts index c1b13507..2f2b7080 100644 --- a/packages/bitfan/src/services/bp-provider/stan-provider.ts +++ b/packages/bitfan/src/services/bp-provider/stan-provider.ts @@ -1,4 +1,4 @@ -import { PredictOutput, TrainingState, TrainInput, Client } from '@botpress/nlu-client' +import { PredictOutput, TrainingState, TrainInput, Client, http } from '@botpress/nlu-client' import _ from 'lodash' import { sleep } from '../../utils' @@ -19,7 +19,7 @@ export class StanProvider { if 
(data.success) {
       return data.session
     }
-    throw new Error(data.error)
+    throw this._deserializeError(data.error)
   }
 
   private async _waitForTraining(modelId: string, loggingCb?: (time: number, progress: number) => void) {
@@ -59,14 +59,21 @@
       this._modelId = modelId
       return this._waitForTraining(modelId, loggingCb)
     }
-    throw new Error(data.error)
+    throw this._deserializeError(data.error)
   }
 
   public async predict(utterances: string[]): Promise<PredictOutput[]> {
     const predOutput = await this._client.predict(APP_ID, this._modelId ?? '', { utterances })
     if (!predOutput.success) {
-      throw new Error(`An error occured at prediction: ${predOutput.error}.`)
+      throw new Error(`An error occurred at prediction: ${predOutput.error.message}.`)
     }
     return predOutput.predictions
   }
+
+  private _deserializeError = (error: http.NLUError): Error => {
+    const { message, stack } = error
+    const err = new Error(message)
+    err.stack = stack
+    return err
+  }
 }
diff --git a/packages/bitfan/src/solution.ts b/packages/bitfan/src/solution.ts
index 5498910a..99f0296b 100644
--- a/packages/bitfan/src/solution.ts
+++ b/packages/bitfan/src/solution.ts
@@ -37,7 +37,8 @@ const makeSolutionRunner = (
   for (const problem of problems) {
     try {
       await runProblem({ problem, engine: solution.engine } as any) // TODO: refactor this
-    } catch (err) {
+    } catch (thrown) {
+      const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
       // eslint-disable-next-line no-console
       console.log(
         chalk.red(
@@ -59,12 +60,12 @@
   }
 }
 
-interface Unsupervised {
+type Unsupervised = {
   problem: sdk.UnsupervisedProblem
   engine: sdk.UnsupervisedEngine
 }
 
-interface Supervised {
+type Supervised = {
   problem: sdk.Problem
   engine: sdk.Engine
 }
diff --git a/packages/distributed/package.json b/packages/distributed/package.json
new file mode 100644
index 00000000..aac533c5
--- /dev/null
+++ b/packages/distributed/package.json
@@ -0,0 +1,27 @@
+{
+  "name": "@botpress/distributed",
+  "version": "0.0.1",
+  "author": "Botpress, Inc.",
+  "license": "AGPL-3.0",
+  "scripts": {
+    "build": "tsc --build",
+    "clean": "rimraf ./dist && rimraf ./node_modules"
+  },
+  "dependencies": {
+    "@bpinternal/log4bot": "^0.0.4",
+    "lodash": "^4.17.19",
+    "yn": "^4.0.0",
+    "bluebird": "^3.7.2",
+    "moment": "^2.24.0",
+    "ms": "^2.1.1",
+    "nanoid": "^3.1.23",
+    "pg-pubsub": "^0.6.1"
+  },
+  "devDependencies": {
+    "@types/lodash": "^4.14.116",
+    "@types/node": "^16.11.10",
+    "typescript": "^5.0.4"
+  },
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts"
+}
diff --git a/packages/locks/readme.md b/packages/distributed/readme.md
similarity index 55%
rename from packages/locks/readme.md
rename to packages/distributed/readme.md
index c1a64fcf..52f1a08f 100644
--- a/packages/locks/readme.md
+++ b/packages/distributed/readme.md
@@ -1,13 +1,19 @@
-# Botpress Locks
+# Botpress Distributed
 
-Contains locks and transaction queues to prevent many kinds of race conditions
+Contains distributed locks and distributed queues
 
-Race conditions can occur:
+## Locks
+
+Locks prevent multiple types of race conditions. Race conditions can occur:
 
 - in single-threaded apps when using asynchronous code
 - in multi-threaded/multi-process apps
 - in multi-instance apps
 
+## Queues
+
+A distributed task queue can run tasks on multiple instances without the need for a master-slave configuration.
+
 ## Licensing
 
 This software is protected by the same license as the [main Botpress repository](https://github.com/botpress/botpress).
 You can find the license file [here](https://github.com/botpress/botpress/blob/master/LICENSE).
diff --git a/packages/distributed/src/index.ts b/packages/distributed/src/index.ts
new file mode 100644
index 00000000..e38a84e0
--- /dev/null
+++ b/packages/distributed/src/index.ts
@@ -0,0 +1,2 @@
+export * as locks from './locks'
+export * from './queues'
diff --git a/packages/locks/src/in-mem-trx-queue.ts b/packages/distributed/src/locks/in-mem-trx-locker.ts
similarity index 72%
rename from packages/locks/src/in-mem-trx-queue.ts
rename to packages/distributed/src/locks/in-mem-trx-locker.ts
index 83953581..f2b97f51 100644
--- a/packages/locks/src/in-mem-trx-queue.ts
+++ b/packages/distributed/src/locks/in-mem-trx-locker.ts
@@ -1,19 +1,22 @@
 import _ from 'lodash'
 
-import { LockedTransactionQueue, Logger, Task } from './typings'
+import { TransactionLocker, Logger, Transaction } from './typings'
 
-export class InMemoryTransactionQueue implements LockedTransactionQueue {
-  private _tasks: Task<any>[] = []
+/**
+ * For race conditions occurring because of the event loop in a single-threaded application
+ */
+export class InMemoryTransactionLocker implements TransactionLocker {
+  private _tasks: Transaction<any>[] = []
 
   constructor(private _logger?: Logger) {}
 
   public async initialize() {}
 
   public async teardown() {}
 
-  public runInLock<T>(t: Task<T>): Promise<T> {
+  public runInLock<T>(t: Transaction<T>): Promise<T> {
     this._logger?.(`Task "${t.name}" waiting.`)
 
     return new Promise((resolve, reject) => {
-      const mockTask: Task<T> = {
+      const mockTask: Transaction<T> = {
         name: t.name,
         cb: async () => {
           try {
@@ -32,7 +35,7 @@
     })
   }
 
-  private _push(t: Task<any>) {
+  private _push(t: Transaction<any>) {
     const first = !this._tasks.length
 
     this._tasks.unshift(t)
diff --git a/packages/distributed/src/locks/index.ts b/packages/distributed/src/locks/index.ts
new file mode 100644
index 00000000..5b95a718
--- /dev/null
+++ b/packages/distributed/src/locks/index.ts
@@ -0,0 +1,3 @@
+export * from './typings'
+export { InMemoryTransactionLocker } from './in-mem-trx-locker'
+export { PGTransactionLocker } from './pg-trx-locker'
diff --git a/packages/locks/src/pg-trx-queue.ts b/packages/distributed/src/locks/pg-trx-locker.ts
similarity index 71%
rename from packages/locks/src/pg-trx-queue.ts
rename to packages/distributed/src/locks/pg-trx-locker.ts
index 0291e193..e0e7cc5a 100644
--- a/packages/locks/src/pg-trx-queue.ts
+++ b/packages/distributed/src/locks/pg-trx-locker.ts
@@ -1,14 +1,17 @@
 import crypto from 'crypto'
 import _ from 'lodash'
 import { Client } from 'pg'
-import { InMemoryTransactionQueue } from './in-mem-trx-queue'
-import { LockedTransactionQueue, Task, Logger } from './typings'
+import { InMemoryTransactionLocker } from './in-mem-trx-locker'
+import { TransactionLocker, Transaction, Logger } from './typings'
 
 const TRX_LOCK_KEY = 'trx_lock'
 
-export class PGTransactionQueue implements LockedTransactionQueue {
+/**
+ * For race conditions occurring in distributed applications
+ */
+export class PGTransactionLocker implements TransactionLocker {
   private client: Client
-  private _memQueue = new InMemoryTransactionQueue()
+  private _memQueue = new InMemoryTransactionLocker()
 
   constructor(dbURL: string, private _logger?: Logger) {
     this.client = new Client(dbURL)
@@ -22,8 +25,8 @@
     return this.client.end()
   }
 
-  public async runInLock<T>(t: Task<T>): Promise<T> {
-    this._logger?.(`Task "${t.name}" waiting.`)
+  public async runInLock<T>(t: Transaction<T>): Promise<T> {
+    this._logger?.(`Trx "${t.name}" waiting.`)
 
     return this._memQueue.runInLock({
       name: t.name,
@@ -32,9 +35,9 @@
       try {
         await this._waitForLock(TRX_LOCK_KEY)
-        this._logger?.(`Task "${t.name}" started.`)
+        this._logger?.(`Trx "${t.name}" started.`)
         x = await t.cb()
-        this._logger?.(`Task "${t.name}" done.`)
+        this._logger?.(`Trx "${t.name}" done.`)
       } finally {
         await this._releaseLock(TRX_LOCK_KEY)
       }
diff --git a/packages/locks/src/typings.d.ts b/packages/distributed/src/locks/typings.ts
similarity index 55%
rename from packages/locks/src/typings.d.ts
rename to packages/distributed/src/locks/typings.ts
index 8d731512..62cfbe84 100644
--- a/packages/locks/src/typings.d.ts
+++ b/packages/distributed/src/locks/typings.ts
@@ -1,4 +1,4 @@
-export interface Task<T> {
+export type Transaction<T> = {
   name: string
   cb: () => Promise<T>
 }
@@ -9,25 +9,15 @@
 *
 * Does not involve manually checking if the lock is free.
 */
-export interface LockedTransactionQueue {
+export type TransactionLocker = {
  /**
   * Waits for a lock to be free, acquires it and runs the function
   * @param t The async function to run inside the acquire-and-release lock statement
   * @returns A promise that resolves or rejects once the task is done or throws
   */
-  runInLock<T>(t: Task<T>): Promise<T>
+  runInLock<T>(t: Transaction<T>): Promise<T>
  initialize(): Promise<void>
  teardown(): Promise<void>
 }
 
 export type Logger = (msg: string) => void
-
-/**
- * For race conditions occuring because of the event loop in a single-threaded application
- */
-export const makeInMemoryTrxQueue: (logger?: Logger) => LockedTransactionQueue
-
-/**
- * For race conditions occuring in distributed applications
- */
-export const makePostgresTrxQueue: (pgURI: string, logger?: Logger) => LockedTransactionQueue
diff --git a/packages/distributed/src/queues/base-queue.ts b/packages/distributed/src/queues/base-queue.ts
new file mode 100644
index 00000000..c1a9f03a
--- /dev/null
+++ b/packages/distributed/src/queues/base-queue.ts
@@ -0,0 +1,160 @@
+import { Logger } from '@bpinternal/log4bot'
+import Bluebird from 'bluebird'
+import _ from 'lodash'
+import moment from 'moment'
+import { nanoid } from 'nanoid'
+
+import { TaskAlreadyStartedError } from './errors'
+import { createTimer, InterruptTimer } from './interrupt'
+import {
+  Task,
+  TaskProgress,
+  SafeTaskRepository,
+  TaskRunner,
+  TaskState,
+  TaskStatus,
+  TaskRepository,
+  QueueOptions,
+  TaskQueue as ITaskQueue
+} from './typings'
+
+export abstract class BaseTaskQueue<TId, TInput, TData, TError> implements ITaskQueue<TId, TInput> {
+  private _schedulingTimer!: InterruptTimer<[]>
+  protected _clusterId: string = nanoid()
+
+  constructor(
+    protected _taskRepo: SafeTaskRepository<TId, TInput, TData, TError>,
+    protected _taskRunner: TaskRunner<TId, TInput, TData, TError>,
+    protected _logger: Logger,
+    protected _idToString: (id: TId) => string,
+    protected _options: QueueOptions<TId, TInput, TData, TError>
+  ) {}
+
+  public async initialize() {
+    this._logger.debug(`cluster id: "${this._clusterId}"`)
+    await this._taskRepo.initialize()
+    this._schedulingTimer = createTimer(this._runSchedulerInterrupt.bind(this), this._options.maxProgressDelay * 2)
+  }
+
+  public async teardown() {
+    await this._taskRepo.teardown()
+    return this._schedulingTimer.stop()
+  }
+
+  public getLocalTaskCount = async () => {
+    const localTasks = await this._taskRepo.query({ cluster: this._clusterId, status: 'running' })
+    return localTasks.length
+  }
+
+  public queueTask = async (taskId: TId, input: TInput) => {
+    const taskKey = this._idToString(taskId)
+    await this._taskRepo.inTransaction(async (repo) => {
+      const currentTask = await repo.get(taskId)
+      if (currentTask && (currentTask.status === 'running' || currentTask.status === 'pending')) {
+        throw new TaskAlreadyStartedError(taskKey)
+      }
+
+      const state: TaskState<TId, TInput, TData, TError> = {
+        status: 'pending',
+        cluster: this._clusterId,
+        progress: this._options.initialProgress,
+        input,
+        data: this._options.initialData
+      }
+
+      return repo.set({ ...state, ...taskId })
+    }, 'queueTask')
+
+    // to return asap from queuing
+    void this.runSchedulerInterrupt()
+  }
+
+  public abstract cancelTask(taskId: TId): Promise<void>
+
+  protected async runSchedulerInterrupt() {
+    try {
+      return this._schedulingTimer.run()
+    } catch (thrown) {
+      const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
+      this._logger.attachError(err).error('An error occurred when running scheduler interrupt.')
+    }
+  }
+
+  private _runSchedulerInterrupt = async () => {
+    return this._taskRepo.inTransaction(async (repo) => {
+      await this._queueBackZombies(repo)
+
+      const localTasks = await repo.query({ cluster: this._clusterId, status: 'running' })
+      if (localTasks.length >= this._options.maxTasks) {
+        this._logger.debug(
+          `[${this._clusterId}/${this._options.queueId}] max allowed number of tasks already launched in queue.`
+        )
+        return
+      }
+
+      const pendings = await repo.query({ status: 'pending' })
+      if (pendings.length <= 0) {
+        return
+      }
+
+      const task = pendings[0]
+      task.status = 'running'
+      task.cluster = this._clusterId
+      await repo.set(task)
+
+      // floating promise to return fast from scheduler interrupt and to prevent deadlock
+      void this._runTask(task)
+    }, '_runSchedulerInterrupt')
+  }
+
+  private _runTask = async (task: Task<TId, TInput, TData, TError>) => {
+    const taskKey = this._idToString(task)
+    this._logger.debug(`task "${taskKey}" is about to start.`)
+
+    const updateTask = _.throttle(async () => {
+      await this._taskRepo.inTransaction((repo) => repo.set(task), 'progressCallback')
+    }, this._options.progressThrottle)
+
+    try {
+      const terminatedTask = await this._taskRunner.run(task, async (progress: TaskProgress, data?: TData) => {
+        task.status = 'running'
+        task.progress = progress
+        if (data) {
+          task.data = data
+        }
+        void updateTask()
+      })
+
+      updateTask.flush()
+
+      if (terminatedTask) {
+        await this._taskRepo.inTransaction((repo) => repo.set(terminatedTask), '_task_terminated')
+      }
+    } catch (thrown) {
+      updateTask.flush()
+
+      const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
+      this._logger.attachError(err).error(`Unhandled error when running task "${taskKey}"`)
+    } finally {
+      // to return asap
+      void this.runSchedulerInterrupt()
+    }
+  }
+
+  protected _queueBackZombies = async (repo: TaskRepository<TId, TInput, TData, TError>) => {
+    const zombieThreshold = moment().subtract(this._options.maxProgressDelay, 'ms').toDate()
+    const newZombies = await repo.queryOlderThan({ status: 'running' }, zombieThreshold)
+    if (newZombies.length) {
+      this._logger.debug(`Queuing back ${newZombies.length} tasks because they seem to be zombies.`)
+
+      const progress = this._options.initialProgress
+      const newState = { status: 'zombie', cluster: this._clusterId, progress }
+      await Bluebird.each(newZombies, (z) => repo.set({ ...z, ...newState }))
+    }
+  }
+
+  protected _isCancelable = (task: Task<TId, TInput, TData, TError>) => {
+    const cancellableStatus: TaskStatus[] = ['running', 'pending', 'zombie']
+    return cancellableStatus.includes(task.status)
+  }
+}
diff --git a/packages/distributed/src/queues/errors.ts b/packages/distributed/src/queues/errors.ts
new file mode 100644
index 00000000..b56699c3
--- /dev/null
+++ b/packages/distributed/src/queues/errors.ts
@@ -0,0 +1,17 @@
+export class TaskNotFoundError extends Error {
+  constructor(taskId: string) {
+    super(`no current task for model: ${taskId}`)
+  }
+}
+
+export class TaskNotRunning extends Error {
+  constructor(taskId: string) {
+    super(`no current running or pending task for model: ${taskId}`)
+  }
+}
+
+export class TaskAlreadyStartedError extends Error {
+  constructor(taskId: string) {
+    super(`Training "${taskId}" already started...`)
+  }
+}
diff --git a/packages/distributed/src/queues/index.ts b/packages/distributed/src/queues/index.ts
new file mode 100644
index 00000000..44048d91
--- /dev/null
+++ b/packages/distributed/src/queues/index.ts
@@ -0,0 +1,5 @@
+export * from './typings'
+
+export { PGDistributedTaskQueue } from './pg-distributed-queue'
+export { LocalTaskQueue } from './local-queue'
+export { TaskAlreadyStartedError, TaskNotFoundError, TaskNotRunning } from './errors'
diff --git a/packages/nlu-server/src/utils/watch-dog.ts b/packages/distributed/src/queues/interrupt.ts
similarity index 77%
rename from packages/nlu-server/src/utils/watch-dog.ts
rename to packages/distributed/src/queues/interrupt.ts
index 65eafcb8..f9a8b488 100644
--- a/packages/nlu-server/src/utils/watch-dog.ts
+++ b/packages/distributed/src/queues/interrupt.ts
@@ -5,19 +5,19 @@ class Interval {
     this.reset()
   }
 
-  reset(): void {
+  public reset(): void {
     this.stop()
     this._int = setInterval(this.f, this.ms)
   }
 
-  stop(): void {
+  public stop(): void {
     this._int && clearInterval(this._int)
   }
 }
 
 type Func<X extends any[], Y> = (...x: X) => Y
 
-export interface WatchDog<X extends any[]> {
+export type InterruptTimer<X extends any[]> = {
   run(...x: X): Promise<void>
   stop: () => void
 }
@@ -30,9 +30,9 @@
 *
 * @param f Function to run
 * @param ms Max allowed time between function invocations
- * @returns a watchdog object that can be ran or stopped
+ * @returns a timer object that can be run or stopped
 */
-export const watchDog = <X extends any[]>(f: Func<X, Promise<void>>, ms: number): WatchDog<X> => {
+export const createTimer = <X extends any[]>(f: Func<X, Promise<void>>, ms: number): InterruptTimer<X> => {
   const interval = new Interval(f, ms)
 
   const run: Func<X, Promise<void>> = (...x: X): Promise<void> => {
diff --git a/packages/distributed/src/queues/local-queue.ts b/packages/distributed/src/queues/local-queue.ts
new file mode 100644
index 00000000..13a92f54
--- /dev/null
+++ b/packages/distributed/src/queues/local-queue.ts
@@ -0,0 +1,46 @@
+import { Logger } from '@bpinternal/log4bot'
+import _ from 'lodash'
+import { InMemoryTransactionLocker } from '../locks'
+import { TaskNotFoundError } from '.'
+import { BaseTaskQueue } from './base-queue'
+
+import { TaskNotRunning } from './errors'
+import { SafeTaskRepo } from './safe-repo'
+import { TaskRunner, TaskRepository, QueueOptions, TaskQueue as ITaskQueue, TaskStatus } from './typings'
+
+export class LocalTaskQueue<TId, TInput, TData, TError>
+  extends BaseTaskQueue<TId, TInput, TData, TError>
+  implements ITaskQueue<TId, TInput> {
+  constructor(
+    taskRepo: TaskRepository<TId, TInput, TData, TError>,
+    taskRunner: TaskRunner<TId, TInput, TData, TError>,
+    logger: Logger,
+    idToString: (id: TId) => string,
+    opt: QueueOptions<TId, TInput, TData, TError>
+  ) {
+    const logCb = (msg: string) => logger.sub('trx-queue').debug(msg)
+    const safeRepo = new SafeTaskRepo(taskRepo, new InMemoryTransactionLocker(logCb))
+    super(safeRepo, taskRunner, logger, idToString, opt)
+  }
+
+  public cancelTask(taskId: TId): Promise<void> {
+    const taskKey = this._idToString(taskId)
+    return this._taskRepo.inTransaction(async (repo) => {
+      await this._queueBackZombies(repo)
+
+      const currentTask = await repo.get(taskId)
+      if (!currentTask) {
+        throw new TaskNotFoundError(taskKey)
+      }
+      if (!this._isCancelable(currentTask)) {
+        throw new TaskNotRunning(taskKey)
+      }
+
+      if (currentTask.status === 'pending' || currentTask.status === 'zombie') {
+        const newTask = { ...currentTask, status: <TaskStatus>'canceled' }
+        return repo.set(newTask)
+      }
+      return this._taskRunner.cancel(currentTask)
+    }, 'cancelTask')
+  }
+}
diff --git a/packages/distributed/src/queues/pg-distributed-queue.ts b/packages/distributed/src/queues/pg-distributed-queue.ts
new file mode 100644
index 00000000..19233454
--- /dev/null
+++ b/packages/distributed/src/queues/pg-distributed-queue.ts
@@ -0,0 +1,124 @@
+import { Logger } from '@bpinternal/log4bot'
+import Bluebird from 'bluebird'
+import ms from 'ms'
+import PGPubSub from 'pg-pubsub'
+import { PGTransactionLocker } from '../locks'
+import { TaskNotFoundError } from '.'
+import { BaseTaskQueue } from './base-queue'
+import { TaskNotRunning } from './errors'
+import { PGQueueEventObserver } from './pg-event-observer'
+import { SafeTaskRepo } from './safe-repo'
+import { TaskRunner, TaskRepository, QueueOptions, TaskQueue as ITaskQueue, TaskStatus } from './typings'
+
+const DISTRIBUTED_CANCEL_TIMEOUT_DELAY = ms('2s')
+
+export class PGDistributedTaskQueue<TId, TInput, TData, TError>
+  extends BaseTaskQueue<TId, TInput, TData, TError>
+  implements ITaskQueue<TId, TInput> {
+  private _obs: PGQueueEventObserver<TId>
+
+  constructor(
+    pgURL: string,
+    taskRepo: TaskRepository<TId, TInput, TData, TError>,
+    taskRunner: TaskRunner<TId, TInput, TData, TError>,
+    logger: Logger,
+    idToString: (id: TId) => string,
+    opt: QueueOptions<TId, TInput, TData, TError>
+  ) {
+    super(PGDistributedTaskQueue._makeSafeRepo(pgURL, taskRepo, logger), taskRunner, logger, idToString, opt)
+    const _pubsub = new PGPubSub(pgURL, { log: () => {} })
+    this._obs = new PGQueueEventObserver(_pubsub, opt.queueId)
+  }
+
+  private static _makeSafeRepo<TId, TInput, TData, TError>(
+    pgURL: string,
+    taskRepo: TaskRepository<TId, TInput, TData, TError>,
+    logger: Logger
+  ) {
+    const logCb = (msg: string) => logger.sub('trx-queue').debug(msg)
+    return new SafeTaskRepo(taskRepo, new PGTransactionLocker(pgURL, logCb))
+  }
+
+  public async initialize() {
+    await super.initialize()
+    await this._obs.initialize()
+    this._obs.on('run_scheduler_interrupt', super.runSchedulerInterrupt.bind(this))
+    this._obs.on('cancel_task', ({ taskId, clusterId }) => this._handleCancelTaskEvent(taskId, clusterId))
+  }
+
+  public async cancelTask(taskId: TId) {
+    const taskKey = this._idToString(taskId)
+
+    return this._taskRepo.inTransaction(async (repo) => {
+      await this._queueBackZombies(repo)
+
+      const currentTask = await this._taskRepo.get(taskId)
+      if (!currentTask) {
+        throw new TaskNotFoundError(taskKey)
+      }
+      if (!this._isCancelable(currentTask)) {
+        throw new TaskNotRunning(taskKey)
+      }
+
+      if (currentTask.status === 'pending' || currentTask.status === 'zombie') {
+        const newTask = { ...currentTask, status: <TaskStatus>'canceled' }
+        return repo.set(newTask)
+      }
+
+      if (currentTask.cluster === this._clusterId) {
+        return this._taskRunner.cancel(currentTask)
+      }
+
+      this._logger.debug(`Task "${taskKey}" was not launched on this instance`)
+      await Bluebird.race([
+        this._cancelAndWaitForResponse(taskId, currentTask.cluster),
+        this._timeoutTaskCancelation(DISTRIBUTED_CANCEL_TIMEOUT_DELAY)
+      ])
+    }, 'cancelTask')
+  }
+
+  private _cancelAndWaitForResponse = (taskId: TId, clusterId: string): Promise<void> =>
+    new Promise(async (resolve, reject) => {
+      this._obs.onceOrMore('cancel_task_done', async (response) => {
+        if (this._idToString(response.taskId) !== this._idToString(taskId)) {
+          return 'stay' // canceled task is not the one we're waiting for
+        }
+
+        if (response.err) {
+          const { message, stack } = response.err
+          const err = new Error(message)
+          err.stack = stack
+          reject(err)
+          return 'leave'
+        }
+
+        resolve()
+        return 'leave'
+      })
+      await this._obs.emit('cancel_task', { taskId, clusterId })
+    })
+
+  private _timeoutTaskCancelation = (ms: number): Promise<never> =>
+    new Promise((_resolve, reject) => {
+      setTimeout(() => reject(new Error(`Canceling operation took more than ${ms} ms`)), ms)
+    })
+
+  private _handleCancelTaskEvent = async (taskId: TId, clusterId: string) => {
+    if (clusterId !== this._clusterId) {
+      return // message was not addressed to this instance
+    }
+
+    try {
+      await this._taskRunner.cancel(taskId)
+      await this._obs.emit('cancel_task_done', { taskId })
+    } catch (thrown) {
+      const { message, stack } = thrown instanceof Error ? thrown : new Error(`${thrown}`)
+      await this._obs.emit('cancel_task_done', { taskId, err: { message, stack } })
+    }
+  }
+
+  // in case a completely busy instance receives a queue-task HTTP call
+  protected runSchedulerInterrupt() {
+    return this._obs.emit('run_scheduler_interrupt', undefined)
+  }
+}
diff --git a/packages/distributed/src/queues/pg-event-observer.ts b/packages/distributed/src/queues/pg-event-observer.ts
new file mode 100644
index 00000000..6ab2aa15
--- /dev/null
+++ b/packages/distributed/src/queues/pg-event-observer.ts
@@ -0,0 +1,62 @@
+import Bluebird from 'bluebird'
+import { EventEmitter2 } from 'eventemitter2'
+import PGPubSub from 'pg-pubsub'
+
+const CHANNELS = ['cancel_task', 'run_scheduler_interrupt', 'cancel_task_done'] as const
+type Channel = typeof CHANNELS[number]
+
+type CancelTaskError = {
+  message: string
+  stack?: string
+}
+
+type PGQueueEventData<C extends Channel, TId> = C extends 'run_scheduler_interrupt'
+  ? void
+  : C extends 'cancel_task'
+  ? { taskId: TId; clusterId: string }
+  : C extends 'cancel_task_done'
+  ? { taskId: TId; err?: CancelTaskError }
+  : never
+
+export class PGQueueEventObserver<TId> {
+  constructor(private _pubsub: PGPubSub, private _queueId: string) {}
+
+  private _evEmitter = new EventEmitter2()
+
+  public initialize = async (): Promise<void> => {
+    await Bluebird.map(CHANNELS, (c: Channel) =>
+      this._pubsub.addChannel(this._pgChannelId(c), (x) => this._evEmitter.emit(c, x))
+    )
+  }
+
+  public teardown = async (): Promise<void> => {
+    await Bluebird.map(CHANNELS, (c: Channel) => this._pubsub.removeChannel(this._pgChannelId(c)))
+  }
+
+  public on<C extends Channel>(c: C, handler: (data: PGQueueEventData<C, TId>) => Promise<void>): void {
+    this._evEmitter.on(c, handler)
+  }
+
+  public off<C extends Channel>(c: C, handler: (data: PGQueueEventData<C, TId>) => Promise<void>): void {
+    this._evEmitter.off(c, handler)
+  }
+
+  public onceOrMore<C extends Channel>(
+    c: C,
+    handler: (data: PGQueueEventData<C, TId>) => Promise<'stay' | 'leave'>
+  ): void {
+    const cb = async (x: PGQueueEventData<C, TId>) => {
+      const y = await handler(x)
+      if (y === 'leave') {
+        this._evEmitter.off(c, cb)
+      }
+    }
+    this._evEmitter.on(c, cb)
+  }
+
+  public async emit<C extends Channel>(c: C, data: PGQueueEventData<C, TId>): Promise<void> {
+    return this._pubsub.publish(this._pgChannelId(c), data)
+  }
+
+  private _pgChannelId = (c: Channel) => `${this._queueId}:${c}`
+}
diff --git a/packages/distributed/src/queues/safe-repo.ts b/packages/distributed/src/queues/safe-repo.ts
new file mode 100644
index 00000000..4bcf656f
--- /dev/null
+++ b/packages/distributed/src/queues/safe-repo.ts
@@ -0,0 +1,37 @@
+import ms from 'ms'
+import { TransactionLocker } from '../locks'
+import { SafeTaskRepository as ISafeTaskRepository, TaskRepository, TaskTrx } from './typings'
+
+const TRANSACTION_TIMEOUT_MS = ms('5s')
+
+export class SafeTaskRepo<TId, TInput, TData, TError> implements ISafeTaskRepository<TId, TInput, TData, TError> {
+  constructor(
+    private _taskRepo: TaskRepository<TId, TInput, TData, TError>,
+    private _trxLock: TransactionLocker
+  ) {}
+
+  public initialize = this._trxLock.initialize.bind(this._trxLock)
+  public teardown = this._trxLock.teardown.bind(this._trxLock)
+  public get = this._taskRepo.get.bind(this._taskRepo)
+  public has = this._taskRepo.has.bind(this._taskRepo)
+  public query = this._taskRepo.query.bind(this._taskRepo)
+  public queryOlderThan = this._taskRepo.queryOlderThan.bind(this._taskRepo)
+
+  public inTransaction(trx: TaskTrx<TId, TInput, TData, TError>, name: string): Promise<void> {
+    const cb = async () => {
+      const operation = () => trx(this._taskRepo)
+      return Promise.race([operation(), this._timeout(TRANSACTION_TIMEOUT_MS)])
+    }
+
+    return this._trxLock.runInLock({
+      name,
+      cb
+    })
+  }
+
+  private _timeout = (ms: number) => {
+    return new Promise((_, reject) => {
+      setTimeout(() => reject(new Error("Transaction exceeded its time limit")), ms)
+    })
+  }
+}
diff --git a/packages/distributed/src/queues/typings.ts b/packages/distributed/src/queues/typings.ts
new file mode 100644
index 00000000..915c82e1
--- /dev/null
+++ b/packages/distributed/src/queues/typings.ts
@@ -0,0 +1,72 @@
+export type TaskTrx<TId, TInput, TData, TError> = (repo: TaskRepository<TId, TInput, TData, TError>) => Promise<void>
+
+export type TaskHandler<TId, TInput, TData, TError> = (task: Task<TId, TInput, TData, TError>) => Promise<void>
+export type ProgressCb<_TId, _TInput, TData, _TError> = (progress: TaskProgress, data?: TData) => void
+
+export type TaskRunner<TId, TInput, TData, TError> = {
+  run: (
+    task: Task<TId, TInput, TData, TError>,
+    progress: ProgressCb<TId, TInput, TData, TError>
+  ) => Promise<TerminatedTask<TId, TInput, TData, TError> | undefined>
+  cancel: (taskId: TId) => Promise<void>
+}
+
+export type TaskTerminatedStatus = 'done' | 'canceled' | 'errored'
+export type TaskStatus = TaskTerminatedStatus | 'pending' | 'running' | 'zombie'
+export type TaskState<_TId, TInput, TData, TError> = {
+  status: TaskStatus
+  cluster: string
+  progress: TaskProgress
+  input: TInput
+  data: TData
+  error?: TError
+}
+
+export type Task<TId, TInput, TData, TError> = TId & TaskState<TId, TInput, TData, TError>
+
+type Override<T, K> = Omit<T, keyof K> & K
+export type TerminatedTask<TId, TInput, TData, TError> = TId &
+  Override<TaskState<TId, TInput, TData, TError>, { status: TaskTerminatedStatus }>
+
+export type ReadonlyTaskRepository<TId, TInput, TData, TError> = {
+  get: (id: TId) => Promise<Task<TId, TInput, TData, TError> | undefined>
+  has: (id: TId) => Promise<boolean>
+  query: (query: Partial<TaskState<TId, TInput, TData, TError>>) => Promise<Task<TId, TInput, TData, TError>[]>
+  queryOlderThan: (
+    query: Partial<TaskState<TId, TInput, TData, TError>>,
+    threshold: Date
+  ) => Promise<Task<TId, TInput, TData, TError>[]>
+}
+
+export type TaskRepository<TId, TInput, TData, TError> = ReadonlyTaskRepository<TId, TInput, TData, TError> & {
+  set: (task: Task<TId, TInput, TData, TError>) => Promise<void>
+}
+
+export type SafeTaskRepository<TId, TInput, TData, TError> = ReadonlyTaskRepository<TId, TInput, TData, TError> & {
+  initialize: () => Promise<void>
+  teardown: () => Promise<void>
+  inTransaction: (trx: TaskTrx<TId, TInput, TData, TError>, name: string) => Promise<void>
+}
+
+export type TaskProgress = {
+  start: number
+  end: number
+  current: number
+}
+
+export type QueueOptions<_TId, _TInput, TData, _TError> = {
+  queueId: string
+  maxTasks: number
+  initialProgress: TaskProgress
+  initialData: TData
+  maxProgressDelay: number
+  progressThrottle: number
+}
+
+export type TaskQueue<TId, TInput> = {
+  initialize(): Promise<void>
+  teardown(): Promise<void>
+  getLocalTaskCount(): Promise<number>
+  queueTask(id: TId, input: TInput): Promise<void>
+  cancelTask(id: TId): Promise<void>
+}
diff --git a/packages/locks/tsconfig.json b/packages/distributed/tsconfig.json
similarity index 100%
rename from packages/locks/tsconfig.json
rename to packages/distributed/tsconfig.json
diff --git a/packages/e2e/src/tests/bpds-intents.ts b/packages/e2e/src/tests/bpds-intents.ts
deleted file mode 100644
index fe650931..00000000
--- a/packages/e2e/src/tests/bpds-intents.ts
+++ /dev/null
@@ -1,83 +0,0 @@
-const problemMaker = (bitfan) => async (name, lang, trainSet, testSet) => {
-  const fileDef = {
-    lang,
-    fileType: 'dataset',
-    type: 'intent',
-    namespace: 'bpds'
-  }
-
-  const trainFileDef = { name: trainSet, ...fileDef }
-  const testFileDef = { name: testSet, ...fileDef }
-
-  return {
-    name,
-    type: 'intent',
-    trainSet: await bitfan.datasets.readDataset(trainFileDef),
-    testSet: await bitfan.datasets.readDataset(testFileDef),
-    lang
-  }
-}
-
-export default function (bitfan) {
-  const metrics = [
-    bitfan.metrics.accuracy,
-    bitfan.metrics.oosAccuracy,
-    bitfan.metrics.oosPrecision,
-    bitfan.metrics.oosRecall,
-    bitfan.metrics.oosF1
-  ]
-
-  return {
-    name: 'bpds-intent',
-
-    computePerformance: async () => {
-      const makeProblem = problemMaker(bitfan)
-      let problems = [
-        await makeProblem('bpsd A-en', 'en', 'A-train', 'A-test'),
-        await makeProblem('bpds A imbalanced-en', 'en', 'A-imbalanced-train', 'A-test'),
-        await makeProblem('bpds A fewshot-en',
'en', 'A-fewshot-train', 'A-test'), - await makeProblem('bpds B', 'en', 'B-train', 'B-test'), - await makeProblem('bpsd A-fr', 'fr', 'A-train', 'A-test'), - await makeProblem('bpds A imbalanced-fr', 'fr', 'A-imbalanced-train', 'A-test'), - await makeProblem('bpds A fewshot-fr', 'fr', 'A-fewshot-train', 'A-test') - ] - - const usedLang = process.env.BITFAN_LANG - if (usedLang) { - problems = problems.filter((p) => p.lang === usedLang) - } - - const nluServerEndpoint = process.env.NLU_SERVER_ENDPOINT ?? 'http://localhost:3200' - const password = '123456' - const engine = bitfan.engines.makeBpIntentEngine(nluServerEndpoint, password) - - const solution = { - name: 'bpds intent', - problems, - engine - } - - const seeds = [42, 69, 666] - const results = await bitfan.runSolution(solution, seeds) - - const performanceReport = bitfan.evaluateMetrics(results, metrics) - - await bitfan.visualisation.showPerformanceReport(performanceReport, { groupBy: 'seed' }) - await bitfan.visualisation.showPerformanceReport(performanceReport, { groupBy: 'problem' }) - await bitfan.visualisation.showOOSConfusion(results) - - return performanceReport - }, - - evaluatePerformance: (currentPerformance, previousPerformance) => { - const toleranceByMetric = { - [bitfan.metrics.accuracy.name]: 0.075, - [bitfan.metrics.oosAccuracy.name]: 0.075, - [bitfan.metrics.oosPrecision.name]: 0.075, - [bitfan.metrics.oosRecall.name]: 0.075, - [bitfan.metrics.oosF1.name]: 0.15 // more tolerance for f1 score - } - return bitfan.comparePerformances(currentPerformance, previousPerformance, { toleranceByMetric }) - } - } -} diff --git a/packages/e2e/src/tests/clinc-intents.ts b/packages/e2e/src/tests/clinc-intents.ts deleted file mode 100644 index 55f09a9f..00000000 --- a/packages/e2e/src/tests/clinc-intents.ts +++ /dev/null @@ -1,68 +0,0 @@ -const problemMaker = (bitfan) => async (name, trainSet, testSet) => { - const fileDef = { - lang: 'en', - fileType: 'dataset', - type: 'intent', - namespace: '' - } - const trainFileDef = { name: trainSet, ...fileDef } - const testFileDef = { name: testSet, ...fileDef } - - return { - name, - type: 'intent', - trainSet: await bitfan.datasets.readDataset(trainFileDef), - testSet: await bitfan.datasets.readDataset(testFileDef), - lang: 'en' - } -} - -export default function (bitfan) { - const metrics = [ - bitfan.metrics.accuracy, - bitfan.metrics.oosAccuracy, - bitfan.metrics.oosPrecision, - bitfan.metrics.oosRecall, - bitfan.metrics.oosF1 - ] - - return { - name: 'clinc150', - - computePerformance: async () => { - const nluServerEndpoint = process.env.NLU_SERVER_ENDPOINT ?? 
'http://localhost:3200' - const password = '123456' - const engine = bitfan.engines.makeBpIntentEngine(nluServerEndpoint, password) - - const makeProblem = problemMaker(bitfan) - - const results = await bitfan.runSolution( - { - name: 'bpds intent', - problems: [ - await makeProblem('clinc150, 20 utt/intent, seed 42', 'clinc150_20_42-train', 'clinc150_100-test') - ], - engine - }, - [42] - ) - - const performanceReport = bitfan.evaluateMetrics(results, metrics) - await bitfan.visualisation.showPerformanceReport(performanceReport, { groupBy: 'problem' }) - await bitfan.visualisation.showOOSConfusion(results) - - return performanceReport - }, - - evaluatePerformance: (currentPerformance, previousPerformance) => { - const toleranceByMetric = { - [bitfan.metrics.accuracy.name]: 0.05, - [bitfan.metrics.oosAccuracy.name]: 0.05, - [bitfan.metrics.oosPrecision.name]: 0.1, - [bitfan.metrics.oosRecall.name]: 0.1, - [bitfan.metrics.oosF1.name]: 0.15 // more tolerance for f1 score - } - return bitfan.comparePerformances(currentPerformance, previousPerformance, { toleranceByMetric }) - } - } -} diff --git a/packages/lang-client/package.json b/packages/lang-client/package.json new file mode 100644 index 00000000..cfff6a46 --- /dev/null +++ b/packages/lang-client/package.json @@ -0,0 +1,27 @@ +{ + "name": "@botpress/lang-client", + "version": "1.2.0", + "description": "Client and typings for Language Server's API", + "author": "Botpress, Inc.", + "license": "AGPL-3.0", + "scripts": { + "build": "tsc --build", + "test": "jest --roots ./dist", + "clean": "rimraf ./dist && rimraf ./node_modules" + }, + "dependencies": { + "axios": "^0.21.1", + "lodash": "^4.17.19", + "joi": "^17.2.2" + }, + "devDependencies": { + "@types/lodash": "^4.14.116", + "@types/joi": "^17.2.3", + "@types/node": "^16.11.10", + "@types/jest": "^24.9.0", + "jest": "^24.9.0", + "typescript": "^5.0.4" + }, + "types": "./src/typings.d.ts", + "main": "./dist/index.js" +} diff --git a/packages/lang-client/readme.md b/packages/lang-client/readme.md new file mode 100644 index 00000000..43fb482c --- /dev/null +++ b/packages/lang-client/readme.md @@ -0,0 +1,3 @@ +# Botpress Language Client + +NodeJS SDK for the Botpress Language Server written in TypeScript. 
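+
+## Usage
+
+A minimal usage sketch. It assumes a language server is reachable at `http://localhost:3100`
+(the default port of `@botpress/lang-server`) and that the server was started with an
+`authToken`; the token value below is a placeholder.
+
+```ts
+import { Client } from '@botpress/lang-client'
+
+// the server expects an "authorization: bearer <token>" header when an authToken is set
+const client = new Client({
+  baseURL: 'http://localhost:3100',
+  headers: { authorization: 'bearer my-token' }
+})
+
+const main = async () => {
+  // every endpoint resolves to either a success body or an ErrorResponse,
+  // discriminated by the `success` flag
+  const info = await client.getInfo()
+  if (!info.success) {
+    throw new Error(info.error.message)
+  }
+  console.log(`lang server v${info.version} ready: ${info.ready}`)
+
+  const tokenized = await client.tokenize(['hello world'], 'en')
+  if (tokenized.success) {
+    console.log(tokenized.tokens) // one array of tokens per utterance
+  }
+}
+
+void main()
+```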
diff --git a/packages/lang-client/src/client.ts b/packages/lang-client/src/client.ts new file mode 100644 index 00000000..ca03ff7a --- /dev/null +++ b/packages/lang-client/src/client.ts @@ -0,0 +1,120 @@ +import axios, { AxiosInstance, AxiosRequestConfig, AxiosResponse } from 'axios' + +import _ from 'lodash' +import { + Client as IClient, + LangClientConfig as ClientConfig, + InfoResponseBody, + TokenizeResponseBody, + VectorizeResponseBody, + LanguagesResponseBody, + DownloadLangResponseBody, + ErrorResponse, + SuccessReponse, + TokenizeRequestBody, + VectorizeRequestBody +} from './typings' + +import { validateResponse, HTTPCall, ClientResponseError, HTTPVerb } from './validation' + +const DEFAULT_CONFIG: AxiosRequestConfig = { + validateStatus: () => true +} + +export class LangClient implements IClient { + protected _axios: AxiosInstance + + constructor(config: ClientConfig) { + this._axios = axios.create({ ...DEFAULT_CONFIG, ...config }) + } + + public get axios() { + return this._axios + } + + public async getInfo(): Promise<InfoResponseBody | ErrorResponse> { + const ressource = 'info' + const call: HTTPCall<'GET'> = { verb: 'GET', ressource } + const res = await this._get(call) + return validateResponse<InfoResponseBody>(call, res) + } + + public async tokenize(utterances: string[], lang: string): Promise<TokenizeResponseBody | ErrorResponse> { + const ressource = `tokenize/${lang}` + const body: TokenizeRequestBody = { utterances } + const call: HTTPCall<'POST'> = { verb: 'POST', ressource } + const res = await this._post(call, body) + return validateResponse<TokenizeResponseBody>(call, res) + } + + public async vectorize(tokens: string[], lang: string): Promise<VectorizeResponseBody | ErrorResponse> { + const ressource = `vectorize/${lang}` + const body: VectorizeRequestBody = { tokens } + const call: HTTPCall<'POST'> = { verb: 'POST', ressource } + const res = await this._post(call, body) + return validateResponse<VectorizeResponseBody>(call, res) + } + + public async getLanguages(): Promise<LanguagesResponseBody | ErrorResponse> { + const ressource = 'languages' + const call: HTTPCall<'GET'> = { verb: 'GET', ressource } + const res = await this._get(call) + return validateResponse<LanguagesResponseBody>(call, res) + } + + public async startDownload(lang: string): Promise<DownloadLangResponseBody | ErrorResponse> { + const ressource = `languages/${lang}` + const call: HTTPCall<'POST'> = { verb: 'POST', ressource } + const res = await this._post(call) + return validateResponse<DownloadLangResponseBody>(call, res) + } + + public async deleteLang(lang: string): Promise<SuccessReponse | ErrorResponse> { + const ressource = `languages/${lang}/delete` + const call: HTTPCall<'POST'> = { verb: 'POST', ressource } + const res = await this._post(call) + return validateResponse<SuccessReponse>(call, res) + } + + public async loadLang(lang: string): Promise<SuccessReponse | ErrorResponse> { + const ressource = `languages/${lang}/load` + const call: HTTPCall<'POST'> = { verb: 'POST', ressource } + const res = await this._post(call) + return validateResponse<SuccessReponse>(call, res) + } + + public async cancelDownload(downloadId: string): Promise<SuccessReponse | ErrorResponse> { + const ressource = `languages/cancel/${downloadId}` + const call: HTTPCall<'POST'> = { verb: 'POST', ressource } + const res = await this._post(call) + return validateResponse<SuccessReponse>(call, res) + } + + private _post = async (call: HTTPCall<'POST'>, body?: any): Promise<AxiosResponse<any>> => { + try { + const { ressource } = call + const res = await this._axios.post(ressource, body) + return res + } catch (err) { + // axios validate status does not prevent all exceptions + throw this._mapErr(call, err) + } + } + + private _get = async (call: HTTPCall<'GET'>): Promise<AxiosResponse<any>> => { + try { + const { ressource } = call + const res = await this._axios.get(ressource) + return res + } catch (err) { + // axios validate status does not prevent all exceptions + throw this._mapErr(call, err) + } + } + + private _mapErr = (call: HTTPCall<HTTPVerb>, thrown: any): ClientResponseError => { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) + const httpStatus = -1 + return new ClientResponseError(call, httpStatus, err.message) + } +} diff --git a/packages/lang-client/src/index.ts b/packages/lang-client/src/index.ts new file mode 100644 index 00000000..d677daaf --- /dev/null +++ b/packages/lang-client/src/index.ts @@ -0,0 +1 @@ +export { LangClient as Client } from './client' diff --git a/packages/lang-client/src/typings.d.ts b/packages/lang-client/src/typings.d.ts new file mode 100644 index 00000000..9c8c1c3b --- /dev/null +++ b/packages/lang-client/src/typings.d.ts @@ -0,0 +1,124 @@ +import { AxiosRequestConfig, AxiosInstance } from 'axios' + +/** + * ################### + * ### Basic Types ### + * ################### + */ +export type LanguageInfo = { + version: string + ready: boolean + dimentions: number + domain: string + readOnly: boolean +} + +export type TokenizeResult = { + utterances: string[] + language: string + tokens: string[][] +} + +export type VectorizeResult = { + language: string + vectors: number[][] +} + +export type AvailableModel = { + code: string + name: string + flag: string +} + +export type InstalledModel = { + code: string + name: string + loaded: boolean +} + +export type DownloadStatusType = 'pending' | 'downloading' | 'loading' | 'errored' | 'done' +export type DownloadStatus = { + status: DownloadStatusType + message: string +} + +export type DownloadProgress = { + status: DownloadStatus + downloadId: string + size: number +} + +export type DownloadingModel = { + lang: string + progress: DownloadProgress +} + +export type LanguageState = { + available: AvailableModel[] + installed: InstalledModel[] + downloading: DownloadingModel[] +} + +export type DownloadStartResult = { + downloadId: string +} + +/** + * ########################## + * ### HTTP Communication ### + * ########################## + */ +export type ErrorType = 'bad_request' | 'not_ready' | 'unauthorized' | 'offline' | 'internal' +export type LangError = { + message: string + stack?: string + type: ErrorType + code: number +} + +export type ErrorResponse = { + success: false + error: LangError +} + +export type SuccessReponse = { + success: true +} + +export type InfoResponseBody = {} & SuccessReponse & LanguageInfo +export type TokenizeResponseBody = {} & SuccessReponse & TokenizeResult +export type VectorizeResponseBody = {} & SuccessReponse & VectorizeResult +export type LanguagesResponseBody = {} & SuccessReponse & LanguageState +export type DownloadLangResponseBody = {} & SuccessReponse & DownloadStartResult + +export type TokenizeRequestBody = { + utterances: string[] +} + +export type VectorizeRequestBody = { + tokens: string[] +} + +/** + * #################### + * ### Client Class ### + * #################### + */ +export type LangClientConfig = AxiosRequestConfig & { + authToken?: string +} + +export class Client { + readonly axios: AxiosInstance + + constructor(config: LangClientConfig) + + public getInfo(): Promise<InfoResponseBody | ErrorResponse> + public tokenize(utterances: string[], language: string): Promise<TokenizeResponseBody | ErrorResponse> + public vectorize(tokens: string[], language: string): Promise<VectorizeResponseBody | ErrorResponse> + public getLanguages(): Promise<LanguagesResponseBody | ErrorResponse> + public startDownload(lang: string): Promise<DownloadLangResponseBody | ErrorResponse> + public deleteLang(lang: string): Promise<SuccessReponse | ErrorResponse> + public loadLang(lang: string): Promise<SuccessReponse | ErrorResponse> + public cancelDownload(downloadId: string): Promise<SuccessReponse | ErrorResponse> +} diff --git a/packages/lang-client/src/validation.test.ts
b/packages/lang-client/src/validation.test.ts new file mode 100644 index 00000000..c8f616f3 --- /dev/null +++ b/packages/lang-client/src/validation.test.ts @@ -0,0 +1,93 @@ +import { AxiosResponse } from 'axios' +import { SuccessReponse, ErrorResponse, LangError } from './typings' +import { HTTPCall, validateResponse } from './validation' + +const augmentWithExtraKey = (res: Object) => { + return [ + { ...res, someExtraKey: undefined }, + { ...res, someExtraKey: null }, + { ...res, someExtraKey: '' }, + { ...res, someExtraKey: 'a value' }, + { ...res, someExtraKey: 69 }, + { ...res, someExtraKey: { key1: 69, key2: '42' } }, + { ...res, someExtraKey: [{ key1: 69, key2: '42' }, 666] } + ] +} + +const error: LangError = { code: 500, type: 'internal', message: 'An error' } +const call: HTTPCall<'GET'> = { verb: 'GET', ressource: 'info' } + +const axiosRes = (data: any): AxiosResponse => { + const x: Partial<AxiosResponse<any>> = { data, status: 200 } + return x as AxiosResponse +} + +test('validating with absent success key should fail', async () => { + // arrange && act && assert + expect(() => validateResponse(call, axiosRes({}))).toThrow() + expect(() => validateResponse(call, axiosRes({ someKey: 'some text' }))).toThrow() +}) + +test('validating a successful response should pass', async () => { + // arrange + const res: SuccessReponse = { success: true } + + // act && assert + expect(() => validateResponse(call, axiosRes(res))).not.toThrow() +}) + +test('validating an unsuccessful response with a non-empty error should pass', async () => { + // arrange + const res: ErrorResponse = { success: false, error } + + // act && assert + expect(() => validateResponse(call, axiosRes(res))).not.toThrow() +}) + +test('validating an unsuccessful response with empty error message should pass', async () => { + const error: LangError = { message: '', code: 500, type: 'internal' } + + // arrange + const res: ErrorResponse = { success: false, error } + + // act && assert + expect(() => validateResponse(call, axiosRes(res))).not.toThrow() +}) + +test('validating an unsuccessful response with empty error should fail', async () => { + // arrange + const res: ErrorResponse = { success: false, error: {} as LangError } + + // act && assert + expect(() => validateResponse(call, axiosRes(res))).toThrow() +}) + +test('validating an unsuccessful response with undefined error should fail', async () => { + // arrange + const res: Partial<ErrorResponse> = { success: false } + + // act && assert + expect(() => validateResponse(call, axiosRes(res))).toThrow() +}) + +test('validating a successful response with unknown keys should pass', async () => { + // arrange + const res = { success: true } + + // act && assert + const responses = augmentWithExtraKey(res) + responses.forEach((r) => { + expect(() => validateResponse(call, axiosRes(r))).not.toThrow() + }) +}) + +test('validating an unsuccessful response with unknown keys should pass', async () => { + // arrange + const res = { success: false, error } + + // act && assert + const responses = augmentWithExtraKey(res) + responses.forEach((r) => { + expect(() => validateResponse(call, axiosRes(r))).not.toThrow() + }) +}) diff --git a/packages/lang-client/src/validation.ts b/packages/lang-client/src/validation.ts new file mode 100644 index 00000000..f409017a --- /dev/null +++ b/packages/lang-client/src/validation.ts @@ -0,0 +1,69 @@ +import { AxiosResponse } from 'axios' +import Joi from 'joi' +import _ from 'lodash' +import { SuccessReponse, ErrorResponse } from './typings' + +const ERROR_RESPONSE_SCHEMA = Joi.object().keys({ + message: Joi.string().required().allow(''), + stack: Joi.string().optional().allow(''), + code: Joi.number().required(), + type: Joi.string().required() +}) + +export type HTTPVerb = 'GET' | 'POST' | 'PUT' | 'DELETE' +export type HTTPCall<V extends HTTPVerb> = { + verb: V + ressource: string +} + +export class ClientResponseError extends Error { + constructor(call: HTTPCall<HTTPVerb>, status: number, message: string) { + const { verb, ressource } = call + const ressourcePath = `lang-server/${ressource}` + const prefix = status >= 300 ? `${verb} ${ressourcePath} -> ${status}` : `${verb} ${ressourcePath}` + super(`(${prefix}) ${message}`) + } +} + +export const validateResponse = <S extends SuccessReponse>( + call: HTTPCall<HTTPVerb>, + res: AxiosResponse<any> +): S | ErrorResponse => { + const { status, data } = res + + if (_.isNil(data)) { + throw new ClientResponseError(call, status, 'Received empty HTTP response.') + } + + if (typeof data !== 'object') { + const responseType = typeof data + throw new ClientResponseError( + call, + status, + `Received ${responseType} HTTP response. Expected response to be an object.` + ) + } + + if (data.success === true) { + return data + } + + if (data.success === false) { + const { error } = data + if (_.isNil(error) || typeof error !== 'object') { + throw new ClientResponseError( + call, + status, + 'Received unsuccessful HTTP response with no error. Expected response.error to be an object.' + ) + } + Joi.assert(error, ERROR_RESPONSE_SCHEMA) + return data + } + + throw new ClientResponseError( + call, + status, + 'Received HTTP response body has no attribute "success". Expected response.success to be a boolean.' + ) +} diff --git a/packages/lang-client/tsconfig.json b/packages/lang-client/tsconfig.json new file mode 100644 index 00000000..171631ed --- /dev/null +++ b/packages/lang-client/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../../tsconfig.packages.json", + "references": [], + "compilerOptions": { + "outDir": "./dist" /* Redirect output structure to the directory. */, + "rootDir": "./src" /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */, + "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file.
*/, + "baseUrl": ".", + "composite": true, + "typeRoots": ["./node_modules/@types", "../../node_modules/@types"] + } +} diff --git a/packages/lang-server/package.json b/packages/lang-server/package.json index 86061a24..9765b61e 100644 --- a/packages/lang-server/package.json +++ b/packages/lang-server/package.json @@ -1,6 +1,6 @@ { "name": "@botpress/lang-server", - "version": "1.1.0", + "version": "1.2.0", "description": "Botpress Language Server", "main": "./dist/index.js", "author": "Botpress, Inc.", @@ -8,8 +8,16 @@ "types": "./src/typings.d.ts", "bin": "./dist/index.js", "dependencies": { + "@botpress/lang-client": "*", "@botpress/nlu-engine": "*", - "@botpress/logger": "*", + "@botpress/telemetry": "*", + "@bpinternal/log4bot": "^0.0.4", + "@bpinternal/trail": "^0.1.0", + "@opentelemetry/api": "1.1.0", + "@opentelemetry/instrumentation-express": "0.27.0", + "@opentelemetry/instrumentation-http": "0.27.0", + "@opentelemetry/instrumentation-knex": "0.27.0", + "@opentelemetry/instrumentation-pg": "0.27.0", "axios": "^0.21.1", "bluebird": "^3.7.2", "body-parser": "^1.18.3", @@ -19,17 +27,19 @@ "cors": "^2.8.5", "eventemitter2": "^5.0.1", "express": "^4.16.4", - "express-rate-limit": "^3.5.1", + "express-rate-limit": "^5.5.0", "fs-extra": "^9.1.0", "glob": "^7.1.6", "joi": "^13.6.0", "lodash": "^4.17.19", "ms": "^2.1.1", "on-headers": "^1.0.2", - "yn": "^2.0.0" + "yn": "^4.0.0" }, "devDependencies": { + "@types/cors": "^2.8.12", "@types/express": "^4.16.0", + "@types/express-rate-limit": "^5.1.3", "@types/fs-extra": "^5.0.4", "@types/jest": "^24.9.0", "@types/joi": "^13.4.5", @@ -37,7 +47,8 @@ "@types/lru-cache": "^5.1.0", "@types/mkdirp": "^1.0.1", "@types/ms": "^0.7.30", - "@types/node": "^12.13.0", + "@types/node": "^16.11.10", + "@types/on-headers": "^1.0.0", "@typescript-eslint/eslint-plugin": "^4.22.0", "@typescript-eslint/parser": "^4.22.0", "cross-env": "^5.2.0", @@ -47,12 +58,11 @@ "eslint-plugin-jsdoc": "^32.3.2", "jest": "^24.9.0", "prettier": "^2.2.1", - "ts-jest": "^26.5.5", - "typescript": "^3.9.10" + "typescript": "^5.0.4" }, "scripts": { "build": "tsc --build", - "start": "cross-env node ./dist/index.js", - "test": "echo \"no tests\"" + "start": "node -r @bpinternal/trail/init ./dist/index.js", + "clean": "rimraf ./dist && rimraf ./node_modules" } } diff --git a/packages/lang-server/src/api.ts b/packages/lang-server/src/api.ts deleted file mode 100644 index e95e9db2..00000000 --- a/packages/lang-server/src/api.ts +++ /dev/null @@ -1,224 +0,0 @@ -import { Logger } from '@botpress/logger' -import { LanguageService } from '@botpress/nlu-engine' -import Bluebird from 'bluebird' -import bodyParser from 'body-parser' -import cors from 'cors' -import express, { Application } from 'express' -import rateLimit from 'express-rate-limit' -import { createServer } from 'http' -import _ from 'lodash' -import ms from 'ms' -import yn from 'yn' - -import { getLanguageByCode } from './languages' -import { monitoringMiddleware, startMonitoring } from './monitoring' -import DownloadManager from './service/download-manager' -import { assertValidLanguage, serviceLoadingMiddleware } from './util' -import { authMiddleware, handleErrorLogging, handleUnexpectedError, isAdminToken, RequestWithLang } from './utils/http' -import { BadRequestError } from './utils/http/errors' - -export interface APIOptions { - version: string - host: string - port: number - authToken?: string - limitWindow: string - limit: number - adminToken: string -} - -const OFFLINE_ERR_MSG = 'The server is running in offline mode. 
This function is disabled.' - -const cachePolicy = { 'Cache-Control': `max-age=${ms('1d')}` } - -const createExpressApp = (options: APIOptions, baseLogger: Logger): Application => { - const app = express() - const requestLogger = baseLogger.sub('api').sub('request') - - // This must be first, otherwise the /info endpoint can't be called when token is used - app.use(cors()) - - app.use(bodyParser.json({ limit: '250kb' })) - - app.use((req, res, next) => { - res.header('X-Powered-By', 'Botpress') - requestLogger.debug(`incoming ${req.method} ${req.path}`, { ip: req.ip }) - next() - }) - - app.use(monitoringMiddleware) - app.use(handleUnexpectedError) - - if (process.env.REVERSE_PROXY) { - const boolVal = yn(process.env.REVERSE_PROXY) - app.set('trust proxy', boolVal === null ? process.env.REVERSE_PROXY : boolVal) - } - - if (options.limit > 0) { - app.use( - rateLimit({ - windowMs: ms(options.limitWindow), - max: options.limit, - message: 'Too many requests, please slow down' - }) - ) - } - - if (options.authToken && options.authToken.length) { - // Both tokens can be used to query the language server - app.use(authMiddleware(options.authToken, baseLogger, options.adminToken)) - } - - return app -} - -export default async function ( - options: APIOptions, - baseLogger: Logger, - languageService: LanguageService, - downloadManager?: DownloadManager -) { - const app = createExpressApp(options, baseLogger) - const logger = baseLogger.sub('lang').sub('api') - - const waitForServiceMw = serviceLoadingMiddleware(languageService) - const validateLanguageMw = assertValidLanguage(languageService) - const adminTokenMw = authMiddleware(options.adminToken, baseLogger) - - app.get('/info', (req, res) => { - res.send({ - version: options.version, - ready: languageService.isReady, - dimentions: languageService.dim, - domain: languageService.domain, - readOnly: !isAdminToken(req, options.adminToken), - languages: languageService.getModels().filter((x) => x.loaded) // TODO remove this from info and make clients use /languages route - }) - }) - - app.post('/tokenize', waitForServiceMw, validateLanguageMw, async (req: RequestWithLang, res, next) => { - try { - const utterances = req.body.utterances - const language = req.language! - - if (!utterances || !_.isArray(utterances) || !utterances.length) { - // For backward compatibility with Botpress 12.0.0 - 12.0.2 - const singleInput = req.body.input - if (!singleInput || !_.isString(singleInput)) { - throw new BadRequestError('Param `utterances` is mandatory (must be an array of string)') - } - const tokens = await languageService.tokenize([singleInput], language) - res.set(cachePolicy).json({ input: singleInput, language, tokens: tokens[0] }) - } else { - const tokens = await languageService.tokenize(utterances, language) - res.set(cachePolicy).json({ utterances, language, tokens }) - } - } catch (err) { - next(err) - } - }) - - app.post('/vectorize', waitForServiceMw, validateLanguageMw, async (req: RequestWithLang, res, next) => { - try { - const tokens = req.body.tokens - const lang = req.language! 
- - if (!tokens || !tokens.length || !_.isArray(tokens)) { - throw new BadRequestError('Param `tokens` is mandatory (must be an array of strings)') - } - - const result = await languageService.vectorize(tokens, lang) - res.set(cachePolicy).json({ language: lang, vectors: result }) - } catch (err) { - next(err) - } - }) - - const router = express.Router({ mergeParams: true }) - - router.get('/', (req, res) => { - if (!downloadManager) { - const localLanguages = languageService.getModels().map((m) => { - const { name } = getLanguageByCode(m.lang) - return { ...m, code: m.lang, name } - }) - - return res.send({ - available: localLanguages, - installed: localLanguages, - downloading: [] - }) - } - - const downloading = downloadManager.inProgress.map((x) => ({ - lang: x.lang, - progress: { - status: x.getStatus(), - downloadId: x.id, - size: x.totalDownloadSizeProgress - } - })) - - res.send({ - available: downloadManager.downloadableLanguages, - installed: languageService.getModels(), - downloading - }) - }) - - router.post('/:lang', adminTokenMw, async (req, res) => { - const { lang } = req.params - if (!downloadManager) { - return res.status(404).send({ success: false, error: OFFLINE_ERR_MSG }) - } - - try { - const downloadId = await downloadManager.download(lang) - res.json({ success: true, downloadId }) - } catch (err) { - res.status(404).send({ success: false, error: err.message }) - } - }) - - router.post('/:lang/delete', adminTokenMw, validateLanguageMw, async (req: RequestWithLang, res, next) => { - languageService.remove(req.language!) - res.sendStatus(200) - }) - - router.post('/:lang/load', adminTokenMw, validateLanguageMw, async (req: RequestWithLang, res, next) => { - try { - await languageService.loadModel(req.language!) - res.sendStatus(200) - } catch (err) { - res.status(500).send({ success: false, message: err.message }) - } - }) - - router.post('/cancel/:id', adminTokenMw, (req, res) => { - const { id } = req.params - if (!downloadManager) { - return res.send({ success: false, error: OFFLINE_ERR_MSG }) - } - - downloadManager.cancelAndRemove(id) - res.send({ success: true }) - }) - - app.use('/languages', waitForServiceMw, router) - app.use(handleErrorLogging) - - const httpServer = createServer(app) - - await Bluebird.fromCallback((callback) => { - const hostname = options.host === 'localhost' ? 
undefined : options.host - httpServer.listen(options.port, hostname, undefined, () => { - callback(null) - }) - }) - - logger.info(`Language Server is ready at http://${options.host}:${options.port}/`) - - if (process.env.MONITORING_INTERVAL) { - startMonitoring(baseLogger) - } -} diff --git a/packages/lang-server/src/api/errors.ts b/packages/lang-server/src/api/errors.ts new file mode 100644 index 00000000..74f38901 --- /dev/null +++ b/packages/lang-server/src/api/errors.ts @@ -0,0 +1,40 @@ +const escapeHtmlSimple = (str: string) => { + return str + .replace(/</g, '&lt;') + .replace(/>/g, '&gt;') + .replace(/\//g, '&#x2F;') + .replace(/\\/g, '&#x5C;') + .replace(/`/g, '&#96;') +} + +export abstract class ResponseError extends Error { + public skipLogging = true + constructor(public message: string, public statusCode: number) { + super(escapeHtmlSimple(message)) + Error.captureStackTrace(this, this.constructor) + } +} + +export class BadRequestError extends ResponseError { + constructor(message: string) { + super(`Bad Request: ${message}`, 400) + } +} + +export class NotReadyError extends ResponseError { + constructor(service: string) { + super(`Service Not Ready: ${service}`, 400) + } +} + +export class UnauthorizedError extends ResponseError { + constructor(message: string) { + super(`Unauthorized: ${message}`, 401) + } +} + +export class OfflineError extends ResponseError { + constructor() { + super('The server is running in offline mode. This function is disabled.', 404) + } +} diff --git a/packages/lang-server/src/api/index.ts b/packages/lang-server/src/api/index.ts new file mode 100644 index 00000000..19d88515 --- /dev/null +++ b/packages/lang-server/src/api/index.ts @@ -0,0 +1,239 @@ +import { + InfoResponseBody, + TokenizeResponseBody, + VectorizeResponseBody, + LanguagesResponseBody, + DownloadLangResponseBody, + SuccessReponse +} from '@botpress/lang-client' +import { prometheus } from '@botpress/telemetry' +import { Logger } from '@bpinternal/log4bot' +import { isEnabled } from '@bpinternal/trail' +import { context, trace } from '@opentelemetry/api' +import * as Sentry from '@sentry/node' +import Bluebird from 'bluebird' +import bodyParser from 'body-parser' +import cors from 'cors' +import express, { Application } from 'express' +import rateLimit from 'express-rate-limit' +import { createServer } from 'http' +import _ from 'lodash' +import ms from 'ms' + +import { LangApplication } from '../application' + +import { monitoringMiddleware, startMonitoring } from './monitoring' +import { authMiddleware } from './mw-authentification' +import { handleUnexpectedError } from './mw-handle-error' +import { serviceLoadingMiddleware } from './mw-service-loading' +import { validateTokenizeRequestBody, validateVectorizeRequestBody } from './validation/body' +import { extractPathLanguageMiddleware, RequestWithLang } from './validation/lang-path' + +export type APIOptions = { + version: string + host: string + port: number + prometheusEnabled: boolean + apmEnabled: boolean + authToken?: string + limitWindow: string + limit: number + adminToken: string + reverseProxy?: string +} + +const cachePolicy = { 'Cache-Control': `max-age=${ms('1d')}` } + +const createExpressApp = async (options: APIOptions, baseLogger: Logger): Promise<Application> => { + const app = express() + const requestLogger = baseLogger.sub('api').sub('request') + + // This must be first, otherwise the /info endpoint can't be called when token is used + app.use(cors()) + + if (options.prometheusEnabled) { + await prometheus.init(app) + } + + app.use(bodyParser.json({ limit: '250kb'
})) + + if (options.apmEnabled) { + Sentry.init() + app.use(Sentry.Handlers.requestHandler()) + } + + app.use((req, res, next) => { + res.header('X-Powered-By', 'Botpress') + + const metadata: { ip: string; traceId?: string } = { ip: req.ip } + + if (isEnabled()) { + const spanContext = trace.getSpanContext(context.active()) + + if (spanContext?.traceId) { + metadata.traceId = spanContext?.traceId + } + } + + requestLogger.debug(`incoming ${req.method} ${req.path}`, metadata) + next() + }) + + app.use(monitoringMiddleware) + + if (options.reverseProxy) { + app.set('trust proxy', options.reverseProxy) + } + + if (options.limit > 0) { + app.use( + rateLimit({ + windowMs: ms(options.limitWindow), + max: options.limit, + message: 'Too many requests, please slow down' + }) + ) + } + + if (options.authToken && options.authToken.length) { + // Both tokens can be used to query the language server + app.use(authMiddleware(options.authToken, baseLogger, options.adminToken)) + } + + return app +} + +export default async function (options: APIOptions, baseLogger: Logger, application: LangApplication) { + const app = await createExpressApp(options, baseLogger) + const logger = baseLogger.sub('lang').sub('api') + + const waitForServiceMw = serviceLoadingMiddleware(application.languageService) + const validateLanguageMw = extractPathLanguageMiddleware(application.languageService) + const adminTokenMw = authMiddleware(options.adminToken, baseLogger) + const handleErr = handleUnexpectedError(logger) + + app.get('/info', (req, res, next) => { + try { + const info = application.getInfo(req.headers.authorization) + const response: InfoResponseBody = { + success: true, + ...info + } + return res.json(response) + } catch (err) { + return next(err) + } + }) + + app.post('/tokenize/:lang', waitForServiceMw, validateLanguageMw, async (req: RequestWithLang, res, next) => { + try { + const { lang } = req.params + const { utterances } = validateTokenizeRequestBody(req.body) + const result = await application.tokenize(utterances, lang) + const response: TokenizeResponseBody = { + success: true, + ...result + } + return res.set(cachePolicy).json(response) + } catch (err) { + return next(err) + } + }) + + app.post('/vectorize/:lang', waitForServiceMw, validateLanguageMw, async (req: RequestWithLang, res, next) => { + try { + const { lang } = req.params + const { tokens } = validateVectorizeRequestBody(req.body) + const result = await application.vectorize(tokens, lang) + const response: VectorizeResponseBody = { + success: true, + ...result + } + return res.set(cachePolicy).json(response) + } catch (err) { + return next(err) + } + }) + + const router = express.Router({ mergeParams: true }) + + router.get('/', (req, res, next) => { + try { + const result = application.getLanguages() + const response: LanguagesResponseBody = { + success: true, + ...result + } + return res.json(response) + } catch (err) { + return next(err) + } + }) + + router.post('/:lang', adminTokenMw, validateLanguageMw, async (req: RequestWithLang, res, next) => { + const { lang } = req.params + try { + const { downloadId } = await application.startDownloadLang(lang) + const response: DownloadLangResponseBody = { success: true, downloadId } + return res.json(response) + } catch (err) { + return next(err) + } + }) + + router.post('/:lang/delete', adminTokenMw, validateLanguageMw, async (req: RequestWithLang, res, next) => { + try { + const { lang } = req.params + application.deleteLang(lang) + const response: SuccessReponse = { success: true } + return 
res.json(response) + } catch (err) { + return next(err) + } + }) + + router.post('/:lang/load', adminTokenMw, validateLanguageMw, async (req: RequestWithLang, res, next) => { + try { + const { lang } = req.params + await application.loadLang(lang) + const response: SuccessReponse = { success: true } + return res.json(response) + } catch (err) { + return next(err) + } + }) + + router.post('/cancel/:id', adminTokenMw, (req, res, next) => { + try { + const { id } = req.params + application.cancelDownloadLang(id) + const response: SuccessReponse = { success: true } + return res.json(response) + } catch (err) { + return next(err) + } + }) + + app.use('/languages', waitForServiceMw, router) + + if (options.apmEnabled) { + app.use(Sentry.Handlers.errorHandler()) + } + + app.use(handleErr) + + const httpServer = createServer(app) + + await Bluebird.fromCallback((callback) => { + const hostname = options.host === 'localhost' ? undefined : options.host + httpServer.listen(options.port, hostname, undefined, () => { + callback(null) + }) + }) + + logger.info(`Language Server is ready at http://${options.host}:${options.port}/`) + + if (process.env.MONITORING_INTERVAL) { + startMonitoring(baseLogger, process.env.MONITORING_INTERVAL) + } +} diff --git a/packages/lang-server/src/monitoring.ts b/packages/lang-server/src/api/monitoring.ts similarity index 81% rename from packages/lang-server/src/monitoring.ts rename to packages/lang-server/src/api/monitoring.ts index dbe9f206..324637c6 100644 --- a/packages/lang-server/src/monitoring.ts +++ b/packages/lang-server/src/api/monitoring.ts @@ -1,4 +1,5 @@ -import { Logger } from '@botpress/logger' +import { Logger } from '@bpinternal/log4bot' +import { Request, Response, NextFunction } from 'express' import _ from 'lodash' import ms from 'ms' import onHeaders from 'on-headers' @@ -6,10 +7,10 @@ import onHeaders from 'on-headers' let collectionEnabled = false let metrics = {} -export const startMonitoring = (baseLogger: Logger) => { +export const startMonitoring = (baseLogger: Logger, interval: string) => { const monitoringLogger = baseLogger.sub('lang').sub('api').sub('monitoring') - monitoringLogger.debug('Metrics collection enabled. Interval: ', process.env.MONITORING_INTERVAL) + monitoringLogger.debug('Metrics collection enabled. 
Interval: ', interval) setInterval(() => { if (!metrics || !Object.keys(metrics).length) { @@ -30,7 +31,7 @@ export const startMonitoring = (baseLogger: Logger) => { console.error('Could not prepare stats:', err) } metrics = {} - }, ms(process.env.MONITORING_INTERVAL!)) + }, ms(interval)) collectionEnabled = true } @@ -51,7 +52,7 @@ export const logMetric = (language: string = 'n/a', timeInMs: number) => { } } -export const monitoringMiddleware = (req, res, next) => { +export const monitoringMiddleware = (req: Request, res: Response, next: NextFunction) => { const startAt = Date.now() onHeaders(res, () => { diff --git a/packages/lang-server/src/api/mw-authentification.ts b/packages/lang-server/src/api/mw-authentification.ts new file mode 100644 index 00000000..2d2aefa4 --- /dev/null +++ b/packages/lang-server/src/api/mw-authentification.ts @@ -0,0 +1,38 @@ +import { Logger } from '@bpinternal/log4bot' +import { NextFunction, Response, Request } from 'express' +import { UnauthorizedError } from './errors' + +export const authMiddleware = (secureToken: string, baseLogger: Logger, secondToken?: string) => ( + req: Request, + _res: Response, + next: NextFunction +) => { + if (!secureToken || !secureToken.length) { + return next() + } + + const logger = baseLogger.sub('api').sub('auth') + + if (!req.headers.authorization) { + logger.error('Authorization header missing', { ip: req.ip }) + return next(new UnauthorizedError('Authorization header is missing')) + } + + const [scheme, token] = req.headers.authorization.split(' ') + if (scheme.toLowerCase() !== 'bearer') { + logger.error('Scheme is missing', { ip: req.ip }) + return next(new UnauthorizedError(`Unknown scheme "${scheme}" - expected 'bearer <token>'`)) + } + + if (!token) { + logger.error('Token is missing', { ip: req.ip }) + return next(new UnauthorizedError('Authentication token is missing')) + } + + if (secureToken !== token && secondToken !== token) { + logger.error('Invalid token', { ip: req.ip }) + return next(new UnauthorizedError('Invalid Bearer token')) + } + + next() +} diff --git a/packages/lang-server/src/api/mw-handle-error.ts b/packages/lang-server/src/api/mw-handle-error.ts new file mode 100644 index 00000000..56342238 --- /dev/null +++ b/packages/lang-server/src/api/mw-handle-error.ts @@ -0,0 +1,53 @@ +import { LangError, ErrorResponse } from '@botpress/lang-client' +import { Logger } from '@bpinternal/log4bot' +import { NextFunction, Request, Response } from 'express' +import { BadRequestError, NotReadyError, UnauthorizedError, OfflineError, ResponseError } from './errors' + +const serializeError = (err: Error): LangError => { + const { message, stack } = err + if (err instanceof BadRequestError) { + const { statusCode } = err + return { message, stack, type: 'bad_request', code: statusCode } + } + if (err instanceof NotReadyError) { + const { statusCode } = err + return { message, stack, type: 'not_ready', code: statusCode } + } + if (err instanceof UnauthorizedError) { + const { statusCode } = err + return { message, stack, type: 'unauthorized', code: statusCode } + } + if (err instanceof OfflineError) { + const { statusCode } = err + return { message, stack, type: 'offline', code: statusCode } + } + if (err instanceof ResponseError) { + const { statusCode } = err + return { message, stack, type: 'internal', code: statusCode } + } + return { message, stack, type: 'internal', code: 500 } +} + +const _handleErrorLogging = (err: Error, logger: Logger) => { + if ((err instanceof ResponseError && err.skipLogging) ||
process.env.SKIP_LOGGING) { + return + } + logger.attachError(err).error('Error') +} + +export const handleUnexpectedError = (logger: Logger) => ( + thrownObject: any, + _req: Request, + res: Response, + _next: NextFunction +) => { + const err: Error = thrownObject instanceof Error ? thrownObject : new Error(`${thrownObject}`) + const langError = serializeError(err) + const { code } = langError + const response: ErrorResponse = { + success: false, + error: langError + } + res.status(code).json(response) + _handleErrorLogging(err, logger) +} diff --git a/packages/lang-server/src/api/mw-service-loading.ts b/packages/lang-server/src/api/mw-service-loading.ts new file mode 100644 index 00000000..5d217182 --- /dev/null +++ b/packages/lang-server/src/api/mw-service-loading.ts @@ -0,0 +1,16 @@ +import { LanguageService } from '@botpress/nlu-engine' +import { Request, Response, NextFunction } from 'express' +import _ from 'lodash' +import { NotReadyError } from './errors' + +export const serviceLoadingMiddleware = (service: LanguageService) => ( + _req: Request, + _res: Response, + next: NextFunction +) => { + if (!service.isReady) { + return next(new NotReadyError('Language Server is still loading')) + } + + next() +} diff --git a/packages/lang-server/src/api/validation/body.ts b/packages/lang-server/src/api/validation/body.ts new file mode 100644 index 00000000..16adc965 --- /dev/null +++ b/packages/lang-server/src/api/validation/body.ts @@ -0,0 +1,24 @@ +import { TokenizeRequestBody, VectorizeRequestBody } from '@botpress/lang-client' +import _ from 'lodash' +import { BadRequestError } from '../errors' + +export const validateTokenizeRequestBody = (body: any): TokenizeRequestBody => { + const { utterances } = body + if (!utterances || !utterances.length || !_.isArray(utterances) || utterances.some((u) => !_.isString(u))) { + throw new BadRequestError('Param "utterances" is mandatory (must be an array of strings)') + } + + return { + utterances + } +} + +export const validateVectorizeRequestBody = (body: any): VectorizeRequestBody => { + const { tokens } = body + if (!tokens || !tokens.length || !_.isArray(tokens) || tokens.some((t) => !_.isString(t))) { + throw new BadRequestError('Param "tokens" is mandatory (must be an array of strings)') + } + return { + tokens + } +} diff --git a/packages/lang-server/src/api/validation/lang-path.ts b/packages/lang-server/src/api/validation/lang-path.ts new file mode 100644 index 00000000..2262bc64 --- /dev/null +++ b/packages/lang-server/src/api/validation/lang-path.ts @@ -0,0 +1,41 @@ +import { LanguageService } from '@botpress/nlu-engine' +import { NextFunction, Request, Response } from 'express' +import _ from 'lodash' +import { LANGUAGES } from '../../languages' +import { BadRequestError } from './../errors' + +export type RequestWithLang = Request & { + params: { lang: string } +} + +export const assertLanguage = (service: LanguageService, language: any): void => { + if (!language) { + throw new BadRequestError("Param 'lang' is mandatory") + } + + if (!_.isString(language)) { + throw new BadRequestError(`Param 'lang': ${language} must be a string`) + } + + if (!_(LANGUAGES).keys().includes(language)) { + throw new BadRequestError(`Param 'lang': ${language} is not an ISO 639-1 language code`) + } + + const availableLanguages = service.getModels().map((x) => x.lang) + if (!availableLanguages.includes(language)) { + throw new BadRequestError(`Param 'lang': ${language} is not an element of the available languages`) + } + + // language is valid +} + +export const extractPathLanguageMiddleware = (service: LanguageService) => { + return (req: Request, _res: Response, next: NextFunction) => { + try { + assertLanguage(service, req.params.lang) + next() + } catch (err) { + next(err) + } + } +} diff --git a/packages/lang-server/src/service/download-manager.ts b/packages/lang-server/src/application/download-manager.ts similarity index 85% rename from packages/lang-server/src/service/download-manager.ts rename to packages/lang-server/src/application/download-manager.ts index 0155db11..708f278b 100644 --- a/packages/lang-server/src/service/download-manager.ts +++ b/packages/lang-server/src/application/download-manager.ts @@ -1,4 +1,5 @@ -import { Logger } from '@botpress/logger' +import { AvailableModel } from '@botpress/lang-client' +import { Logger } from '@bpinternal/log4bot' import axios from 'axios' import fse from 'fs-extra' import ms from 'ms' @@ -8,8 +9,7 @@ import { getAppDataPath } from '../app-data' import ModelDownload from './model-download' type ModelType = 'bpe' | 'embeddings' - -export interface DownloadableModel { +type DownloadableModel = { type: ModelType remoteUrl: string language: string @@ -18,13 +18,13 @@ export interface DownloadableModel { domain?: string } -interface Language { +type Language = { code: string name: string flag: string } -interface Meta { +type Meta = { languages: { [code: string]: Language } @@ -53,7 +53,7 @@ export default class DownloadManager { this._logger = baseLogger.sub('lang').sub('download') } - async initialize() { + public async initialize(): Promise<void> { fse.ensureDirSync(this.destDir) if (this._refreshTimer) { clearInterval(this._refreshTimer) @@ -84,7 +84,8 @@ export default class DownloadManager { if (this._isValidMetadata(data)) { this.meta = data } - } catch (err) { + } catch (thrown) { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) this._logger.debug('Error fetching models', { url: this.metaUrl, message: err.message }) throw err } @@ -115,22 +116,24 @@ export default class DownloadManager { if (this._isValidMetadata(json)) { this.meta = json } - } catch (err) { + } catch (thrown) { + const err = thrown instanceof Error ?
thrown : new Error(`${thrown}`) this._logger.debug('Error reading metadata file', { file: filePath, message: err.message }) } } - get downloadableLanguages() { + public get downloadableLanguages(): AvailableModel[] { if (!this.meta) { throw new Error('Meta not initialized yet') } - return this.meta.embeddings + const { meta } = this + return meta.embeddings .filter((mod) => mod.dim === this.dim && mod.domain === this.domain) .map((mod) => { return { - ...this.meta!.languages[mod.language], - size: mod.size + this.meta!.bpe[mod.language].size + ...meta.languages[mod.language], + size: mod.size + meta.bpe[mod.language].size } }) } @@ -145,7 +148,7 @@ export default class DownloadManager { }) } - cancelAndRemove(id: string) { + public cancelAndRemove(id: string): void { const activeDownload = this.inProgress.find((x) => x.id !== id) if (activeDownload && activeDownload.getStatus().status === 'downloading') { activeDownload.cancel() @@ -158,7 +161,7 @@ export default class DownloadManager { this.inProgress = this.inProgress.filter((x) => x.id !== id) } - async download(lang: string) { + public async download(lang: string): Promise<string> { if (!this.downloadableLanguages.find((l) => lang === l.code)) { throw new Error(`Could not find model of dimention "${this.dim}" in domain "${this.domain}" for lang "${lang}"`) } diff --git a/packages/lang-server/src/application/index.ts b/packages/lang-server/src/application/index.ts new file mode 100644 index 00000000..eefd05d8 --- /dev/null +++ b/packages/lang-server/src/application/index.ts @@ -0,0 +1,119 @@ +import { + LanguageInfo, + TokenizeResult, + VectorizeResult, + LanguageState, + DownloadStartResult, + InstalledModel as LangServerInstalledModel +} from '@botpress/lang-client' +import { LanguageService, InstalledModel as EngineInstalledModel } from '@botpress/nlu-engine' +import Bluebird from 'bluebird' +import { OfflineError } from '../api/errors' +import { getLanguageByCode } from '../languages' +import DownloadManager from './download-manager' + +type AppOptions = { + version: string + adminToken?: string + offline: boolean +} + +export class LangApplication { + constructor( + public languageService: LanguageService, + public downloadManager: DownloadManager, + private options: AppOptions + ) {} + + public initialize(): Promise<void> { + return Bluebird.all([this.languageService.initialize(), this.downloadManager.initialize()]) as Promise<void> + } + + public getInfo(authHeader?: string): LanguageInfo { + return { + version: this.options.version, + ready: this.languageService.isReady, + dimentions: this.languageService.dim, + domain: this.languageService.domain, + readOnly: !this._isAdminToken(authHeader) + } + } + + public async tokenize(utterances: string[], language: string): Promise<TokenizeResult> { + const tokens = await this.languageService.tokenize(utterances, language) + return { utterances, language, tokens } + } + + public async vectorize(tokens: string[], language: string): Promise<VectorizeResult> { + const result = await this.languageService.vectorize(tokens, language) + return { language, vectors: result } + } + + public getLanguages(): LanguageState { + if (this.options.offline) { + const localLanguages = this.languageService.getModels().map(this._mapInstalledModel) + + return { + available: [], + installed: localLanguages, + downloading: [] + } + } + + const downloading = this.downloadManager.inProgress.map((x) => ({ + lang: x.lang, + progress: { + status: x.getStatus(), + downloadId: x.id, + size: x.totalDownloadSizeProgress + } + })) + + return { + available: this.downloadManager.downloadableLanguages, + installed: this.languageService.getModels().map(this._mapInstalledModel), + downloading + } + } + + private _mapInstalledModel = (m: EngineInstalledModel): LangServerInstalledModel => { + const { lang, loaded } = m + const { name } = getLanguageByCode(m.lang) + return { code: lang, name, loaded } + } + + public async startDownloadLang(lang: string): Promise<DownloadStartResult> { + if (this.options.offline) { + throw new OfflineError() + } + + const downloadId = await this.downloadManager.download(lang) + return { downloadId } + } + + public deleteLang(lang: string): void { + return this.languageService.remove(lang) + } + + public loadLang(lang: string): Promise<void> { + return this.languageService.loadModel(lang) + } + + public cancelDownloadLang(downloadId: string): void { + if (this.options.offline) { + throw new OfflineError() + } + return this.downloadManager.cancelAndRemove(downloadId) + } + + private _isAdminToken = (authHeader?: string) => { + if (!this.options.adminToken || !this.options.adminToken.length) { + return true + } + if (!authHeader) { + return false + } + const [, token] = authHeader.split(' ') + return token === this.options.adminToken + } +} diff --git a/packages/lang-server/src/service/model-download.ts b/packages/lang-server/src/application/model-download.ts similarity index 92% rename from packages/lang-server/src/service/model-download.ts rename to packages/lang-server/src/application/model-download.ts index f9b1f31a..50b52a9d 100644 --- a/packages/lang-server/src/service/model-download.ts +++ b/packages/lang-server/src/application/model-download.ts @@ -1,12 +1,12 @@ -import { Logger } from '@botpress/logger' +import { Logger } from '@bpinternal/log4bot' import axios, { CancelTokenSource } from 'axios' import Bluebird from 'bluebird' import fse from 'fs-extra' import _ from 'lodash' import { Readable } from 'stream' -type ModelType = 'bpe' | 'embeddings' -export interface DownloadableModel { +type ModelType = 'bpe' | 'embeddings' +export type DownloadableModel = { type: ModelType remoteUrl: string language: string @@ -47,7 +47,7 @@ export default class ModelDownload { return `${this.destDir}/${fn}` } - async start(done: DoneListener) { + public async start(done: DoneListener) { this._doneListeners.push(done) if (this.status !== 'pending') { @@ -60,11 +60,11 @@ export default class ModelDownload { } } - async listenProgress(listener: (p: number) => void) { + public async listenProgress(listener: (p: number) => void) { this._progressListeners.push(listener) } - async listenCompletion(listener: DoneListener) { + public async listenCompletion(listener: DoneListener) { this._doneListeners.push(listener) } @@ -105,7 +105,7 @@ export default class ModelDownload { stream.on('end', () => this._onFinishedDownloading(modelToDownload, downloadedSize, fileSize)) } - async _onFinishedDownloading(downloadedModel: DownloadableModel, downloadSize: number, fileSize: number) { + public async _onFinishedDownloading(downloadedModel: DownloadableModel, downloadSize: number, fileSize: number) { this.currentModel++ if (downloadSize !== fileSize) { @@ -159,7 +159,7 @@ export default class ModelDownload { } } - cancel() { + public cancel() { if (this.status === 'downloading') { this.cancelToken.cancel() this.status = 'errored' diff --git a/packages/lang-server/src/config.ts b/packages/lang-server/src/config.ts new file mode 100644 index 00000000..cc770587 --- /dev/null +++ b/packages/lang-server/src/config.ts @@ -0,0 +1,43 @@ +import path from 'path' +import {
getAppDataPath } from './app-data' +import { LangServerOptions, DownloadOptions, LangArgv, DownloadArgv } from './typings' + +const DEFAULT_LANG_DIR = () => { + const appDataPath = getAppDataPath() + return path.join(appDataPath, 'embeddings') +} + +const DEFAULT_SERVER_OPTIONS = (): LangServerOptions => ({ + port: 3100, + host: 'localhost', + langDir: DEFAULT_LANG_DIR(), + authToken: undefined, + adminToken: undefined, + limit: 0, + limitWindow: '1h', + metadataLocation: 'https://nyc3.digitaloceanspaces.com/botpress-public/embeddings/index.json', + offline: false, + dim: 100, + domain: 'bp', + apmEnabled: false, + prometheusEnabled: false, + logLevel: 'info', + debugFilter: undefined, + logFormat: 'text' +}) + +const DEFAULT_DOWNLOAD_OPTIONS = (lang: string): DownloadOptions => ({ + langDir: DEFAULT_LANG_DIR(), + metadataLocation: 'https://nyc3.digitaloceanspaces.com/botpress-public/embeddings/index.json', + dim: 100, + domain: 'bp', + lang +}) + +export const getLangServerConfig = (argv: LangArgv): LangServerOptions => { + return { ...DEFAULT_SERVER_OPTIONS(), ...argv } +} + +export const getDownloadConfig = (argv: DownloadArgv): DownloadOptions => { + return { ...DEFAULT_DOWNLOAD_OPTIONS(argv.lang), ...argv } +} diff --git a/packages/lang-server/src/download.ts b/packages/lang-server/src/download.ts index 75c11b98..20a0dc90 100644 --- a/packages/lang-server/src/download.ts +++ b/packages/lang-server/src/download.ts @@ -1,29 +1,20 @@ -import { LoggerLevel, makeLogger } from '@botpress/logger' import { LanguageService } from '@botpress/nlu-engine' +import { Logger } from '@bpinternal/log4bot' import cliProgress from 'cli-progress' import fse from 'fs-extra' import _ from 'lodash' -import path from 'path' -import { getAppDataPath } from './app-data' -import DownloadManager from './service/download-manager' +import DownloadManager from './application/download-manager' +import { getDownloadConfig } from './config' +import * as types from './typings' -interface Argv { - langDir?: string - lang: string - dim: number - domain: string - metadataLocation: string -} - -export default async (options: Argv) => { - const baseLogger = makeLogger({ - level: LoggerLevel.Info, - filters: undefined +export const download: typeof types.download = async (argv: types.DownloadArgv) => { + const options = getDownloadConfig(argv) + const baseLogger = new Logger('', { + level: 'info' }) - const appDataPath = getAppDataPath() - const languageDirectory = options.langDir || path.join(appDataPath, 'embeddings') + const languageDirectory = options.langDir await fse.ensureDir(languageDirectory) const launcherLogger = baseLogger.sub('Launcher') diff --git a/packages/lang-server/src/index.ts b/packages/lang-server/src/index.ts index 68102be4..1daf9c52 100644 --- a/packages/lang-server/src/index.ts +++ b/packages/lang-server/src/index.ts @@ -1,13 +1,16 @@ -import { centerText, LoggerLevel, Logger, makeLogger } from '@botpress/logger' import { LanguageService, Logger as EngineLogger } from '@botpress/nlu-engine' +import { Logger, TextFormatter, JSONFormatter } from '@bpinternal/log4bot' import chalk from 'chalk' import _ from 'lodash' import path from 'path' import API, { APIOptions } from './api' -import { getAppDataPath } from './app-data' +import { LangApplication } from './application' +import DownloadManager from './application/download-manager' +import { getLangServerConfig } from './config' import { requireJSON } from './require-json' -import DownloadManager from './service/download-manager' +import * as types from 
'./typings' +import { listenForUncaughtErrors } from './uncaught-errors' const packageJsonPath = path.resolve(__dirname, '../package.json') const packageJson = requireJSON<{ version: string }>(packageJsonPath) @@ -17,25 +20,9 @@ if (!packageJson) { const { version: pkgVersion } = packageJson -export { default as download } from './download' +export { download } from './download' export const version = pkgVersion -export interface ArgV { - port: number - host: string - limit: number - limitWindow: string - langDir?: string - authToken?: string - adminToken?: string - metadataLocation: string - offline: boolean - dim: number - domain: string - verbose: number - logFilter: string[] | undefined -} - const wrapLogger = (logger: Logger): EngineLogger => { return { debug: (msg: string) => logger.debug(msg), @@ -46,21 +33,25 @@ const wrapLogger = (logger: Logger): EngineLogger => { } } -export const run = async (options: ArgV) => { - const baseLogger = makeLogger({ - level: Number(options.verbose) !== NaN ? Number(options.verbose) : LoggerLevel.Info, - filters: options.logFilter, - prefix: 'LANG' - }) +const centerText = (text: string, width: number, indent: number = 0) => { + const padding = Math.floor((width - text.length) / 2) + return _.repeat(' ', padding + indent) + text + _.repeat(' ', padding) +} + +export const run: typeof types.run = async (argv: types.LangArgv) => { + const options = getLangServerConfig(argv) - const appDataPath = getAppDataPath() - options.langDir = options.langDir || path.join(appDataPath, 'embeddings') + const formatter = options.logFormat === 'json' ? new JSONFormatter() : new TextFormatter() + const baseLogger = new Logger('', { + level: options.logLevel, + filters: options.debugFilter ? { debug: options.debugFilter } : {}, + prefix: 'LANG', + formatter + }) const launcherLogger = baseLogger.sub('Launcher') - // Launcher always display launcherLogger.configure({ - level: LoggerLevel.Info, - filters: undefined + level: 'info' // Launcher always display }) launcherLogger.debug('Language Server Options %o', options) @@ -88,13 +79,18 @@ export const run = async (options: ArgV) => { authToken: options.authToken, limit: options.limit, limitWindow: options.limitWindow, + prometheusEnabled: options.prometheusEnabled, + apmEnabled: options.apmEnabled, adminToken: options.adminToken || '' } - launcherLogger.info(chalk`======================================== -{bold ${centerText('Botpress Language Server', 40, 9)}} -{dim ${centerText(`Version ${version}`, 40, 9)}} -${_.repeat(' ', 9)}========================================`) + const indent = 0 + const width = 75 + const border = _.repeat('=', width) + launcherLogger.info(chalk`${border} +{bold ${centerText('Botpress Language Server', width, indent)}} +{dim ${centerText(`Version ${version}`, width, indent)}} +${_.repeat(' ', indent)}${border}`) if (options.authToken?.length) { launcherLogger.info(`authToken: ${chalk.greenBright('enabled')} (only users with this token can query your server)`) @@ -134,13 +130,13 @@ ${_.repeat(' ', 9)}========================================`) launcherLogger.info(`Serving ${options.dim} language dimensions from ${options.langDir}`) - if (options.offline) { - await Promise.all([API(apiOptions, baseLogger, langService), langService.initialize()]) - } else { - await Promise.all([ - API(apiOptions, baseLogger, langService, downloadManager), - downloadManager.initialize(), - langService.initialize() - ]) - } + const { offline, adminToken } = options + const langApplication = new 
LangApplication(langService, downloadManager, { + offline, + version, + adminToken + }) + await Promise.all([API(apiOptions, baseLogger, langApplication), langApplication.initialize()]) + + listenForUncaughtErrors(baseLogger) }
diff --git a/packages/lang-server/src/languages.ts b/packages/lang-server/src/languages.ts index 9ba0bae9..0eff4f0d 100644 --- a/packages/lang-server/src/languages.ts +++ b/packages/lang-server/src/languages.ts @@ -1,4 +1,9 @@ -export const LANGUAGES = { +type Lang = { + name: string + nativeName: string +} + +export const LANGUAGES: _.Dictionary<Lang> = { aa: { name: 'Afar', nativeName: 'Afaraf' } @@ -737,4 +742,4 @@ export const LANGUAGES = { } } -export const getLanguageByCode = (code) => LANGUAGES[code] +export const getLanguageByCode = (code: string): Lang => LANGUAGES[code]
diff --git a/packages/lang-server/src/typings.d.ts b/packages/lang-server/src/typings.d.ts index bc9d6bca..93c60341 100644 --- a/packages/lang-server/src/typings.d.ts +++ b/packages/lang-server/src/typings.d.ts @@ -1,25 +1,36 @@ -export const run: (argv: { +import { LogLevel } from '@bpinternal/log4bot' + +type CommonOptions = { + langDir: string + metadataLocation: string + dim: number + domain: string +} + +export type LogFormat = 'text' | 'json' +export type LangServerOptions = { port: number host: string + reverseProxy?: string limit: number limitWindow: string - langDir?: string authToken?: string adminToken?: string - metadataLocation: string offline: boolean - dim: number - domain: string - verbose: number - logFilter: string[] | undefined -}) => Promise<void> + logLevel: LogLevel + logFormat: LogFormat + debugFilter?: string + prometheusEnabled: boolean + apmEnabled: boolean +} & CommonOptions -export const download: (argv: { - langDir?: string +export type DownloadOptions = { lang: string - dim: number - domain: string - metadataLocation: string -}) => Promise<void> +} & CommonOptions + +export type LangArgv = Partial<LangServerOptions> +export type DownloadArgv = Partial<DownloadOptions> & { lang: string } export const version: string +export const run: (argv: LangArgv) => Promise<void> +export const download: (argv: DownloadArgv) => Promise<void>
diff --git a/packages/lang-server/src/uncaught-errors.ts b/packages/lang-server/src/uncaught-errors.ts new file mode 100644 index 00000000..8792635b --- /dev/null +++ b/packages/lang-server/src/uncaught-errors.ts @@ -0,0 +1,13 @@ +import { Logger } from '@bpinternal/log4bot' + +export const listenForUncaughtErrors = (logger: Logger) => { + process.on('unhandledRejection', (thrown: any) => { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) + logger.critical(`Unhandled rejection: "${err.message}"`) + }) + + process.on('uncaughtException', (thrown: Error) => { + const err = thrown instanceof Error ?
thrown : new Error(`${thrown}`) + logger.critical(`Uncaught exception: "${err.message}"`) + }) +}
diff --git a/packages/lang-server/src/util.ts b/packages/lang-server/src/util.ts deleted file mode 100644 index 52a8d61f..00000000 --- a/packages/lang-server/src/util.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { LanguageService } from '@botpress/nlu-engine' -import _ from 'lodash' -import { BadRequestError, NotReadyError } from './utils/http/errors' - -export const serviceLoadingMiddleware = (service: LanguageService) => (_req, _res, next) => { - if (!service.isReady) { - return next(new NotReadyError('Language Server is still loading')) - } - - next() -} - -export const assertValidLanguage = (service: LanguageService) => (req, _res, next) => { - const language = req.body.lang || req.params.lang - - if (!language) { - return next(new BadRequestError("Param 'lang' is mandatory")) - } - - if (!_.isString(language)) { - return next(new BadRequestError(`Param 'lang': ${language} must be a string`)) - } - - const availableLanguages = service.getModels().map((x) => x.lang) - if (!availableLanguages.includes(language)) { - return next(new BadRequestError(`Param 'lang': ${language} is not element of the available languages`)) - } - - req.language = language - next() -}
diff --git a/packages/lang-server/src/utils/http/errors.ts b/packages/lang-server/src/utils/http/errors.ts deleted file mode 100644 index 3d010723..00000000 --- a/packages/lang-server/src/utils/http/errors.ts +++ /dev/null @@ -1,120 +0,0 @@ -import { escapeHtmlSimple } from './index' - -/** - * The object that wraps HTTP errors. - * - * @constructor - * @param message - The error message that will be sent to the end-user - * @param statusCode - The HTTP status code - * @param errorCode - Botpress error codes e.g. BP_0001, BP_0002, etc.
- */ -export class ResponseError extends Error { - errorCode: string | undefined - statusCode: number - - skipLogging = false - - constructor(message: string, statusCode: number, errorCode?: string) { - super(escapeHtmlSimple(message)) - Error.captureStackTrace(this, this.constructor) - this.statusCode = statusCode - this.errorCode = errorCode - } -} - -/** - * A standard error, which doesn't print stack traces, but return an error message to the user - */ -export class StandardError extends ResponseError { - constructor(message: string, detailedMessage?: string) { - super(`${message}: ${detailedMessage}`, 400) - this.skipLogging = true - } - - type = 'StandardError' -} - -export class InvalidOperationError extends ResponseError { - constructor(message: string) { - super('Invalid operation: ' + message, 400, 'BP_0006') - } - - type = 'InvalidOperationError' -} - -export class BadRequestError extends ResponseError { - type = 'BadRequestError' - - constructor(message: string) { - super(`Bad Request: ${message}`, 400, 'BP_0040') - this.skipLogging = true - } -} - -export class NotReadyError extends ResponseError { - type = 'NotReadyError' - - constructor(service: string) { - super(`Service Not Ready: ${service}`, 400, 'BP_0140') - this.skipLogging = true - } -} - -export class UnauthorizedError extends ResponseError { - type = 'UnauthorizedError' - - constructor(message: string) { - super(`Unauthorized: ${message}`, 401, 'BP_0041') - } -} - -export class PaymentRequiredError extends ResponseError { - type = 'PaymentRequiredError' - - constructor(message: string) { - super(message || '', 402, 'BP_0042') - } -} - -export class ForbiddenError extends ResponseError { - type = 'ForbiddenError' - - constructor(message: string) { - super(`Forbidden: ${message}`, 403, 'BP_0043') - } -} - -export class NotFoundError extends ResponseError { - type = 'NotFoundError' - - constructor(message: string) { - super(`Not Found: ${message}`, 404, 'BP_0044') - this.skipLogging = true - } -} - -export class ConflictError extends ResponseError { - type = 'ConflictError' - - constructor(message?: string) { - super(`Conflict: ${message}`, 409, 'BP_0049') - this.skipLogging = true - } -} - -export class InternalServerError extends ResponseError { - type = 'InternalServerError' - - constructor(message?: string) { - super(message || '', 500, 'BP_0050') - } -} - -export class InvalidExternalToken extends ResponseError { - type = 'InvalidExternalToken' - - constructor(message: string) { - super(`Unauthorized: ${message}`, 401, 'BP_0401') - this.skipLogging = true - } -}
diff --git a/packages/lang-server/src/utils/http/index.ts b/packages/lang-server/src/utils/http/index.ts deleted file mode 100644 index a6bc2fbd..00000000 --- a/packages/lang-server/src/utils/http/index.ts +++ /dev/null @@ -1,95 +0,0 @@ -import { Logger } from '@botpress/logger' -import { Request } from 'express' -import _ from 'lodash' -import { UnauthorizedError } from './errors' - -// This method is only used for basic escaping of error messages, do not use for page display -export const escapeHtmlSimple = (str: string) => { - return str - .replace(/</g, '&lt;') - .replace(/>/g, '&gt;') - .replace(/\//g, '&#x2F;') - .replace(/\\/g, '&#x5C;') - .replace(/`/g, '&#96;') -} - -export const isAdminToken = (req, adminToken: string) => { - if (!adminToken || !adminToken.length) { - return true - } - if (!req.headers.authorization) { - return false - } - const [, token] = req.headers.authorization.split(' ') - return token === adminToken -} - -const makeUnauthorizedError = (msg: string) => { - const err
= new UnauthorizedError(msg) - err.skipLogging = true - return err -} - -export const authMiddleware = (secureToken: string, baseLogger: Logger, secondToken?: string) => (req, _res, next) => { - if (!secureToken || !secureToken.length) { - return next() - } - - const logger = baseLogger.sub('api').sub('auth') - - if (!req.headers.authorization) { - logger.error('Authorization header missing', { ip: req.ip }) - return next(makeUnauthorizedError('Authorization header is missing')) - } - - const [scheme, token] = req.headers.authorization.split(' ') - if (scheme.toLowerCase() !== 'bearer') { - logger.error('Schema is missing', { ip: req.ip }) - return next(makeUnauthorizedError(`Unknown scheme "${scheme}" - expected 'bearer <token>'`)) - } - - if (!token) { - logger.error('Token is missing', { ip: req.ip }) - return next(makeUnauthorizedError('Authentication token is missing')) - } - - if (secureToken !== token && secondToken !== token) { - logger.error('Invalid token', { ip: req.ip }) - return next(makeUnauthorizedError('Invalid Bearer token')) - } - - next() -} - -export const disabledReadonlyMiddleware = (readonly: boolean) => (_req, _res, next) => { - if (readonly) { - return next(new UnauthorizedError('API server is running in read-only mode')) - } - - next() -} - -export const handleUnexpectedError = (err, req, res, next) => { - const statusCode = err.statusCode || 500 - const errorCode = err.errorCode || 'BP_000' - const message = (err.errorCode && err.message) || 'Unexpected error' - - res.status(statusCode).json({ - statusCode, - errorCode, - type: err.type || Object.getPrototypeOf(err).name || 'Exception', - message - }) -} - -export const handleErrorLogging = (err, req, res, next) => { - if (err && (err.skipLogging || process.env.SKIP_LOGGING)) { - return res.status(err.statusCode).send(err.message) - } - - next(err) -} - -export type RequestWithLang = Request & { - language?: string -}
diff --git a/packages/lang-server/tsconfig.json b/packages/lang-server/tsconfig.json index 93d05a7b..33f4f53c 100644 --- a/packages/lang-server/tsconfig.json +++ b/packages/lang-server/tsconfig.json @@ -1,6 +1,10 @@ { "extends": "../../tsconfig.packages.json", - "references": [{ "path": "../nlu-engine" }, { "path": "../logger" }], + "references": [ + { "path": "../telemetry" }, + { "path": "../nlu-engine" }, + { "path": "../lang-client" } + ], "compilerOptions": { "outDir": "./dist" /* Redirect output structure to the directory. */, "rootDir": "./src" /* Specify the root directory of input files. Use to control the output directory structure with --outDir.
*/, diff --git a/packages/locks/package.json b/packages/locks/package.json deleted file mode 100644 index 91162aaa..00000000 --- a/packages/locks/package.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "name": "@botpress/locks", - "version": "0.0.1", - "author": "Botpress, Inc.", - "license": "AGPL-3.0", - "scripts": { - "build": "tsc", - "test": "echo \"no tests\"" - }, - "dependencies": { - "lodash": "^4.17.19", - "pg": "^8.7.1" - }, - "devDependencies": { - "@types/lodash": "^4.14.116", - "@types/node": "^12.13.0", - "@types/pg": "^8.6.1" - }, - "types": "./src/typings.d.ts", - "main": "./dist/index.js" -} diff --git a/packages/locks/src/index.ts b/packages/locks/src/index.ts deleted file mode 100644 index 85fb56a2..00000000 --- a/packages/locks/src/index.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { InMemoryTransactionQueue } from './in-mem-trx-queue' -import { PGTransactionQueue } from './pg-trx-queue' -import * as sdk from './typings' - -export const makeInMemoryTrxQueue: typeof sdk.makeInMemoryTrxQueue = (logger?: sdk.Logger) => - new InMemoryTransactionQueue(logger) -export const makePostgresTrxQueue: typeof sdk.makePostgresTrxQueue = (dbURL: string, logger?: sdk.Logger) => - new PGTransactionQueue(dbURL, logger) diff --git a/packages/logger/jest.config.js b/packages/logger/jest.config.js deleted file mode 100644 index 61ea9c96..00000000 --- a/packages/logger/jest.config.js +++ /dev/null @@ -1,11 +0,0 @@ -module.exports = { - preset: 'ts-jest', - testEnvironment: 'node', - testPathIgnorePatterns: ['dist', 'node_modules'], - rootDir: '.', - resetModules: true, - verbose: true, - modulePaths: ['/src/'], - moduleFileExtensions: ['js', 'json', 'jsx', 'ts', 'tsx', 'd.ts'], - modulePathIgnorePatterns: ['out'] -} diff --git a/packages/logger/package.json b/packages/logger/package.json deleted file mode 100644 index 628a1738..00000000 --- a/packages/logger/package.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "name": "@botpress/logger", - "version": "1.0.0", - "description": "Botpress NLU Logger", - "main": "./dist/index.js", - "author": "Botpress, Inc.", - "license": "AGPL-3.0", - "types": "./src/typings.d.ts", - "dependencies": { - "lodash": "^4.17.19" - }, - "devDependencies": { - "@types/lodash": "^4.14.116", - "@types/node": "^12.13.0", - "jest": "^24.9.0", - "ts-jest": "^26.5.5", - "typescript": "^3.9.10" - }, - "scripts": { - "build": "tsc --build", - "test": "cross-env jest -i --detectOpenHandles -c jest.config.js" - } -} diff --git a/packages/logger/readme.md b/packages/logger/readme.md deleted file mode 100644 index 9c02aa4d..00000000 --- a/packages/logger/readme.md +++ /dev/null @@ -1,7 +0,0 @@ -# NLU Logger - -This package contains a Logger for NLU web servers and other products. - -## Licensing - -This software is protected by the same license as the [main Botpress repository](https://github.com/botpress/botpress). You can find the license file [here](https://github.com/botpress/botpress/blob/master/LICENSE). 
diff --git a/packages/logger/src/config.ts b/packages/logger/src/config.ts deleted file mode 100644 index b6b10395..00000000 --- a/packages/logger/src/config.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { ConsoleFormatter } from './formatters/console' -import { ConsoleTransport } from './transports/console' -import { LoggerConfig } from './typings' - -const LOG_PREFIX = 'NLU' - -export enum LoggerLevel { - Critical = 0, - Error = 1, - Warn = 2, - Info = 3, - Debug = 4 -} - -export const defaultConfig: LoggerConfig = { - level: LoggerLevel.Info, - minLevel: undefined, - timeFormat: 'L HH:mm:ss.SSS', - namespaceDelimiter: ':', - colors: { - [LoggerLevel.Debug]: 'blue', - [LoggerLevel.Info]: 'green', - [LoggerLevel.Warn]: 'yellow', - [LoggerLevel.Error]: 'red', - [LoggerLevel.Critical]: 'red' - }, - formatter: new ConsoleFormatter({ indent: !!process.env.INDENT_LOGS }), - transports: [new ConsoleTransport()], - indent: false, - filters: undefined, // show all logs - prefix: LOG_PREFIX -} diff --git a/packages/logger/src/conforms.test.ts b/packages/logger/src/conforms.test.ts deleted file mode 100644 index 264116f8..00000000 --- a/packages/logger/src/conforms.test.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { conforms } from './transports/console' - -test('rule conform should work', () => { - // arrange - const rule = 'training:svm' - - const tests = [ - { ns: 'training', success: false }, - { ns: 'training:stuff', success: false }, - { ns: 'training:svm', success: true }, - { ns: 'training:svm:stuff', success: true }, - { ns: 'training:svm:stuff:stuff-again', success: true }, - { ns: '', success: false } - ] - - for (const test of tests) { - // act - const actual = conforms(test.ns, rule, ':') - const expected = test.success - - // assert - expect(actual).toBe(expected) - } -}) - -test('empty rule should apply to everything', () => { - // arrange - const rule = '' - - const tests = [ - { ns: 'training', success: true }, - { ns: 'training:stuff', success: true }, - { ns: 'training:svm', success: true }, - { ns: 'training:svm:stuff', success: true }, - { ns: 'training:svm:stuff:stuff-again', success: true }, - { ns: '', success: true } - ] - - for (const test of tests) { - // act - const actual = conforms(test.ns, rule, ':') - const expected = test.success - - // assert - expect(actual).toBe(expected) - } -}) diff --git a/packages/logger/src/formatters/console.ts b/packages/logger/src/formatters/console.ts deleted file mode 100644 index 1f212bb8..00000000 --- a/packages/logger/src/formatters/console.ts +++ /dev/null @@ -1,55 +0,0 @@ -import chalk from 'chalk' -import _ from 'lodash' -import moment from 'moment' -import os from 'os' -import util from 'util' -import { LoggerLevel } from '../config' -import { FormattedLogEntry, LogEntry, LogEntryFormatter, LoggerConfig } from '../typings' - -interface ConsoleFormatterOpts { - indent: boolean -} - -function _serializeArgs(args: any): string { - if (_.isArray(args)) { - return args.map((arg) => _serializeArgs(arg)).join(', ') - } else if (_.isObject(args)) { - return util.inspect(args, false, 2, true) - } else if (_.isString(args)) { - return args.trim() - } else if (args && args.toString) { - return args.toString() - } else { - return '' - } -} - -export class ConsoleFormatter implements LogEntryFormatter { - constructor(private _opts: ConsoleFormatterOpts = { indent: false }) {} - - format(config: LoggerConfig, entry: LogEntry): FormattedLogEntry { - const time = moment().format(config.timeFormat) - const serializedMetadata = entry.metadata ? 
_serializeArgs(entry.metadata) : '' - - const prefix = config.prefix ? `[${config.prefix}] ` : '' - const displayName = this._opts.indent - ? entry.namespace.substr(0, 15).padEnd(15, ' ') - : `${prefix}${entry.namespace}` - // eslint-disable-next-line prefer-template - const newLineIndent = chalk.dim(' '.repeat(`${config.timeFormat} ${displayName}`.length)) + ' ' - let indentedMessage = - entry.level === LoggerLevel.Error ? entry.message : entry.message.replace(/\r\n|\n/g, os.EOL + newLineIndent) - - if (entry.type === 'stacktrace' && entry.stack) { - indentedMessage += chalk.grey(os.EOL + 'STACK TRACE') - indentedMessage += chalk.grey(os.EOL + entry.stack) - } - - return { - ...entry, - formatted: chalk`{grey ${time}} {${ - config.colors[entry.level] - }.bold ${displayName}} ${indentedMessage}${serializedMetadata}` - } - } -}
diff --git a/packages/logger/src/index.ts b/packages/logger/src/index.ts deleted file mode 100644 index 25981ece..00000000 --- a/packages/logger/src/index.ts +++ /dev/null @@ -1,23 +0,0 @@ -import _ from 'lodash' -import { LoggerLevel as EnumLoggerLevel } from './config' -import { Logger } from './logger' -import * as sdk from './typings' - -export const centerText: typeof sdk.centerText = (text: string, width: number, indent: number = 0) => { - const padding = Math.floor((width - text.length) / 2) - return _.repeat(' ', padding + indent) + text + _.repeat(' ', padding) -} - -export const LoggerLevel: typeof sdk.LoggerLevel = { - Critical: EnumLoggerLevel.Critical, - Error: EnumLoggerLevel.Error, - Warn: EnumLoggerLevel.Warn, - Info: EnumLoggerLevel.Info, - Debug: EnumLoggerLevel.Debug -} - -export const makeLogger: typeof sdk.makeLogger = (config: Partial<sdk.LoggerConfig> = {}) => { - const logger = new Logger() - logger.configure(config) - return logger -}
diff --git a/packages/logger/src/logger.ts b/packages/logger/src/logger.ts deleted file mode 100644 index a420684d..00000000 --- a/packages/logger/src/logger.ts +++ /dev/null @@ -1,72 +0,0 @@ -import { defaultConfig, LoggerLevel } from './config' -import * as sdk from './typings' - -export class Logger implements sdk.Logger { - public static default = new Logger() - private static _GLOBAL_NAMESPACE = 'global' - private _loggers = new Map<string, Logger>() - private _config: sdk.LoggerConfig = defaultConfig - public parent: Logger | null = null - public namespace: string = '' - - constructor(private _name: string = Logger._GLOBAL_NAMESPACE) {} - - configure(config: Partial<sdk.LoggerConfig>) { - this._config = { ...this._config, ...config } - - // logger configures all childs - for (const logger of this._loggers.values()) { - logger.configure(config) - } - } - - public sub(name: string): Logger { - if (this._loggers.has(name)) { - return this._loggers.get(name)! - } - const logger = new Logger(name) - - if (name === Logger._GLOBAL_NAMESPACE) { - logger.parent = null - logger.namespace = '' - } else { - logger.parent = this - logger._config = { ...this._config } // copy parent config - logger.namespace = logger.parent.namespace.length ?
logger.parent.namespace + this._config.namespaceDelimiter : '' - logger.namespace += name - } - - this._loggers.set(name, logger) - return logger - } - - attachError(error: Error): this { - this.push({ type: 'stacktrace', level: LoggerLevel.Critical, message: error.message, stack: error.stack }) - return this - } - - private push(entry: Omit<sdk.LogEntry, 'namespace'>) { - const formattedEntry = this._config.formatter.format(this._config, { ...entry, namespace: this.namespace }) - this._config.transports.forEach((transport) => transport.send(this._config, formattedEntry)) - } - - critical(message: string, metadata?: any): void { - this.push({ type: 'log', level: LoggerLevel.Critical, message, metadata }) - } - - debug(message: string, metadata?: any): void { - this.push({ type: 'log', level: LoggerLevel.Debug, message, metadata }) - } - - info(message: string, metadata?: any): void { - this.push({ type: 'log', level: LoggerLevel.Info, message, metadata }) - } - - warn(message: string, metadata?: any): void { - this.push({ type: 'log', level: LoggerLevel.Warn, message, metadata }) - } - - error(message: string, metadata?: any): void { - this.push({ type: 'log', level: LoggerLevel.Error, message, metadata }) - } -}
diff --git a/packages/logger/src/transports/console.ts b/packages/logger/src/transports/console.ts deleted file mode 100644 index 3e087a37..00000000 --- a/packages/logger/src/transports/console.ts +++ /dev/null @@ -1,41 +0,0 @@ -import _ from 'lodash' -import { FormattedLogEntry, LoggerConfig, LogTransporter } from '../typings' - -export const conforms = (namespace: string, rule: string, delimiter: string) => { - if (!rule) { - return true - } - const splittedRule = rule.split(delimiter) - const namespaces = namespace.split(delimiter) - - const truthTable = _.zip(splittedRule, namespaces).map(([r, ns]) => (r === undefined ?
true : r === ns)) - return !truthTable.includes(false) -} - -export class ConsoleTransport implements LogTransporter { - send(config: LoggerConfig, entry: FormattedLogEntry) { - if (config.minLevel && entry.level <= config.minLevel) { - this._log(entry.formatted) - return - } - - if (entry.level <= config.level) { - if (!config.filters) { - this._log(entry.formatted) - return - } - - for (const rule of config.filters) { - if (conforms(entry.namespace, rule, config.namespaceDelimiter)) { - this._log(entry.formatted) - break - } - } - } - } - - private _log(msg: string) { - // eslint-disable-next-line no-console - console.log(msg) - } -}
diff --git a/packages/logger/src/typings.d.ts b/packages/logger/src/typings.d.ts deleted file mode 100644 index ec421d46..00000000 --- a/packages/logger/src/typings.d.ts +++ /dev/null @@ -1,56 +0,0 @@ -export interface Logger { - attachError(error: Error): this - configure(config: Partial<LoggerConfig>): void - debug(message: string, metadata?: any): void - info(message: string, metadata?: any): void - warn(message: string, metadata?: any): void - error(message: string, metadata?: any): void - critical(message: string, metadata?: any): void - sub(namespace: string): Logger -} - -export type LogEntryType = 'log' | 'stacktrace' - -export interface LogEntry { - type: LogEntryType - level: number - message: string - namespace: string - metadata?: any - stack?: any -} - -export type FormattedLogEntry = LogEntry & { - formatted: string -} - -export interface LogEntryFormatter { - format(config: LoggerConfig, entry: LogEntry): FormattedLogEntry -} - -export interface LogTransporter { - send(config: LoggerConfig, entry: FormattedLogEntry): Promise<void> | void -} - -export interface LoggerConfig { - level: number - minLevel: number | undefined // if defined, allows to bypass filters - formatter: LogEntryFormatter - transports: LogTransporter[] - timeFormat: string // moment time format - namespaceDelimiter: string - colors: { [level: number]: string } - indent: boolean - filters: string[] | undefined // if undefined, all logs are displayed - prefix?: string -} - -export const centerText: (text: string, width: number, indent: number = 0) => string -export const LoggerLevel: { - Critical: 0 - Error: 1 - Warn: 2 - Info: 3 - Debug: 4 -} -export const makeLogger: (config?: Partial<LoggerConfig>) => Logger
diff --git a/packages/logger/src/wrap.ts b/packages/logger/src/wrap.ts deleted file mode 100644 index 75d19a01..00000000 --- a/packages/logger/src/wrap.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { Logger as EngineLogger } from '@botpress/nlu-engine' -import { Logger } from './typings' - -export const wrapLogger = (logger: Logger): EngineLogger => { - return { - debug: (msg: string) => logger.debug(msg), - info: (msg: string) => logger.info(msg), - warning: (msg: string, err?: Error) => (err ? logger.attachError(err).warn(msg) : logger.warn(msg)), - error: (msg: string, err?: Error) => (err ?
logger.attachError(err).error(msg) : logger.error(msg)), - sub: (namespace: string) => wrapLogger(logger.sub(namespace)) - } -} diff --git a/packages/logger/tsconfig.test.json b/packages/logger/tsconfig.test.json deleted file mode 100644 index 5c07341f..00000000 --- a/packages/logger/tsconfig.test.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "extends": "./tsconfig.json", - "compilerOptions": { - "allowJs": true, - "typeRoots": ["./node_modules/@types"], - "types": ["jest", "node", "botpress", "global", "knex", "string"] - } -} diff --git a/packages/e2e/current_scores/bpds-intent.json b/packages/nlu-bench/current_scores/bpds-intent.json similarity index 87% rename from packages/e2e/current_scores/bpds-intent.json rename to packages/nlu-bench/current_scores/bpds-intent.json index 246a81c7..c559c40a 100644 --- a/packages/e2e/current_scores/bpds-intent.json +++ b/packages/nlu-bench/current_scores/bpds-intent.json @@ -1,5 +1,5 @@ { - "generatedOn": "2021-10-25T20:48:31.599Z", + "generatedOn": "2022-03-01T21:51:41.244Z", "scores": [ { "metric": "accuracy", @@ -35,7 +35,7 @@ "metric": "accuracy", "problem": "bpsd A-en", "seed": 69, - "score": 0.6681818181818182 + "score": 0.6636363636363637 }, { "metric": "oosAccuracy", @@ -131,25 +131,25 @@ "metric": "oosAccuracy", "problem": "bpds A imbalanced-en", "seed": 69, - "score": 0.6954545454545454 + "score": 0.7 }, { "metric": "oosPrecision", "problem": "bpds A imbalanced-en", "seed": 69, - "score": 0.7619047619047619 + "score": 0.7833333333333333 }, { "metric": "oosRecall", "problem": "bpds A imbalanced-en", "seed": 69, - "score": 0.48 + "score": 0.47 }, { "metric": "oosF1", "problem": "bpds A imbalanced-en", "seed": 69, - "score": 0.5889570552147239 + "score": 0.5874999999999999 }, { "metric": "accuracy", @@ -161,31 +161,31 @@ "metric": "oosAccuracy", "problem": "bpds A imbalanced-en", "seed": 666, - "score": 0.7181818181818181 + "score": 0.7227272727272728 }, { "metric": "oosPrecision", "problem": "bpds A imbalanced-en", "seed": 666, - "score": 0.8392857142857143 + "score": 0.8305084745762712 }, { "metric": "oosRecall", "problem": "bpds A imbalanced-en", "seed": 666, - "score": 0.47 + "score": 0.49 }, { "metric": "oosF1", "problem": "bpds A imbalanced-en", "seed": 666, - "score": 0.6025641025641026 + "score": 0.6163522012578617 }, { "metric": "accuracy", "problem": "bpds A fewshot-en", "seed": 42, - "score": 0.5636363636363636 + "score": 0.5590909090909091 }, { "metric": "oosAccuracy", @@ -215,37 +215,37 @@ "metric": "accuracy", "problem": "bpds A fewshot-en", "seed": 69, - "score": 0.5045454545454545 + "score": 0.4818181818181818 }, { "metric": "oosAccuracy", "problem": "bpds A fewshot-en", "seed": 69, - "score": 0.6590909090909091 + "score": 0.6318181818181818 }, { "metric": "oosPrecision", "problem": "bpds A fewshot-en", "seed": 69, - "score": 0.6923076923076923 + "score": 0.6610169491525424 }, { "metric": "oosRecall", "problem": "bpds A fewshot-en", "seed": 69, - "score": 0.45 + "score": 0.39 }, { "metric": "oosF1", "problem": "bpds A fewshot-en", "seed": 69, - "score": 0.5454545454545455 + "score": 0.49056603773584895 }, { "metric": "accuracy", "problem": "bpds A fewshot-en", "seed": 666, - "score": 0.5954545454545455 + "score": 0.5818181818181818 }, { "metric": "oosAccuracy", @@ -257,109 +257,109 @@ "metric": "oosPrecision", "problem": "bpds A fewshot-en", "seed": 666, - "score": 0.7191011235955056 + "score": 0.7241379310344828 }, { "metric": "oosRecall", "problem": "bpds A fewshot-en", "seed": 666, - "score": 0.64 + "score": 0.63 }, { "metric": 
"oosF1", "problem": "bpds A fewshot-en", "seed": 666, - "score": 0.6772486772486772 + "score": 0.6737967914438503 }, { "metric": "accuracy", "problem": "bpds B", "seed": 42, - "score": 0.6545454545454545 + "score": 0.6363636363636364 }, { "metric": "oosAccuracy", "problem": "bpds B", "seed": 42, - "score": 0.7727272727272727 + "score": 0.75 }, { "metric": "oosPrecision", "problem": "bpds B", "seed": 42, - "score": 0.8571428571428571 + "score": 0.835820895522388 }, { "metric": "oosRecall", "problem": "bpds B", "seed": 42, - "score": 0.6 + "score": 0.56 }, { "metric": "oosF1", "problem": "bpds B", "seed": 42, - "score": 0.7058823529411764 + "score": 0.6706586826347306 }, { "metric": "accuracy", "problem": "bpds B", "seed": 69, - "score": 0.6590909090909091 + "score": 0.6545454545454545 }, { "metric": "oosAccuracy", "problem": "bpds B", "seed": 69, - "score": 0.7681818181818182 + "score": 0.7727272727272727 }, { "metric": "oosPrecision", "problem": "bpds B", "seed": 69, - "score": 0.8769230769230769 + "score": 0.9032258064516129 }, { "metric": "oosRecall", "problem": "bpds B", "seed": 69, - "score": 0.57 + "score": 0.56 }, { "metric": "oosF1", "problem": "bpds B", "seed": 69, - "score": 0.6909090909090909 + "score": 0.691358024691358 }, { "metric": "accuracy", "problem": "bpds B", "seed": 666, - "score": 0.6136363636363636 + "score": 0.6227272727272727 }, { "metric": "oosAccuracy", "problem": "bpds B", "seed": 666, - "score": 0.7318181818181818 + "score": 0.7363636363636363 }, { "metric": "oosPrecision", "problem": "bpds B", "seed": 666, - "score": 0.847457627118644 + "score": 0.85 }, { "metric": "oosRecall", "problem": "bpds B", "seed": 666, - "score": 0.5 + "score": 0.51 }, { "metric": "oosF1", "problem": "bpds B", "seed": 666, - "score": 0.6289308176100629 + "score": 0.6375000000000001 }, { "metric": "accuracy", @@ -395,19 +395,19 @@ "metric": "accuracy", "problem": "bpsd A-fr", "seed": 69, - "score": 0.6590909090909091 + "score": 0.6545454545454545 }, { "metric": "oosAccuracy", "problem": "bpsd A-fr", "seed": 69, - "score": 0.7318181818181818 + "score": 0.7272727272727273 }, { "metric": "oosPrecision", "problem": "bpsd A-fr", "seed": 69, - "score": 0.711340206185567 + "score": 0.7040816326530612 }, { "metric": "oosRecall", @@ -419,43 +419,43 @@ "metric": "oosF1", "problem": "bpsd A-fr", "seed": 69, - "score": 0.700507614213198 + "score": 0.6969696969696969 }, { "metric": "accuracy", "problem": "bpsd A-fr", "seed": 666, - "score": 0.6727272727272727 + "score": 0.6681818181818182 }, { "metric": "oosAccuracy", "problem": "bpsd A-fr", "seed": 666, - "score": 0.7545454545454545 + "score": 0.7454545454545455 }, { "metric": "oosPrecision", "problem": "bpsd A-fr", "seed": 666, - "score": 0.7613636363636364 + "score": 0.7291666666666666 }, { "metric": "oosRecall", "problem": "bpsd A-fr", "seed": 666, - "score": 0.67 + "score": 0.7 }, { "metric": "oosF1", "problem": "bpsd A-fr", "seed": 666, - "score": 0.7127659574468085 + "score": 0.7142857142857142 }, { "metric": "accuracy", "problem": "bpds A imbalanced-fr", "seed": 42, - "score": 0.4636363636363636 + "score": 0.4409090909090909 }, { "metric": "oosAccuracy", @@ -485,7 +485,7 @@ "metric": "accuracy", "problem": "bpds A imbalanced-fr", "seed": 69, - "score": 0.4863636363636364 + "score": 0.4727272727272727 }, { "metric": "oosAccuracy", @@ -515,31 +515,31 @@ "metric": "accuracy", "problem": "bpds A imbalanced-fr", "seed": 666, - "score": 0.4818181818181818 + "score": 0.4863636363636364 }, { "metric": "oosAccuracy", "problem": "bpds A 
imbalanced-fr", "seed": 666, - "score": 0.7 + "score": 0.6863636363636364 }, { "metric": "oosPrecision", "problem": "bpds A imbalanced-fr", "seed": 666, - "score": 0.7236842105263158 + "score": 0.691358024691358 }, { "metric": "oosRecall", "problem": "bpds A imbalanced-fr", "seed": 666, - "score": 0.55 + "score": 0.56 }, { "metric": "oosF1", "problem": "bpds A imbalanced-fr", "seed": 666, - "score": 0.6250000000000001 + "score": 0.6187845303867403 }, { "metric": "accuracy", @@ -575,61 +575,61 @@ "metric": "accuracy", "problem": "bpds A fewshot-fr", "seed": 69, - "score": 0.6045454545454545 + "score": 0.5727272727272728 }, { "metric": "oosAccuracy", "problem": "bpds A fewshot-fr", "seed": 69, - "score": 0.759090909090909 + "score": 0.7454545454545455 }, { "metric": "oosPrecision", "problem": "bpds A fewshot-fr", "seed": 69, - "score": 0.7582417582417582 + "score": 0.7391304347826086 }, { "metric": "oosRecall", "problem": "bpds A fewshot-fr", "seed": 69, - "score": 0.69 + "score": 0.68 }, { "metric": "oosF1", "problem": "bpds A fewshot-fr", "seed": 69, - "score": 0.7225130890052356 + "score": 0.7083333333333334 }, { "metric": "accuracy", "problem": "bpds A fewshot-fr", "seed": 666, - "score": 0.5045454545454545 + "score": 0.5181818181818182 }, { "metric": "oosAccuracy", "problem": "bpds A fewshot-fr", "seed": 666, - "score": 0.7136363636363636 + "score": 0.7318181818181818 }, { "metric": "oosPrecision", "problem": "bpds A fewshot-fr", "seed": 666, - "score": 0.7402597402597403 + "score": 0.7662337662337663 }, { "metric": "oosRecall", "problem": "bpds A fewshot-fr", "seed": 666, - "score": 0.57 + "score": 0.59 }, { "metric": "oosF1", "problem": "bpds A fewshot-fr", "seed": 666, - "score": 0.6440677966101694 + "score": 0.6666666666666666 } ] } diff --git a/packages/e2e/current_scores/bpds-slots.json b/packages/nlu-bench/current_scores/bpds-slots.json similarity index 98% rename from packages/e2e/current_scores/bpds-slots.json rename to packages/nlu-bench/current_scores/bpds-slots.json index 7400b3a7..641439c9 100644 --- a/packages/e2e/current_scores/bpds-slots.json +++ b/packages/nlu-bench/current_scores/bpds-slots.json @@ -1,5 +1,5 @@ { - "generatedOn": "2021-10-25T20:48:44.770Z", + "generatedOn": "2022-03-01T21:51:48.094Z", "scores": [ { "metric": "avgScore:slotsAre", diff --git a/packages/e2e/current_scores/bpds-spell.json b/packages/nlu-bench/current_scores/bpds-spell.json similarity index 77% rename from packages/e2e/current_scores/bpds-spell.json rename to packages/nlu-bench/current_scores/bpds-spell.json index 049eaa73..f4daa148 100644 --- a/packages/e2e/current_scores/bpds-spell.json +++ b/packages/nlu-bench/current_scores/bpds-spell.json @@ -1,5 +1,5 @@ { - "generatedOn": "2021-10-25T20:48:49.075Z", + "generatedOn": "2022-03-01T21:51:50.906Z", "scores": [ { "metric": "accuracy", diff --git a/packages/e2e/current_scores/clinc150.json b/packages/nlu-bench/current_scores/clinc150.json similarity index 90% rename from packages/e2e/current_scores/clinc150.json rename to packages/nlu-bench/current_scores/clinc150.json index f7162c17..b4f18082 100644 --- a/packages/e2e/current_scores/clinc150.json +++ b/packages/nlu-bench/current_scores/clinc150.json @@ -1,11 +1,11 @@ { - "generatedOn": "2021-10-25T20:56:35.526Z", + "generatedOn": "2022-03-01T21:55:07.953Z", "scores": [ { "metric": "accuracy", "problem": "clinc150, 20 utt/intent, seed 42", "seed": 42, - "score": 0.6711627906976744 + "score": 0.6713953488372093 }, { "metric": "oosAccuracy", diff --git a/packages/e2e/package.json 
b/packages/nlu-bench/package.json similarity index 67% rename from packages/e2e/package.json rename to packages/nlu-bench/package.json index fa70d67f..5b031a9e 100644 --- a/packages/e2e/package.json +++ b/packages/nlu-bench/package.json @@ -1,7 +1,7 @@ { - "name": "e2e", + "name": "@botpress/nlu-bench", "version": "0.0.1", - "description": "nlu e2e regression checker", + "description": "nlu benchmarks", "main": "./dist/index.js", "license": "MIT", "dependencies": { @@ -15,12 +15,12 @@ "devDependencies": { "@types/lodash": "^4.14.116", "@types/fs-extra": "^5.0.4", - "@types/node": "^12.13.0", - "typescript": "^3.9.10" + "@types/node": "^16.11.10", + "typescript": "^5.0.4" }, "scripts": { "start": "node ./dist/index.js", "build": "tsc --build", - "test": "echo \"no tests\"" + "clean": "rimraf ./dist && rimraf ./node_modules" } } diff --git a/packages/e2e/src/index.ts b/packages/nlu-bench/src/index.ts similarity index 77% rename from packages/e2e/src/index.ts rename to packages/nlu-bench/src/index.ts index 216a6e9e..c3a63b72 100644 --- a/packages/e2e/src/index.ts +++ b/packages/nlu-bench/src/index.ts @@ -39,16 +39,18 @@ async function runTest(test, { update, keepGoing }) { return true } -interface CommandLineArgs { +type CommandLineArgs = { update: boolean keepGoing: boolean + nluEndpoint: string tests?: string skip?: string } -function getTests(tests: string | undefined, skip: string | undefined) { - const allTests = [bpdsIntents, bpdsSlots, bpdsSpell, clincIntents].map((t) => t(bitfan)) +function getTests(tests: string | undefined, skip: string | undefined, nluServerEndpoint: string) { + const allTests = [bpdsIntents, bpdsSlots, bpdsSpell, clincIntents].map((t) => t(bitfan, { nluServerEndpoint })) if (skip && tests) { + // eslint-disable-next-line no-console console.log(chalk.yellow('Both --skip and --tests flags are set; Ignoring --skip flag.')) } @@ -64,10 +66,11 @@ function getTests(tests: string | undefined, skip: string | undefined) { } async function main(args: CommandLineArgs) { - const { update, skip, keepGoing, tests: testsToRun } = args + const { update, skip, keepGoing, tests: testsToRun, nluEndpoint } = args - const tests = getTests(testsToRun, skip) - console.log(chalk.green(`Running tests [${tests.map(({ name }) => name).join(', ')}]`)) + const tests = getTests(testsToRun, skip, nluEndpoint) + // eslint-disable-next-line no-console + console.log(chalk.green(`Running benchmarks [${tests.map(({ name }) => name).join(', ')}]`)) let testsPass = true for (const test of tests) { @@ -82,15 +85,22 @@ async function main(args: CommandLineArgs) { } if (!testsPass) { + // eslint-disable-next-line no-console console.log(chalk.red('There was a regression in at least one test.')) } } yargs .command( - ['e2e', '$0'], - 'Launch e2e tests on nlu-server', + ['bench', '$0'], + 'Launch benchmarks on nlu-server', { + nluEndpoint: { + type: 'string', + alias: 'e', + required: true, + default: 'http://localhost:3200' + }, update: { alias: 'u', description: 'Whether or not to update latest results', @@ -123,4 +133,9 @@ yargs }) } ) + .command(['list', 'ls'], 'List benchmarks', {}, (argv) => { + const tests = getTests(undefined, undefined, '').map(({ name }) => name) + // eslint-disable-next-line no-console + console.log(tests.join(',\n')) + }) .help().argv diff --git a/packages/e2e/src/score-service.ts b/packages/nlu-bench/src/score-service.ts similarity index 100% rename from packages/e2e/src/score-service.ts rename to packages/nlu-bench/src/score-service.ts diff --git 
a/packages/nlu-bench/src/tests/bpds-intents.ts b/packages/nlu-bench/src/tests/bpds-intents.ts new file mode 100644 index 00000000..bff729bd --- /dev/null +++ b/packages/nlu-bench/src/tests/bpds-intents.ts @@ -0,0 +1,89 @@ +import bitfan from '@botpress/bitfan' +import { Args } from './typings' + +const problemMaker = (_bitfan: typeof bitfan) => async ( + name: string, + lang: string, + trainSet: string, + testSet: string +): Promise<bitfan.Problem<'intent'>> => { + const fileDef = { + lang, + fileType: <'dataset'>'dataset', + type: <'intent'>'intent', + namespace: 'bpds' + } + const trainFileDef = { name: trainSet, ...fileDef } + const testFileDef = { name: testSet, ...fileDef } + + return { + name, + type: 'intent', + trainSet: await _bitfan.datasets.readDataset(trainFileDef), + testSet: await _bitfan.datasets.readDataset(testFileDef), + lang + } +} + +export default function (_bitfan: typeof bitfan, args: Args) { + const metrics = [ + _bitfan.metrics.accuracy, + _bitfan.metrics.oosAccuracy, + _bitfan.metrics.oosPrecision, + _bitfan.metrics.oosRecall, + _bitfan.metrics.oosF1 + ] + + return { + name: 'bpds-intent', + + computePerformance: async () => { + const makeProblem = problemMaker(_bitfan) + let problems = [ + await makeProblem('bpsd A-en', 'en', 'A-train', 'A-test'), + await makeProblem('bpds A imbalanced-en', 'en', 'A-imbalanced-train', 'A-test'), + await makeProblem('bpds A fewshot-en', 'en', 'A-fewshot-train', 'A-test'), + await makeProblem('bpds B', 'en', 'B-train', 'B-test'), + await makeProblem('bpsd A-fr', 'fr', 'A-train', 'A-test'), + await makeProblem('bpds A imbalanced-fr', 'fr', 'A-imbalanced-train', 'A-test'), + await makeProblem('bpds A fewshot-fr', 'fr', 'A-fewshot-train', 'A-test') + ] + + const usedLang = process.env.BITFAN_LANG + if (usedLang) { + problems = problems.filter((p) => p.lang === usedLang) + } + + const { nluServerEndpoint } = args + const engine = _bitfan.engines.makeBpIntentEngine(nluServerEndpoint) + + const solution: bitfan.Solution<'intent'> = { + name: 'bpds intent', + problems, + engine + } + + const seeds = [42, 69, 666] + const results = await _bitfan.runSolution(solution, seeds) + + const performanceReport = _bitfan.evaluateMetrics(results, metrics) + + await _bitfan.visualisation.showPerformanceReport(performanceReport, { groupBy: 'seed' }) + await _bitfan.visualisation.showPerformanceReport(performanceReport, { groupBy: 'problem' }) + await _bitfan.visualisation.showOOSConfusion(results) + + return performanceReport + }, + + evaluatePerformance: (currentPerformance, previousPerformance) => { + const toleranceByMetric = { + [_bitfan.metrics.accuracy.name]: 0.075, + [_bitfan.metrics.oosAccuracy.name]: 0.075, + [_bitfan.metrics.oosPrecision.name]: 0.075, + [_bitfan.metrics.oosRecall.name]: 0.075, + [_bitfan.metrics.oosF1.name]: 0.15 // more tolerance for f1 score + } + return _bitfan.comparePerformances(currentPerformance, previousPerformance, { toleranceByMetric }) + } + } +}
diff --git a/packages/e2e/src/tests/bpds-slots.ts b/packages/nlu-bench/src/tests/bpds-slots.ts similarity index 51% rename from packages/e2e/src/tests/bpds-slots.ts rename to packages/nlu-bench/src/tests/bpds-slots.ts index 0250321e..0159b61c 100644 --- a/packages/e2e/src/tests/bpds-slots.ts +++ b/packages/nlu-bench/src/tests/bpds-slots.ts @@ -1,10 +1,12 @@ +import bitfan from '@botpress/bitfan' import Bluebird from 'bluebird' +import { Args } from './typings' -const problemMaker = (bitfan) => async (topic) => { +const problemMaker = (_bitfan: typeof bitfan) => async (topic: string): Promise<bitfan.Problem<'slot'>> => {
const fileDef = { lang: 'en', - fileType: 'dataset', - type: 'slot', + fileType: <'dataset'>'dataset', + type: <'slot'>'slot', namespace: 'bpds' } @@ -14,16 +16,16 @@ const problemMaker = (bitfan) => async (topic) => { return { name: `bpds slot ${topic}`, type: 'slot', - trainSet: await bitfan.datasets.readDataset(trainFileDef), - testSet: await bitfan.datasets.readDataset(testFileDef), + trainSet: await _bitfan.datasets.readDataset(trainFileDef), + testSet: await _bitfan.datasets.readDataset(testFileDef), lang: 'en' } } -export default function (bitfan) { - const avgStrictSlotAccuray = bitfan.metrics.averageScore(bitfan.criterias.slotsAre) - const avgLooseSlotAccuray = bitfan.metrics.averageScore(bitfan.criterias.slotIncludes) - const avgSlotCountAccuray = bitfan.metrics.averageScore(bitfan.criterias.slotCountIs) +export default function (_bitfan: typeof bitfan, args: Args) { + const avgStrictSlotAccuray = _bitfan.metrics.averageScore(_bitfan.criterias.slotsAre) + const avgLooseSlotAccuray = _bitfan.metrics.averageScore(_bitfan.criterias.slotIncludes) + const avgSlotCountAccuray = _bitfan.metrics.averageScore(_bitfan.criterias.slotCountIs) const metrics = [avgStrictSlotAccuray, avgLooseSlotAccuray, avgSlotCountAccuray] @@ -43,12 +45,11 @@ export default function (bitfan) { 'I' ] - const makeProblem = problemMaker(bitfan) + const makeProblem = problemMaker(_bitfan) const problems = await Bluebird.map(allTopics, makeProblem) - const nluServerEndpoint = process.env.NLU_SERVER_ENDPOINT ?? 'http://localhost:3200' - const password = '123456' - const engine = bitfan.engines.makeBpSlotEngine(nluServerEndpoint, password) + const { nluServerEndpoint } = args + const engine = _bitfan.engines.makeBpSlotEngine(nluServerEndpoint) const solution = { name: 'bpds slot', @@ -57,10 +58,10 @@ export default function (bitfan) { } const seeds = [42] - const results = await bitfan.runSolution(solution, seeds) + const results = await _bitfan.runSolution(solution, seeds) - const report = bitfan.evaluateMetrics(results, metrics) - bitfan.visualisation.showPerformanceReport(report) + const report = _bitfan.evaluateMetrics(results, metrics) + _bitfan.visualisation.showPerformanceReport(report) // bitfan.visualisation.showSlotsResults(results); return report @@ -72,7 +73,7 @@ export default function (bitfan) { [avgLooseSlotAccuray.name]: 0.02, [avgSlotCountAccuray.name]: 0.02 } - return bitfan.comparePerformances(currentPerformance, previousPerformance, { toleranceByMetric }) + return _bitfan.comparePerformances(currentPerformance, previousPerformance, { toleranceByMetric }) } } } diff --git a/packages/e2e/src/tests/bpds-spell.ts b/packages/nlu-bench/src/tests/bpds-spell.ts similarity index 61% rename from packages/e2e/src/tests/bpds-spell.ts rename to packages/nlu-bench/src/tests/bpds-spell.ts index 3582c1aa..507a98a7 100644 --- a/packages/e2e/src/tests/bpds-spell.ts +++ b/packages/nlu-bench/src/tests/bpds-spell.ts @@ -1,6 +1,7 @@ -import _bitfan, { DataSetDef, DocumentDef, UnsupervisedProblem, Result } from '@botpress/bitfan' +import bitfan, { DataSetDef, DocumentDef, UnsupervisedProblem, Result } from '@botpress/bitfan' import chalk from 'chalk' import yn from 'yn' +import { Args } from './typings' const orange = chalk.rgb(255, 150, 50) @@ -12,19 +13,20 @@ const debugResults = (results: Result<'spell'>[]) => { const formatted = `${i++}. 
[${success}] ${orange(r.text)} -> ${chalk.yellowBright(elected)} | ${chalk.blueBright( r.label )}` + // eslint-disable-next-line no-console console.log(formatted) } } -export default function (bitfan: typeof _bitfan) { - const metrics = [bitfan.metrics.accuracy] +export default function (_bitfan: typeof bitfan, args: Args) { + const metrics = [_bitfan.metrics.accuracy] return { name: 'bpds-spell', computePerformance: async () => { - const nluServerEndpoint = process.env.NLU_SERVER_ENDPOINT ?? 'http://localhost:3200' - const engine = bitfan.engines.makeBpSpellEngine(nluServerEndpoint) + const { nluServerEndpoint } = args + const engine = _bitfan.engines.makeBpSpellEngine(nluServerEndpoint) const trainFileDef: DocumentDef = { name: 'A-train', @@ -45,12 +47,12 @@ export default function (bitfan: typeof _bitfan) { const problem: UnsupervisedProblem<'spell'> = { name: 'bpds A spelling', type: 'spell', - corpus: [await bitfan.datasets.readDocument(trainFileDef)], - testSet: await bitfan.datasets.readDataset(testFileDef), + corpus: [await _bitfan.datasets.readDocument(trainFileDef)], + testSet: await _bitfan.datasets.readDataset(testFileDef), lang: 'en' } - const results = await bitfan.runSolution( + const results = await _bitfan.runSolution( { name: 'bpds spelling', problems: [problem], @@ -59,8 +61,8 @@ export default function (bitfan: typeof _bitfan) { [42] ) - const performanceReport = bitfan.evaluateMetrics(results, metrics) - bitfan.visualisation.showPerformanceReport(performanceReport) + const performanceReport = _bitfan.evaluateMetrics(results, metrics) + _bitfan.visualisation.showPerformanceReport(performanceReport) yn(process.env.DEBUG_RESULTS) && debugResults(results) @@ -69,9 +71,9 @@ export default function (bitfan: typeof _bitfan) { evaluatePerformance: (currentPerformance, previousPerformance) => { const toleranceByMetric = { - [bitfan.metrics.accuracy.name]: 0.02 + [_bitfan.metrics.accuracy.name]: 0.02 } - return bitfan.comparePerformances(currentPerformance, previousPerformance, { toleranceByMetric }) + return _bitfan.comparePerformances(currentPerformance, previousPerformance, { toleranceByMetric }) } } } diff --git a/packages/nlu-bench/src/tests/clinc-intents.ts b/packages/nlu-bench/src/tests/clinc-intents.ts new file mode 100644 index 00000000..6d7230bc --- /dev/null +++ b/packages/nlu-bench/src/tests/clinc-intents.ts @@ -0,0 +1,70 @@ +import bitfan from '@botpress/bitfan' +import { Args } from './typings' + +const problemMaker = (_bitfan: typeof bitfan) => async (name: string, trainSet: string, testSet: string) => { + const fileDef = { + lang: 'en', + fileType: <'dataset'>'dataset', + type: <'intent'>'intent', + namespace: '' + } + const trainFileDef = { name: trainSet, ...fileDef } + const testFileDef = { name: testSet, ...fileDef } + + return { + name, + type: <'intent'>'intent', + trainSet: await _bitfan.datasets.readDataset(trainFileDef), + testSet: await _bitfan.datasets.readDataset(testFileDef), + lang: 'en' + } +} + +export default function (_bitfan: typeof bitfan, args: Args) { + const metrics = [ + _bitfan.metrics.accuracy, + _bitfan.metrics.oosAccuracy, + _bitfan.metrics.oosPrecision, + _bitfan.metrics.oosRecall, + _bitfan.metrics.oosF1 + ] + + return { + name: 'clinc150', + + computePerformance: async () => { + const { nluServerEndpoint } = args + const engine = _bitfan.engines.makeBpIntentEngine(nluServerEndpoint) + + const makeProblem = problemMaker(_bitfan) + + const results = await _bitfan.runSolution( + { + name: 'bpds intent', + problems: [ + await 
makeProblem('clinc150, 20 utt/intent, seed 42', 'clinc150_20_42-train', 'clinc150_100-test') + ], + engine + }, + [42] + ) + + const performanceReport = _bitfan.evaluateMetrics(results, metrics) + await _bitfan.visualisation.showPerformanceReport(performanceReport, { groupBy: 'problem' }) + await _bitfan.visualisation.showOOSConfusion(results) + + return performanceReport + }, + + evaluatePerformance: (currentPerformance, previousPerformance) => { + const toleranceByMetric = { + [_bitfan.metrics.accuracy.name]: 0.05, + [_bitfan.metrics.oosAccuracy.name]: 0.05, + [_bitfan.metrics.oosPrecision.name]: 0.1, + [_bitfan.metrics.oosRecall.name]: 0.1, + [_bitfan.metrics.oosF1.name]: 0.15 // more tolerance for f1 score + } + return _bitfan.comparePerformances(currentPerformance, previousPerformance, { toleranceByMetric }) + } + } +} diff --git a/packages/nlu-bench/src/tests/typings.ts b/packages/nlu-bench/src/tests/typings.ts new file mode 100644 index 00000000..7f7648c8 --- /dev/null +++ b/packages/nlu-bench/src/tests/typings.ts @@ -0,0 +1,3 @@ +export type Args = { + nluServerEndpoint: string +} diff --git a/packages/e2e/tsconfig.json b/packages/nlu-bench/tsconfig.json similarity index 100% rename from packages/e2e/tsconfig.json rename to packages/nlu-bench/tsconfig.json diff --git a/packages/nlu-cli/package.json b/packages/nlu-bin/package.json similarity index 59% rename from packages/nlu-cli/package.json rename to packages/nlu-bin/package.json index 9eda6edb..d25221eb 100644 --- a/packages/nlu-cli/package.json +++ b/packages/nlu-bin/package.json @@ -1,22 +1,24 @@ { - "name": "@botpress/nlu-cli", + "name": "@botpress/nlu-bin", "version": "1.0.2", - "description": "CLI/entry-point to both nlu server or language-server", + "description": "entry-point to both nlu server and language-server", "main": "./dist/index.js", "author": "Botpress, Inc.", "license": "AGPL-3.0", "bin": "./dist/index.js", "dependencies": { "@botpress/lang-server": "*", - "@botpress/logger": "*", "@botpress/nlu-server": "*", - "wtfnode": "^0.9.1", - "yargs": "^17.2.1" + "@bpinternal/log4bot": "^0.0.4", + "@bpinternal/yargs-extra": "^0.0.2", + "decamelize": "5.0.1", + "json-schema": "^0.4.0", + "wtfnode": "^0.9.1" }, "devDependencies": { - "@types/node": "^12.13.0", + "@types/json-schema": "^7.0.9", + "@types/node": "^16.11.10", "@types/wtfnode": "^0.7.0", - "@types/yargs": "^17.0.4", "@typescript-eslint/eslint-plugin": "^4.22.0", "@typescript-eslint/parser": "^4.22.0", "cross-env": "^5.2.0", @@ -27,12 +29,14 @@ "jest": "^24.9.0", "pkg": "^4.3.7", "prettier": "^2.2.1", - "ts-jest": "^26.5.5", - "typescript": "^3.9.10" + "ts-node-dev": "^1.1.6", + "typescript": "^5.0.4", + "yn": "^4.0.0" }, "scripts": { "build": "tsc --build", "start": "cross-env node ./dist/index.js", - "test": "echo \"no tests\"" + "test": "jest --roots ./dist", + "clean": "rimraf ./dist && rimraf ./node_modules" } } diff --git a/packages/nlu-bin/readme.md b/packages/nlu-bin/readme.md new file mode 100644 index 00000000..12735bf0 --- /dev/null +++ b/packages/nlu-bin/readme.md @@ -0,0 +1,25 @@ +# NLU CLI + +## Description + +Small CLI that serves as an entry point for both the nlu and language server. 
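+
+For example, a typical session might look like the following (a sketch only: the binary name `nlu` and the chosen language and dimension are illustrative assumptions, not fixed by this package):
+
+```sh
+# download english embeddings, then serve them
+./nlu lang download --lang en --dim 100
+./nlu lang start --dim 100
+
+# launch the nlu server with default options
+./nlu nlu start
+```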
+ +## Available commands and subcommands + +```sh +your/nlu/exe +│ +├── nlu (default) +│ ├── start (default) # launch the nlu server +│ ├── init # create empty configuration file for nlu-server +│ └── --version # display nlu-server version +└── lang + ├── start (default) # launch the lang server + ├── download # download language models + ├── init # create empty configuration file for lang-server + └── --version # display lang-server version +``` + +## Licensing + +This software is protected by the same license as the [main Botpress repository](https://github.com/botpress/botpress). You can find the license file [here](https://github.com/botpress/botpress/blob/master/LICENSE).
diff --git a/packages/nlu-bin/src/app-data.ts b/packages/nlu-bin/src/app-data.ts new file mode 100644 index 00000000..775ef4d8 --- /dev/null +++ b/packages/nlu-bin/src/app-data.ts @@ -0,0 +1,16 @@ +import path from 'path' + +export function getAppDataPath() { + const homeDir = process.env.APP_DATA_PATH || process.env.HOME || process.env.APPDATA + if (homeDir) { + if (process.platform === 'darwin') { + return path.join(homeDir, 'Library', 'Application Support', 'botpress') + } + + return path.join(homeDir, 'botpress') + } + + const errorMsg = `Could not determine your HOME directory. +Please set the environment variable "APP_DATA_PATH", then start Botpress` + throw new Error(errorMsg) +}
diff --git a/packages/nlu-bin/src/config-file/index.ts b/packages/nlu-bin/src/config-file/index.ts new file mode 100644 index 00000000..d434d411 --- /dev/null +++ b/packages/nlu-bin/src/config-file/index.ts @@ -0,0 +1,46 @@ +import { generateSchema, YargsSchema, YargsArgv } from '@bpinternal/yargs-extra' +import fse from 'fs-extra' +import { validate } from 'json-schema' +import os from 'os' + +import { toUnix } from './to-unix' + +type WriteConfigFileProps<S extends YargsSchema> = { + schemaLocation: string + fileLocation: string + yargSchema: S + force?: boolean +} + +type ReadConfigFileProps<S extends YargsSchema> = { + fileLocation: string + yargSchema: S +} + +export const writeConfigFile = async <S extends YargsSchema>(props: WriteConfigFileProps<S>): Promise<void> => { + const { yargSchema, schemaLocation, fileLocation, force } = props + const schema = generateSchema(yargSchema) + + const $schema = os.platform() !== 'win32' ?
schemaLocation : toUnix(schemaLocation) + const jsonConfig = { $schema } + await fse.writeFile(schemaLocation, JSON.stringify(schema, null, 2)) + + if (!force && fse.existsSync(fileLocation)) { + throw new Error(`File ${fileLocation} already exists.`) + } + await fse.writeFile(fileLocation, JSON.stringify(jsonConfig, null, 2)) +} + +export const readConfigFile = async <S extends YargsSchema>(props: ReadConfigFileProps<S>): Promise<YargsArgv<S>> => { + const { fileLocation, yargSchema } = props + const configFileContent = await fse.readFile(fileLocation, 'utf8') + const { $schema, ...parsedConfigFile } = JSON.parse(configFileContent) + const schema = generateSchema(yargSchema) + const validationResult = validate(parsedConfigFile, schema) + const { valid, errors } = validationResult + if (!valid) { + const errorMsg = errors.map((err) => `${err.property} ${err.message}`).join('\n') + throw new Error(errorMsg) + } + return parsedConfigFile +}
diff --git a/packages/nlu-bin/src/config-file/to-unix.test.ts b/packages/nlu-bin/src/config-file/to-unix.test.ts new file mode 100644 index 00000000..7a5e7ef6 --- /dev/null +++ b/packages/nlu-bin/src/config-file/to-unix.test.ts @@ -0,0 +1,12 @@ +import { toUnix } from './to-unix' + +test.each([ + ['C:\\my\\path', '/my/path'], + ['\\my\\path', '/my/path'], + ['/my/path', '/my/path'], + ['C:\\', '/'], + ['/', '/'] +])('calling toUnix("%s") should return "%s"', (x, expected) => { + const actual = toUnix(x) + expect(actual).toBe(expected) +})
diff --git a/packages/nlu-bin/src/config-file/to-unix.ts b/packages/nlu-bin/src/config-file/to-unix.ts new file mode 100644 index 00000000..834932b0 --- /dev/null +++ b/packages/nlu-bin/src/config-file/to-unix.ts @@ -0,0 +1,11 @@ +import path from 'path' + +const isDrive = (drive: string) => /^[A-Z]:$/.test(drive) + +export const toUnix = (filePath: string) => { + const parts = filePath.split(path.win32.sep) + if (isDrive(parts[0])) { + parts[0] = '' + } + return parts.join(path.posix.sep) +}
diff --git a/packages/nlu-bin/src/index.ts b/packages/nlu-bin/src/index.ts new file mode 100644 index 00000000..e9186a8a --- /dev/null +++ b/packages/nlu-bin/src/index.ts @@ -0,0 +1,166 @@ +import './rewire' +import { run as runLanguageServer, download as downloadLang, version as langServerVersion } from '@botpress/lang-server' +import { run as runNLUServer, version as nluServerVersion } from '@botpress/nlu-server' +import { Logger } from '@bpinternal/log4bot' +import yargs, { parseEnv } from '@bpinternal/yargs-extra' +import path from 'path' +import { getAppDataPath } from './app-data' +import { writeConfigFile, readConfigFile } from './config-file' +import { nluServerParameters, langServerParameters, langDownloadParameters } from './parameters' + +void yargs + .version(false) + .command(['nlu', '$0'], 'Launch a local standalone nlu server', (yargs) => { + const nluLogger = new Logger('', { prefix: 'NLU' }) + return yargs + .command( + ['start', '$0'], + 'Launch a local standalone nlu server', + { + version: { + description: "Prints the NLU Server's version", + type: 'boolean', + default: false + }, + config: { + description: 'Path to your config file.
+            type: 'string',
+            alias: 'c'
+          },
+          ...nluServerParameters
+        },
+        async (argv) => {
+          if (argv.version) {
+            nluLogger.sub('Version').info(nluServerVersion)
+            return
+          }
+          if (argv.config) {
+            const fileArgs = await readConfigFile({
+              fileLocation: argv.config,
+              yargSchema: nluServerParameters
+            })
+            // CLI arguments take precedence over values from the config file
+            argv = { ...fileArgs, ...argv }
+          }
+
+          // environment variables have the lowest precedence
+          argv = { ...parseEnv(nluServerParameters), ...argv }
+          void runNLUServer(argv).catch((err) => {
+            nluLogger.sub('Exit').attachError(err).critical('NLU Server exited after an error occurred.')
+            process.exit(1)
+          })
+        }
+      )
+      .command(
+        'init',
+        'Create a configuration file in the current working directory',
+        {
+          config: {
+            alias: 'c',
+            description: 'Path to where you want your config file to be created.',
+            type: 'string'
+          },
+          force: {
+            alias: 'f',
+            description: 'Whether or not to override the current file.',
+            type: 'boolean'
+          }
+        },
+        (argv) => {
+          const { force, config } = argv
+
+          const defaultFileLocation = path.join(process.cwd(), 'nlu.config.json')
+          const fileLocation = config || defaultFileLocation
+
+          const cachePath = getAppDataPath()
+          void writeConfigFile({
+            fileLocation,
+            schemaLocation: path.join(cachePath, 'nlu.config.schema.json'),
+            yargSchema: nluServerParameters,
+            force
+          }).catch((err) => {
+            nluLogger.sub('Exit').attachError(err).critical('Could not initialize configuration file.')
+            process.exit(1)
+          })
+        }
+      )
+  })
+  .command('lang', 'Launch a local language server', (yargs) => {
+    const langLogger = new Logger('', { prefix: 'LANG' })
+    return yargs
+      .command(
+        ['start', '$0'],
+        'Launch a local language server',
+        {
+          version: {
+            description: "Prints the Lang Server's version",
+            type: 'boolean',
+            default: false
+          },
+          config: {
+            description: 'Path to your config file. If defined, the rest of the CLI arguments are ignored.',
+            type: 'string',
+            alias: 'c'
+          },
+          ...langServerParameters
+        },
+        async (argv) => {
+          if (argv.version) {
+            langLogger.sub('Version').info(langServerVersion)
+            return
+          }
+          if (argv.config) {
+            const fileArgs = await readConfigFile({
+              fileLocation: argv.config,
+              yargSchema: langServerParameters
+            })
+            // CLI arguments take precedence over values from the config file
+            argv = { ...fileArgs, ...argv }
+          }
+
+          // environment variables have the lowest precedence
+          argv = { ...parseEnv(langServerParameters), ...argv }
+          void runLanguageServer(argv).catch((err) => {
+            langLogger.sub('Exit').attachError(err).critical('Language Server exited after an error occurred.')
+            process.exit(1)
+          })
+        }
+      )
+      .command(
+        'init',
+        'Create a configuration file in the current working directory',
+        {
+          config: {
+            alias: 'c',
+            description: 'Path to where you want your config file to be created.',
+            type: 'string'
+          },
+          force: {
+            alias: 'f',
+            description: 'Whether or not to override the current file.',
+            type: 'boolean'
+          }
+        },
+        (argv) => {
+          const { force, config } = argv
+
+          const defaultFileLocation = path.join(process.cwd(), 'lang.config.json')
+          const fileLocation = config || defaultFileLocation
+
+          const cachePath = getAppDataPath()
+          void writeConfigFile({
+            fileLocation,
+            schemaLocation: path.join(cachePath, 'lang.config.schema.json'),
+            yargSchema: langServerParameters,
+            force
+          }).catch((err) => {
+            langLogger.sub('Exit').attachError(err).critical('Could not initialize configuration file.')
+            process.exit(1)
+          })
+        }
+      )
+      .command('download', 'Download a language model for lang and dim', langDownloadParameters, (argv) => {
+        argv = { ...parseEnv(langDownloadParameters), ...argv }
+        void downloadLang(argv).catch((err) => {
+          langLogger.sub('Exit').attachError(err).critical('Language model download exited after an error occurred.')
+          process.exit(1)
+        })
+      })
+  })
+  .help().argv
diff --git a/packages/nlu-bin/src/parameters/index.ts b/packages/nlu-bin/src/parameters/index.ts
new file mode 100644
index 00000000..73751a5f
--- /dev/null
+++ b/packages/nlu-bin/src/parameters/index.ts
@@ -0,0 +1,3 @@
+export { parameters as nluServerParameters } from './nlu-server'
+export { parameters as langServerParameters } from './lang-server'
+export { parameters as langDownloadParameters } from './lang-download'
diff --git a/packages/nlu-bin/src/parameters/lang-download.ts b/packages/nlu-bin/src/parameters/lang-download.ts
new file mode 100644
index 00000000..07177827
--- /dev/null
+++ b/packages/nlu-bin/src/parameters/lang-download.ts
@@ -0,0 +1,26 @@
+import { asYargs } from '@bpinternal/yargs-extra'
+
+export const parameters = asYargs({
+  langDir: {
+    description: 'Directory where language embeddings will be saved',
+    type: 'string'
+  },
+  metadataLocation: {
+    description: 'URL of metadata file which lists available languages',
+    type: 'string'
+  },
+  dim: {
+    description: 'Number of language dimensions provided (25, 100 or 300 at the moment)',
+    type: 'number'
+  },
+  domain: {
+    description: 'Name of the domain the embeddings were trained on.',
+    type: 'string'
+  },
+  lang: {
+    alias: 'l',
+    description: 'Language code of the model to download',
+    type: 'string',
+    demandOption: true
+  }
+})
diff --git a/packages/nlu-bin/src/parameters/lang-server.ts b/packages/nlu-bin/src/parameters/lang-server.ts
new file mode 100644
index 00000000..b3c85791
--- /dev/null
+++ b/packages/nlu-bin/src/parameters/lang-server.ts
@@ -0,0 +1,73 @@
+import { LogFormat } from '@botpress/lang-server'
+import { LogLevel } from '@bpinternal/log4bot'
+import { asYargs } from '@bpinternal/yargs-extra'
+
+const logFormatChoices: LogFormat[] = ['json', 'text']
+const logLevelChoices: LogLevel[] = ['critical', 'error', 'warning', 'info', 'debug']
+
+export const parameters = asYargs({
+  port: {
+    description: 'The port to listen to',
+    type: 'number'
+  },
+  host: {
+    description: 'Binds the language server to a specific hostname',
+    type: 'string'
+  },
+  reverseProxy: {
+    description: 'Allows trusting the specified reverse proxy',
+    type: 'string'
+  },
+  langDir: {
+    description: 'Directory where language embeddings will be saved',
+    type: 'string'
+  },
+  authToken: {
+    description: 'When enabled, this token is required for clients to query your language server',
+    type: 'string'
+  },
+  adminToken: {
+    description: 'This token is required to access the server as admin and manage language.',
+    type: 'string'
+  },
+  limit: {
+    description: 'Maximum number of requests per IP per "limitWindow" interval (0 means unlimited)',
+    type: 'number'
+  },
+  limitWindow: {
+    description: 'Time window on which the limit is applied (use standard notation, ex: 25m or 1h)',
+    type: 'string'
+  },
+  metadataLocation: {
+    description: 'URL of metadata file which lists available languages',
+    type: 'string'
+  },
+  offline: {
+    description: 'Whether or not the language server has internet access',
+    type: 'boolean'
+  },
+  dim: {
+    description: 'Number of language dimensions provided (25, 100 or 300 at the moment)',
+    type: 'number'
+  },
+  domain: {
+    description: 'Name of the domain the embeddings were trained on.',
+    type: 'string'
+  },
+  verbose: {
+    description: 'Verbosity level of the logging, integer from 0 to 4. Does not apply to "Launcher" logger.',
+    type: 'number'
+  },
+  logLevel: {
+    description: 'Verbosity level of the logging. Does not apply to booting logs.',
+    choices: logLevelChoices
+  },
+  logFormat: {
+    description: 'Whether to log using JSON or good old-fashioned formatted text with colors.',
+    choices: logFormatChoices
+  },
+  debugFilter: {
+    description: 'Regexp to filter debug logs by namespace. Only applies if log level is "debug".',
+    type: 'string'
+  }
+})
diff --git a/packages/nlu-bin/src/parameters/nlu-server.ts b/packages/nlu-bin/src/parameters/nlu-server.ts
new file mode 100644
index 00000000..1102537b
--- /dev/null
+++ b/packages/nlu-bin/src/parameters/nlu-server.ts
@@ -0,0 +1,114 @@
+import { LogFormat } from '@botpress/nlu-server'
+import { LogLevel } from '@bpinternal/log4bot'
+import { asYargs } from '@bpinternal/yargs-extra'
+
+const logFormatChoices: LogFormat[] = ['json', 'text']
+const logLevelChoices: LogLevel[] = ['critical', 'error', 'warning', 'info', 'debug']
+
+export const parameters = asYargs({
+  port: {
+    description: 'The port to listen to',
+    type: 'number'
+  },
+  host: {
+    description: 'Binds the nlu server to a specific hostname',
+    type: 'string'
+  },
+  reverseProxy: {
+    description: 'Allows trusting the specified reverse proxy',
+    type: 'string'
+  },
+  dbURL: {
+    description: 'URL of database where to persist models. If undefined, models are stored on FS.',
+    type: 'string'
+  },
+  modelDir: {
+    description: 'Directory where to persist models, ignored if dbURL is set.',
+    type: 'string'
+  },
+  limit: {
+    description: 'Maximum number of requests per IP per "limitWindow" interval (0 means unlimited)',
+    type: 'number'
+  },
+  limitWindow: {
+    description: 'Time window on which the limit is applied (use standard notation, ex: 25m or 1h)',
+    type: 'string'
+  },
+  languageURL: {
+    description: 'URL of your language server',
+    type: 'string'
+  },
+  languageAuthToken: {
+    description: 'Authentication token for your language server',
+    type: 'string'
+  },
+  tracingEnabled: {
+    description: 'When enabled, a tracing client is configured using OpenTelemetry',
+    type: 'boolean'
+  },
+  prometheusEnabled: {
+    description: 'When enabled, a Prometheus endpoint will be available at /metrics',
+    type: 'boolean'
+  },
+  apmEnabled: {
+    description:
+      'When enabled, Sentry is added to the express server allowing the use of the environment variables SENTRY_DSN, SENTRY_ENVIRONMENT, SENTRY_RELEASE',
+    type: 'boolean'
+  },
+  apmSampleRate: {
+    description: 'If apm is configured, this option sets the sample rate of traces',
+    type: 'number'
+  },
+  ducklingURL: {
+    description: 'URL of your Duckling server; only relevant if "ducklingEnabled" is true',
+    type: 'string'
+  },
+  ducklingEnabled: {
+    description: 'Whether or not to enable Duckling',
+    type: 'boolean'
+  },
+  bodySize: {
+    description: 'Allowed size of HTTP requests body',
+    type: 'string'
+  },
+  batchSize: {
+    description: 'Allowed number of text inputs in one call to POST /predict',
+    type: 'number'
+  },
+  modelCacheSize: {
+    description: 'Max allocated memory for model cache. Too little memory will result in more file system access.',
+    type: 'string'
+  },
+  doc: {
+    description: 'Whether or not to display documentation on start',
+    type: 'boolean'
+  },
+  logLevel: {
+    description: 'Verbosity level of the logging. Does not apply to booting logs.',
+    choices: logLevelChoices
+  },
+  logFormat: {
+    description: 'Whether to log using JSON or good old-fashioned formatted text with colors.',
+    choices: logFormatChoices
+  },
+  debugFilter: {
+    description: 'Regexp to filter debug logs by namespace. Only applies if log level is "debug".',
+    type: 'string'
+  },
+  maxTraining: {
+    description: 'The max allowed number of simultaneous trainings on a single instance',
+    type: 'number'
+  },
+  maxLinting: {
+    description: 'The max allowed number of simultaneous lintings on a single instance',
+    type: 'number'
+  },
+  usageURL: {
+    description: 'Endpoint to send usage info to.',
+    type: 'string'
+  },
+  modelTransferEnabled: {
+    description: 'Whether or not to allow model weights download / upload',
+    type: 'boolean'
+  }
+})
diff --git a/packages/nlu-cli/src/rewire.ts b/packages/nlu-bin/src/rewire.ts
similarity index 80%
rename from packages/nlu-cli/src/rewire.ts
rename to packages/nlu-bin/src/rewire.ts
index dfb3cab8..89faaeb6 100644
--- a/packages/nlu-cli/src/rewire.ts
+++ b/packages/nlu-bin/src/rewire.ts
@@ -4,12 +4,11 @@ import Module from 'module'
 
 if (process.env.TS_NODE_DEV) {
   const originalRequire = Module.prototype.require
-  const rewire = function (this: NodeRequireFunction, mod: string) {
+  const rewire: NodeRequire = function (this: NodeRequire, mod: string) {
     if (mod.startsWith('@botpress')) {
       return originalRequire.apply(this, [mod + '/src/index.ts'])
     }
     return originalRequire.apply(this, [mod])
-  }
-
+  } as NodeRequire
   Module.prototype.require = rewire
 }
diff --git a/packages/nlu-cli/tsconfig.json b/packages/nlu-bin/tsconfig.json
similarity index 83%
rename from packages/nlu-cli/tsconfig.json
rename to packages/nlu-bin/tsconfig.json
index 4045bea9..e3b71efa 100644
--- a/packages/nlu-cli/tsconfig.json
+++ b/packages/nlu-bin/tsconfig.json
@@ -1,6 +1,13 @@
 {
   "extends": "../../tsconfig.packages.json",
-  "references": [{ "path": "../logger" }, { "path": "../nlu-server" }, { "path": "../lang-server" }],
+  "references": [
+    {
+      "path": "../nlu-server"
+    },
+    {
+      "path": "../lang-server"
+    }
+  ],
   "compilerOptions": {
     "outDir": "./dist" /* Redirect output structure to the directory. */,
     "rootDir": "./src" /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */,
diff --git a/packages/nlu-cli/readme.md b/packages/nlu-cli/readme.md
deleted file mode 100644
index 00e21fb7..00000000
--- a/packages/nlu-cli/readme.md
+++ /dev/null
@@ -1,117 +0,0 @@
-# NLU CLI
-
-## Description
-
-Small CLI that serves as an entry point for both the nlu and language server.
-
-## Available commands
-
-### nlu
-
-```
-Launch a local stand-alone nlu server
-
-Options:
-  --version            Show version number                            [boolean]
-  --help               Show help                                      [boolean]
-  --config, -c         Path to your config file. If defined, rest of the CLI
-                       arguments are ignored.                          [string]
-  --port               The port to listen to                    [default: 3200]
-  --host               Binds the nlu server to a specific hostname
-                                                         [default: "localhost"]
-  --dbURL              URL of database where to persist models. If undefined,
-                       models are stored on FS.                        [string]
-  --modelDir           Directory where to persist models, ignored if dbURL is
-                       set.
- --limit Maximum number of requests per IP per "limitWindow" - interval (0 means unlimited) [default: 0] - --limitWindow Time window on which the limit is applied (use standard - notation, ex: 25m or 1h) [default: "1h"] - --languageURL URL of your language server - [default: "https://lang-01.botpress.io"] - --languageAuthToken Authentication token for your language server [string] - --apmEnabled When enabled, Sentry is added to the express server - allowing the use of the environment variables SENTRY_DSN, - SENTRY_ENVIRONMENT, SENTRY_RELEASE - [boolean] [default: null] - --apmSampleRate If apm is configured, this option sets the sample rate of - traces [number] [default: 1] - --ducklingURL URL of your Duckling server; Only relevant if - "ducklingEnabled" is true - [default: "https://duckling.botpress.io"] - --ducklingEnabled Whether or not to enable Duckling - [boolean] [default: true] - --bodySize Allowed size of HTTP requests body [default: "250kb"] - --batchSize Allowed number of text inputs in one call to POST - /predict [default: -1] - --modelCacheSize Max allocated memory for model cache. Too few memory will - result in more access to file system. [default: "850mb"] - --verbose Verbosity level of the logging, integer from 0 to 4. Does - not apply to "Launcher" logger. [default: 3] - --doc Whether or not to display documentation on start - [boolean] [default: true] - --logFilter Filter logs by namespace, ex: "--log-filter training:svm - api". Namespaces are space separated. Does not apply to - "Launcher" logger. [array] - --maxTraining The max allowed amount of simultaneous trainings on a - single instance [number] [default: 2] -``` - -### lang - -``` -Launch a local language server - -Options: - --version Show version number [boolean] - --help Show help [boolean] - --port The port to listen to [default: 3100] - --host Binds the language server to a specific hostname - [default: "localhost"] - --langDir Directory where language embeddings will be saved [string] - --authToken When enabled, this token is required for clients to query - your language server [string] - --adminToken This token is required to access the server as admin and - manage language. [string] - --limit Maximum number of requests per IP per "limitWindow" - interval (0 means unlimited) [default: 0] - --limitWindow Time window on which the limit is applied (use standard - notation, ex: 25m or 1h) [default: "1h"] - --metadataLocation URL of metadata file which lists available languages - [default: - "https://nyc3.digitaloceanspaces.com/botpress-public/embeddings/index.json"] - --offline Whether or not the language server has internet access - [boolean] [default: false] - --dim Number of language dimensions provided (25, 100 or 300 at - the moment) [default: 100] - --domain Name of the domain where those embeddings were trained on. - [default: "bp"] - --verbose Verbosity level of the logging, integer from 0 to 4. Does - not apply to "Launcher" logger. [default: 3] - --logFilter Filter logs by namespace, ex: "--log-filter training:svm - api". Namespaces are space separated. Does not apply to - "Launcher" logger. 
[array] -``` - -### download - -``` -Download a language model for lang and dim - -Options: - --version Show version number [boolean] - --help Show help [boolean] - --langDir Directory where language embeddings will be saved [string] - --metadataLocation URL of metadata file which lists available languages - [default: - "https://nyc3.digitaloceanspaces.com/botpress-public/embeddings/index.json"] - --dim Number of language dimensions provided (25, 100 or 300 at - the moment) [default: 100] - --domain Name of the domain where those embeddings were trained on. - [default: "bp"] - --lang, -l Language Code to download model from [string] [required] -``` - -## Licensing - -This software is protected by the same license as the [main Botpress repository](https://github.com/botpress/botpress). You can find the license file [here](https://github.com/botpress/botpress/blob/master/LICENSE). diff --git a/packages/nlu-cli/src/index.ts b/packages/nlu-cli/src/index.ts deleted file mode 100644 index 4a9cc2c5..00000000 --- a/packages/nlu-cli/src/index.ts +++ /dev/null @@ -1,230 +0,0 @@ -import './rewire' -import { run as runLanguageServer, download as downloadLang, version as langServerVersion } from '@botpress/lang-server' -import { makeLogger, LoggerLevel } from '@botpress/logger' -import { run as runNLUServer, version as nluServerVersion } from '@botpress/nlu-server' -import yargs from 'yargs' -import yn from 'yn' - -void yargs - .version(false) - .command( - ['nlu', '$0'], - 'Launch a local standalone nlu server', - { - version: { - description: "Prints the NLU Server's version", - type: 'boolean', - default: false - }, - config: { - description: 'Path to your config file. If defined, rest of the CLI arguments are ignored.', - type: 'string', - alias: 'c' - }, - port: { - description: 'The port to listen to', - default: 3200 - }, - host: { - description: 'Binds the nlu server to a specific hostname', - default: 'localhost' - }, - dbURL: { - description: 'URL of database where to persist models. 
If undefined, models are stored on FS.', - type: 'string' - }, - modelDir: { - description: 'Directory where to persist models, ignored if dbURL is set.', - type: 'string' - }, - limit: { - description: 'Maximum number of requests per IP per "limitWindow" interval (0 means unlimited)', - default: 0 - }, - limitWindow: { - description: 'Time window on which the limit is applied (use standard notation, ex: 25m or 1h)', - default: '1h' - }, - languageURL: { - description: 'URL of your language server', - default: 'https://lang-01.botpress.io' - }, - languageAuthToken: { - description: 'Authentication token for your language server', - type: 'string' - }, - apmEnabled: { - description: - 'When enabled, Sentry is added to the express server allowing the use of the environment variables SENTRY_DSN, SENTRY_ENVIRONMENT, SENTRY_RELEASE', - default: yn(process.env.APM_ENABLED), - type: 'boolean' - }, - apmSampleRate: { - description: 'If apm is configured, this option sets the sample rate of traces', - default: 1.0, - type: 'number' - }, - ducklingURL: { - description: 'URL of your Duckling server; Only relevant if "ducklingEnabled" is true', - default: 'https://duckling.botpress.io' - }, - ducklingEnabled: { - description: 'Whether or not to enable Duckling', - default: true, - type: 'boolean' - }, - bodySize: { - description: 'Allowed size of HTTP requests body', - default: '250kb' - }, - batchSize: { - description: 'Allowed number of text inputs in one call to POST /predict', - default: -1 - }, - modelCacheSize: { - description: 'Max allocated memory for model cache. Too few memory will result in more access to file system.', - default: '850mb' - }, - verbose: { - description: 'Verbosity level of the logging, integer from 0 to 4. Does not apply to "Launcher" logger.', - default: LoggerLevel.Info - }, - doc: { - description: 'Whether or not to display documentation on start', - default: true, - type: 'boolean' - }, - logFilter: { - description: - 'Filter logs by namespace, ex: "--log-filter training:svm api". Namespaces are space separated. 
Does not apply to "Launcher" logger.', - array: true, - type: 'string' - }, - maxTraining: { - description: 'The max allowed amount of simultaneous trainings on a single instance', - default: 2, - type: 'number' - } - }, - (argv) => { - const baseLogger = makeLogger() - if (argv.version) { - baseLogger.sub('Version').info(nluServerVersion) - return - } - - void runNLUServer(argv).catch((err) => { - baseLogger.sub('Exit').attachError(err).critical('NLU Server exits after an error occured.') - process.exit(1) - }) - } - ) - .command( - 'lang', - 'Launch a local language server', - { - port: { - description: 'The port to listen to', - default: 3100 - }, - host: { - description: 'Binds the language server to a specific hostname', - default: 'localhost' - }, - langDir: { - description: 'Directory where language embeddings will be saved', - type: 'string' - }, - authToken: { - description: 'When enabled, this token is required for clients to query your language server', - type: 'string' - }, - adminToken: { - description: 'This token is required to access the server as admin and manage language.', - type: 'string' - }, - limit: { - description: 'Maximum number of requests per IP per "limitWindow" interval (0 means unlimited)', - default: 0 - }, - limitWindow: { - description: 'Time window on which the limit is applied (use standard notation, ex: 25m or 1h)', - default: '1h' - }, - metadataLocation: { - description: 'URL of metadata file which lists available languages', - default: 'https://nyc3.digitaloceanspaces.com/botpress-public/embeddings/index.json' - }, - offline: { - description: 'Whether or not the language server has internet access', - type: 'boolean', - default: false - }, - dim: { - default: 100, - description: 'Number of language dimensions provided (25, 100 or 300 at the moment)' - }, - domain: { - description: 'Name of the domain where those embeddings were trained on.', - default: 'bp' - }, - verbose: { - description: 'Verbosity level of the logging, integer from 0 to 4. Does not apply to "Launcher" logger.', - default: LoggerLevel.Info - }, - logFilter: { - description: - 'Filter logs by namespace, ex: "--log-filter training:svm api". Namespaces are space separated. 
Does not apply to "Launcher" logger.', - array: true, - type: 'string' - } - }, - (argv) => { - const baseLogger = makeLogger({ prefix: 'LANG' }) - if (argv.version) { - baseLogger.sub('Version').info(langServerVersion) - return - } - - void runLanguageServer(argv).catch((err) => { - baseLogger.sub('Exit').attachError(err).critical('Language Server exits after an error occured.') - process.exit(1) - }) - } - ) - .command( - 'download', - 'Download a language model for lang and dim', - { - langDir: { - description: 'Directory where language embeddings will be saved', - type: 'string' - }, - metadataLocation: { - description: 'URL of metadata file which lists available languages', - default: 'https://nyc3.digitaloceanspaces.com/botpress-public/embeddings/index.json' - }, - dim: { - default: 100, - description: 'Number of language dimensions provided (25, 100 or 300 at the moment)' - }, - domain: { - description: 'Name of the domain where those embeddings were trained on.', - default: 'bp' - }, - lang: { - alias: 'l', - description: 'Language Code to download model from', - type: 'string', - demandOption: true - } - }, - (argv) => { - void downloadLang(argv).catch((err) => { - const baseLogger = makeLogger({ prefix: 'LANG' }) - baseLogger.sub('Exit').attachError(err).critical('Language Server exits after an error occured.') - process.exit(1) - }) - } - ) - .help().argv diff --git a/packages/nlu-client/jest.config.js b/packages/nlu-client/jest.config.js deleted file mode 100644 index 61ea9c96..00000000 --- a/packages/nlu-client/jest.config.js +++ /dev/null @@ -1,11 +0,0 @@ -module.exports = { - preset: 'ts-jest', - testEnvironment: 'node', - testPathIgnorePatterns: ['dist', 'node_modules'], - rootDir: '.', - resetModules: true, - verbose: true, - modulePaths: ['/src/'], - moduleFileExtensions: ['js', 'json', 'jsx', 'ts', 'tsx', 'd.ts'], - modulePathIgnorePatterns: ['out'] -} diff --git a/packages/nlu-client/package.json b/packages/nlu-client/package.json index b4a4c743..fb58a70d 100644 --- a/packages/nlu-client/package.json +++ b/packages/nlu-client/package.json @@ -1,12 +1,13 @@ { "name": "@botpress/nlu-client", - "version": "1.0.0", + "version": "1.0.2", "description": "Client and typings for NLU Server's API", "author": "Botpress, Inc.", "license": "AGPL-3.0", "scripts": { "build": "tsc --build", - "test": "jest" + "test": "jest --roots ./dist", + "clean": "rimraf ./dist && rimraf ./node_modules" }, "dependencies": { "axios": "^0.21.1", @@ -16,12 +17,11 @@ "devDependencies": { "@types/lodash": "^4.14.116", "@types/joi": "^17.2.3", - "@types/node": "^12.13.0", + "@types/node": "^16.11.10", "@types/jest": "^24.9.0", "jest": "^24.9.0", - "ts-jest": "^26.5.5", - "typescript": "^3.9.10" + "typescript": "^5.0.4" }, - "types": "./src/typings/index.d.ts", + "types": "./dist/index.d.ts", "main": "./dist/index.js" } diff --git a/packages/nlu-client/readme.md b/packages/nlu-client/readme.md index 5eada311..dfbf6276 100644 --- a/packages/nlu-client/readme.md +++ b/packages/nlu-client/readme.md @@ -1,3 +1,119 @@ # Botpress NLU Client NodeJS SDK for the Botpress NLU Server written in TypeScript. 
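+
+## Installation
+
+Assuming the package is published under the name declared in its package.json, install it like any other npm dependency:
+
+```sh
+npm install @botpress/nlu-client
+# or
+yarn add @botpress/nlu-client
+```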
+
+## Usage
+
+### Basic usage
+
+```ts
+import { Client } from '@botpress/nlu-client'
+import fs from 'fs'
+import path from 'path'
+
+const appId = 'myapp'
+const baseURL = 'http://localhost:3200'
+const client = new Client({ baseURL })
+const trainsetLocation = path.join(__dirname, 'my-trainset.json')
+
+const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms))
+
+const main = async () => {
+  // 0. test your connection
+  const infoRes = await client.getInfo()
+  if (infoRes.success === false) {
+    throw new Error(`getInfo failed: ${infoRes.error.message}`)
+  }
+  console.log(`Using nlu server version v${infoRes.info.version}`)
+
+  // 1. start a training
+  const rawTrainset = await fs.promises.readFile(trainsetLocation, 'utf8')
+  const parsedTrainset = JSON.parse(rawTrainset)
+  const trainRes = await client.startTraining(appId, parsedTrainset)
+  if (trainRes.success === false) {
+    throw new Error(`startTraining failed: ${trainRes.error.message}`)
+  }
+  const { modelId } = trainRes
+
+  // 2. wait for training to be done
+  const fetchStatus = async () => {
+    const statusRes = await client.getTrainingStatus(appId, modelId)
+    if (statusRes.success === false) {
+      throw new Error(`getTrainingStatus failed: ${statusRes.error.message}`)
+    }
+    return statusRes.session
+  }
+
+  let session = await fetchStatus()
+  while (session.status === 'training' || session.status === 'training-pending') {
+    console.log(`training progress: ${session.progress}`)
+    await sleep(100)
+    session = await fetchStatus()
+  }
+
+  if (session.status === 'canceled') {
+    throw new Error('training was canceled')
+  }
+  if (session.status === 'errored') {
+    const errorMsg = session.error?.message || ''
+    throw new Error(`training failed with error: ${errorMsg}`)
+  }
+
+  console.log('training done.')
+
+  // 3. predict
+  const predictRes = await client.predict(appId, modelId, { utterances: ['this grape seems to be moldy'] })
+  if (predictRes.success === false) {
+    throw new Error(`predict failed: ${predictRes.error.message}`)
+  }
+
+  console.log(predictRes.predictions[0])
+  console.log('Done.')
+}
+
+void main()
+```
+
+### Model weights upload and download
+
+The `/modelweights` resource behaves slightly differently from the rest of the API: it communicates with binary buffers instead of JSON.
+ +```ts +import { Client } from '@botpress/nlu-client' +import fs from 'fs' +import path from 'path' + +const appId = 'myapp' +const modelId = '5c1257bb8827ad31.778ab15ae330044f.42.en' + +const modelPath = path.join(__dirname, 'downloaded.model') +const baseURL = 'http://localhost:3200' +const client = new Client({ baseURL }) + +const main = async () => { + // downloading a model + const downloadRes = await client.modelWeights.download(appId, modelId, { responseType: 'stream' }) + if (downloadRes.status !== 'OK') { + throw new Error(`Download weights received status ${downloadRes.status}`) + } + + // wait for download to be complete + await new Promise((resolve, reject) => { + downloadRes.weights.on('end', resolve) + downloadRes.weights.on('error', reject) + downloadRes.weights.pipe(fs.createWriteStream(modelPath)) + }) + + // uploading a model + const modelWeights = await fs.promises.readFile(modelPath) + const uploadRes = await client.modelWeights.upload(appId, modelWeights) + if (uploadRes.status !== 'OK') { + throw new Error(`Upload weights received status ${uploadRes.status}`) + } + + console.log('Done.') +} + +void main() +``` diff --git a/packages/nlu-client/src/app-id.ts b/packages/nlu-client/src/app-id.ts new file mode 100644 index 00000000..c120a35f --- /dev/null +++ b/packages/nlu-client/src/app-id.ts @@ -0,0 +1,5 @@ +export const appIdHeader = (appId: string) => { + return { + 'X-App-Id': appId + } +} diff --git a/packages/nlu-client/src/client.ts b/packages/nlu-client/src/client.ts deleted file mode 100644 index 34e9615c..00000000 --- a/packages/nlu-client/src/client.ts +++ /dev/null @@ -1,113 +0,0 @@ -import axios, { AxiosInstance, AxiosRequestConfig } from 'axios' - -import _ from 'lodash' -import { Client as IClient } from './typings' -import { - TrainResponseBody, - TrainRequestBody, - InfoResponseBody, - TrainProgressResponseBody, - SuccessReponse, - DetectLangRequestBody, - DetectLangResponseBody, - ListModelsResponseBody, - PruneModelsResponseBody, - PredictRequestBody, - PredictResponseBody, - ErrorResponse, - ListTrainingsResponseBody -} from './typings/http' -import { validateResponse } from './validation' - -const DEFAULT_CONFIG: AxiosRequestConfig = { - validateStatus: () => true -} - -export class NLUClient implements IClient { - protected _axios: AxiosInstance - - constructor(config: AxiosRequestConfig) { - this._axios = axios.create({ ...DEFAULT_CONFIG, ...config }) - } - - public get axios() { - return this._axios - } - - public async getInfo(): Promise { - const { data } = await this._axios.get('info') - return validateResponse(data) - } - - public async startTraining( - appId: string, - trainRequestBody: TrainRequestBody - ): Promise { - const headers = this._appIdHeader(appId) - const { data } = await this._axios.post('train', trainRequestBody, { headers }) - return validateResponse(data) - } - - public async listTrainings(appId: string, lang?: string): Promise { - const headers = this._appIdHeader(appId) - const endpoint = 'train' - const params = lang && { lang } - const { data } = await this._axios.get(endpoint, { headers, params }) - return validateResponse(data) - } - - public async getTrainingStatus(appId: string, modelId: string): Promise { - const headers = this._appIdHeader(appId) - const endpoint = `train/${modelId}` - const { data } = await this._axios.get(endpoint, { headers }) - return validateResponse(data) - } - - public async cancelTraining(appId: string, modelId: string): Promise { - const headers = this._appIdHeader(appId) - const endpoint 
= `train/${modelId}/cancel`
-    const { data } = await this._axios.post(endpoint, {}, { headers })
-    return validateResponse(data)
-  }
-
-  public async listModels(appId: string): Promise {
-    const headers = this._appIdHeader(appId)
-    const endpoint = 'models'
-    const { data } = await this._axios.get(endpoint, { headers })
-    return validateResponse(data)
-  }
-
-  public async pruneModels(appId: string): Promise {
-    const headers = this._appIdHeader(appId)
-    const endpoint = 'models/prune'
-    const { data } = await this._axios.post(endpoint, {}, { headers })
-    return validateResponse(data)
-  }
-
-  public async detectLanguage(
-    appId: string,
-    detectLangRequestBody: DetectLangRequestBody
-  ): Promise {
-    const headers = this._appIdHeader(appId)
-    const endpoint = 'detect-lang'
-    const { data } = await this._axios.post(endpoint, detectLangRequestBody, { headers })
-    return validateResponse(data)
-  }
-
-  public async predict(
-    appId: string,
-    modelId: string,
-    predictRequestBody: PredictRequestBody
-  ): Promise {
-    const headers = this._appIdHeader(appId)
-    const endpoint = `predict/${modelId}`
-    const { data } = await this._axios.post(endpoint, predictRequestBody, { headers })
-    return validateResponse(data)
-  }
-
-  private _appIdHeader = (appId: string) => {
-    return {
-      'X-App-Id': appId
-    }
-  }
-}
diff --git a/packages/nlu-client/src/error.ts b/packages/nlu-client/src/error.ts
new file mode 100644
index 00000000..40b5fe91
--- /dev/null
+++ b/packages/nlu-client/src/error.ts
@@ -0,0 +1,10 @@
+import { HTTPCall, HTTPVerb } from './http-call'
+
+export class ClientResponseError extends Error {
+  constructor(call: HTTPCall, status: number, message: string) {
+    const { verb, ressource } = call
+    const ressourcePath = `/${ressource}`
+    const prefix = `${verb} ${ressourcePath} -> ${status}`
+    super(`(${prefix}) ${message}`)
+  }
+}
diff --git a/packages/nlu-client/src/http-call.ts b/packages/nlu-client/src/http-call.ts
new file mode 100644
index 00000000..445e1705
--- /dev/null
+++ b/packages/nlu-client/src/http-call.ts
@@ -0,0 +1,5 @@
+export type HTTPVerb = 'GET' | 'POST' | 'PUT' | 'DELETE'
+export type HTTPCall<V extends HTTPVerb = HTTPVerb> = {
+  verb: V
+  ressource: string
+}
diff --git a/packages/nlu-client/src/index.ts b/packages/nlu-client/src/index.ts
index 7bb3264d..26b36f40 100644
--- a/packages/nlu-client/src/index.ts
+++ b/packages/nlu-client/src/index.ts
@@ -1 +1,7 @@
-export { NLUClient as Client } from './client'
+export { NLUClient as Client } from './nlu-client'
+
+export * as http from './typings/http'
+export * from './typings/training'
+export * from './typings/prediction'
+export * from './typings/linting'
+export * from './typings/info'
diff --git a/packages/nlu-client/src/model-client.ts b/packages/nlu-client/src/model-client.ts
new file mode 100644
index 00000000..8769a5ce
--- /dev/null
+++ b/packages/nlu-client/src/model-client.ts
@@ -0,0 +1,120 @@
+import axios, { AxiosInstance, AxiosRequestConfig } from 'axios'
+
+import _ from 'lodash'
+import { Readable } from 'stream'
+import { appIdHeader } from './app-id'
+import { ClientResponseError } from './error'
+import { HTTPCall } from './http-call'
+
+type GET_WEIGHTS_STATUS = 'OK' | 'WEIGHTS_TRANSFER_DISABLED' | 'MODEL_NOT_FOUND'
+type POST_WEIGHTS_STATUS = 'OK' | 'WEIGHTS_TRANSFER_DISABLED' | 'INVALID_MODEL_FORMAT' | 'UNSUPORTED_MODEL_SPEC'
+
+const get_status_meanings: Record<GET_WEIGHTS_STATUS, number> = {
+  OK: 200,
+  WEIGHTS_TRANSFER_DISABLED: 403,
+  MODEL_NOT_FOUND: 404
+}
+
+const post_status_meanings: Record<POST_WEIGHTS_STATUS, number> = {
+  OK: 200,
+  INVALID_MODEL_FORMAT: 400,
+  WEIGHTS_TRANSFER_DISABLED: 403,
+  UNSUPORTED_MODEL_SPEC: 455 // custom unassigned status code
+}
+
+type GetWeightRes<S extends GET_WEIGHTS_STATUS, R extends Readable | Buffer = Readable | Buffer> = S extends 'OK'
+  ? {
+      status: S
+      weights: R
+    }
+  : { status: S }
+type PostWeightRes = { status: POST_WEIGHTS_STATUS }
+
+/**
+ * This client does not use JSON.
+ * Request and response bodies are binary data.
+ * HTTP status codes are used to report errors.
+ */
+export class ModelTransferClient {
+  protected _axios: AxiosInstance
+
+  constructor(config: AxiosRequestConfig & { baseURL: string }) {
+    this._axios = axios.create({ ...config, validateStatus: () => true })
+  }
+
+  public get axios() {
+    return this._axios
+  }
+
+  public async upload(appId: string, weights: Buffer): Promise<PostWeightRes> {
+    const ressource = 'modelweights'
+    const reqHeaders = {
+      ...appIdHeader(appId),
+      'content-type': 'application/octet-stream',
+      'content-length': weights.length
+    }
+
+    const { status } = await this.axios.post(ressource, weights, {
+      headers: reqHeaders,
+      maxContentLength: Infinity,
+      maxBodyLength: Infinity
+    })
+
+    const call: HTTPCall<'POST'> = { verb: 'POST', ressource }
+
+    if (status >= 500) {
+      throw new ClientResponseError(call, status, 'Internal Server Error')
+    }
+
+    const statusMeaning = this._statusMeaning(post_status_meanings, status)
+    if (!statusMeaning) {
+      throw new ClientResponseError(call, status, 'Unexpected HTTP Status')
+    }
+
+    return { status: statusMeaning }
+  }
+
+  public download(
+    appId: string,
+    modelId: string,
+    opts: { responseType: 'arraybuffer' }
+  ): Promise<GetWeightRes<GET_WEIGHTS_STATUS, Buffer>>
+  public download(
+    appId: string,
+    modelId: string,
+    opts: { responseType: 'stream' }
+  ): Promise<GetWeightRes<GET_WEIGHTS_STATUS, Readable>>
+  public async download(
+    appId: string,
+    modelId: string,
+    opts: { responseType: 'stream' | 'arraybuffer' }
+  ): Promise<GetWeightRes<GET_WEIGHTS_STATUS, Readable | Buffer>> {
+    const ressource = `modelweights/${modelId}`
+    const { responseType } = opts
+
+    const reqHeaders = appIdHeader(appId)
+    const { data, status } = await this.axios.get(ressource, { headers: reqHeaders, responseType })
+
+    const call: HTTPCall<'GET'> = { verb: 'GET', ressource }
+    if (status >= 500) {
+      throw new ClientResponseError(call, status, 'Internal Server Error')
+    }
+
+    const statusMeaning = this._statusMeaning(get_status_meanings, status)
+    if (!statusMeaning) {
+      throw new ClientResponseError(call, status, 'Unexpected HTTP Status')
+    }
+
+    if (statusMeaning === 'OK') {
+      return { status: statusMeaning, weights: data }
+    }
+    return { status: statusMeaning }
+  }
+
+  private _statusMeaning = <S extends string>(availableStatus: Record<S, number>, status: number): S | undefined => {
+    return _.findKey(availableStatus, (s) => s === status) as S | undefined
+  }
+}
diff --git a/packages/nlu-client/src/nlu-client.ts b/packages/nlu-client/src/nlu-client.ts
new file mode 100644
index 00000000..49d52ed8
--- /dev/null
+++ b/packages/nlu-client/src/nlu-client.ts
@@ -0,0 +1,194 @@
+import axios, { AxiosInstance, AxiosRequestConfig, AxiosResponse } from 'axios'
+
+import _ from 'lodash'
+import { appIdHeader } from './app-id'
+import { ClientResponseError } from './error'
+import { HTTPCall, HTTPVerb } from './http-call'
+import { ModelTransferClient } from './model-client'
+import {
+  TrainResponseBody,
+  TrainRequestBody,
+  InfoResponseBody,
+  TrainProgressResponseBody,
+  SuccessReponse,
+  DetectLangRequestBody,
+  DetectLangResponseBody,
+  ListModelsResponseBody,
+  PruneModelsResponseBody,
+  PredictRequestBody,
+  PredictResponseBody,
+  ErrorResponse,
+  ListTrainingsResponseBody,
+  LintRequestBody,
+  LintResponseBody,
+  LintProgressResponseBody
+} from './typings/http'
+import { IssueComputationSpeed } from './typings/linting'
+import { validateResponse } from './validation'
+
+export class NLUClient {
+  protected _axios: AxiosInstance
+  public readonly modelWeights: ModelTransferClient
+
+  constructor(config: AxiosRequestConfig & { baseURL: string }) {
+    config = { ...config, validateStatus: () => true }
+    this._axios = axios.create(config)
+    this.modelWeights = new ModelTransferClient(config)
+  }
+
+  public get axios() {
+    return this._axios
+  }
+
+  public async getInfo(): Promise<InfoResponseBody | ErrorResponse> {
+    const ressource = 'info'
+    const call: HTTPCall<'GET'> = { verb: 'GET', ressource }
+    const res = await this._get(call)
+    return validateResponse<InfoResponseBody>(call, res)
+  }
+
+  public async startTraining(appId: string, body: TrainRequestBody): Promise<TrainResponseBody | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = 'train'
+    const call: HTTPCall<'POST'> = { verb: 'POST', ressource }
+    const res = await this._post(call, body, { headers })
+    return validateResponse<TrainResponseBody>(call, res)
+  }
+
+  /**
+   * @experimental still subject to breaking changes
+   */
+  public async startLinting(appId: string, body: LintRequestBody): Promise<LintResponseBody | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = 'lint'
+    const call: HTTPCall<'POST'> = { verb: 'POST', ressource }
+    const res = await this._post(call, body, { headers })
+    return validateResponse<LintResponseBody>(call, res)
+  }
+
+  public async listTrainings(appId: string, lang?: string): Promise<ListTrainingsResponseBody | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = 'train'
+    const call: HTTPCall<'GET'> = { verb: 'GET', ressource }
+    const params = lang && { lang }
+    const res = await this._get(call, { headers, params })
+    return validateResponse<ListTrainingsResponseBody>(call, res)
+  }
+
+  public async getTrainingStatus(appId: string, modelId: string): Promise<TrainProgressResponseBody | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = `train/${modelId}`
+    const call: HTTPCall<'GET'> = { verb: 'GET', ressource }
+    const res = await this._get(call, { headers })
+    return validateResponse<TrainProgressResponseBody>(call, res)
+  }
+
+  /**
+   * @experimental still subject to breaking changes
+   */
+  public async getLintingStatus(
+    appId: string,
+    modelId: string,
+    speed: IssueComputationSpeed
+  ): Promise<LintProgressResponseBody | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = `lint/${modelId}/${speed}`
+    const call: HTTPCall<'GET'> = { verb: 'GET', ressource }
+    const res = await this._get(call, { headers })
+    return validateResponse<LintProgressResponseBody>(call, res)
+  }
+
+  public async cancelTraining(appId: string, modelId: string): Promise<SuccessReponse | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = `train/${modelId}/cancel`
+    const call: HTTPCall<'POST'> = { verb: 'POST', ressource }
+    const res = await this._post(call, {}, { headers })
+    return validateResponse<SuccessReponse>(call, res)
+  }
+
+  /**
+   * @experimental still subject to breaking changes
+   */
+  public async cancelLinting(
+    appId: string,
+    modelId: string,
+    speed: IssueComputationSpeed
+  ): Promise<SuccessReponse | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = `lint/${modelId}/${speed}/cancel`
+    const call: HTTPCall<'POST'> = { verb: 'POST', ressource }
+    const res = await this._post(call, {}, { headers })
+    return validateResponse<SuccessReponse>(call, res)
+  }
+
+  public async listModels(appId: string): Promise<ListModelsResponseBody | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = 'models'
+    const call: HTTPCall<'GET'> = { verb: 'GET', ressource }
+    const res = await this._get(call, { headers })
+    return validateResponse<ListModelsResponseBody>(call, res)
+  }
+
+  public async pruneModels(appId: string): Promise<PruneModelsResponseBody | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = 'models/prune'
+    const call: HTTPCall<'POST'> = { verb: 'POST', ressource }
+    const res = await this._post(call, {}, { headers })
+    return validateResponse<PruneModelsResponseBody>(call, res)
+  }
+
+  public async detectLanguage(
+    appId: string,
+    body: DetectLangRequestBody
+  ): Promise<DetectLangResponseBody | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = 'detect-lang'
+    const call: HTTPCall<'POST'> = { verb: 'POST', ressource }
+    const res = await this._post(call, body, { headers })
+    return validateResponse<DetectLangResponseBody>(call, res)
+  }
+
+  public async predict(
+    appId: string,
+    modelId: string,
+    body: PredictRequestBody
+  ): Promise<PredictResponseBody | ErrorResponse> {
+    const headers = appIdHeader(appId)
+    const ressource = `predict/${modelId}`
+    const call: HTTPCall<'POST'> = { verb: 'POST', ressource }
+    const res = await this._post(call, body, { headers })
+    return validateResponse<PredictResponseBody>(call, res)
+  }
+
+  private _post = async (
+    call: HTTPCall<'POST'>,
+    body?: any,
+    config?: AxiosRequestConfig
+  ): Promise<AxiosResponse<any>> => {
+    try {
+      const { ressource } = call
+      const res = await this._axios.post(ressource, body, config)
+      return res
+    } catch (err) {
+      // axios validateStatus does not prevent all exceptions
+      throw this._mapErr(call, err)
+    }
+  }
+
+  private _get = async (call: HTTPCall<'GET'>, config?: AxiosRequestConfig): Promise<AxiosResponse<any>> => {
+    try {
+      const { ressource } = call
+      const res = await this._axios.get(ressource, config)
+      return res
+    } catch (err) {
+      // axios validateStatus does not prevent all exceptions
+      throw this._mapErr(call, err)
+    }
+  }
+
+  private _mapErr = (call: HTTPCall, thrown: any): ClientResponseError => {
+    const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
+    const httpStatus = -1
+    return new ClientResponseError(call, httpStatus, err.message)
+  }
+}
diff --git a/packages/nlu-client/src/typings/http.d.ts b/packages/nlu-client/src/typings/http.d.ts
deleted file mode 100644
index 63a7261d..00000000
--- a/packages/nlu-client/src/typings/http.d.ts
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * ############
- * ### HTTP ###
- * ############
- */
-
-import {
-  TrainingState,
-  PredictOutput,
-  IntentDefinition,
-  EntityDefinition,
-  Specifications,
-  Health,
-  ServerInfo,
-  Training
-} from './sdk'
-
-export interface TrainRequestBody {
-  language: string
-  contexts: string[]
-  intents: IntentDefinition[]
-  entities: EntityDefinition[]
-  seed?: number
-}
-
-export interface PredictRequestBody {
-  utterances: string[]
-}
-
-export interface DetectLangRequestBody extends PredictRequestBody {
-  models: string[]
-}
-
-export interface ErrorResponse {
-  success: false
-  error: string
-}
-
-export interface SuccessReponse {
-  success: true
-}
-
-export interface InfoResponseBody extends SuccessReponse {
-  info: ServerInfo
-}
-
-export interface TrainResponseBody extends SuccessReponse {
-  modelId: string
-}
-
-export interface TrainProgressResponseBody extends SuccessReponse {
-  session: TrainingState
-}
-
-export interface ListTrainingsResponseBody extends SuccessReponse {
-  trainings: Training[]
-}
-
-export interface ListModelsResponseBody extends SuccessReponse {
-  models: string[]
-}
-
-export interface PruneModelsResponseBody extends SuccessReponse {
-  models: string[]
-}
-
-export interface PredictResponseBody extends SuccessReponse {
-  predictions: PredictOutput[]
-}
-
-export interface DetectLangResponseBody extends SuccessReponse {
-  detectedLanguages: string[]
-}
diff --git a/packages/nlu-client/src/typings/http.ts b/packages/nlu-client/src/typings/http.ts
new file mode 100644
index 00000000..97f47ec5
--- /dev/null
+++ b/packages/nlu-client/src/typings/http.ts
@@
-0,0 +1,101 @@ +/** + * ############ + * ### HTTP ### + * ############ + */ + +import { ServerInfo } from './info' +import { LintingState, IssueComputationSpeed } from './linting' +import { PredictOutput } from './prediction' +import { TrainingState, IntentDefinition, EntityDefinition, Training } from './training' + +export type TrainRequestBody = { + language: string + contexts: string[] + intents: IntentDefinition[] + entities: EntityDefinition[] + seed?: number +} + +export type LintRequestBody = { + speed: IssueComputationSpeed + language: string + contexts: string[] + intents: IntentDefinition[] + entities: EntityDefinition[] +} + +export type PredictRequestBody = { + utterances: string[] +} + +export type DetectLangRequestBody = { + models: string[] +} & PredictRequestBody + +export type ErrorType = + | 'model_not_found' + | 'training_not_found' + | 'linting_not_found' + | 'training_already_started' + | 'request_format' + | 'lang-server' + | 'duckling-server' + | 'internal' + | 'dataset_format' + +export type NLUError = { + message: string + stack?: string + type: ErrorType + code: number +} + +export type ErrorResponse = { + success: false + error: NLUError +} + +export type SuccessReponse = { + success: true +} + +export type InfoResponseBody = { + info: ServerInfo +} & SuccessReponse + +export type TrainResponseBody = { + modelId: string +} & SuccessReponse + +export type LintResponseBody = { + modelId: string +} & SuccessReponse + +export type TrainProgressResponseBody = { + session: TrainingState +} & SuccessReponse + +export type LintProgressResponseBody = { + session: LintingState +} & SuccessReponse + +export type ListTrainingsResponseBody = { + trainings: Training[] +} & SuccessReponse + +export type ListModelsResponseBody = { + models: string[] +} & SuccessReponse + +export type PruneModelsResponseBody = { + models: string[] +} & SuccessReponse + +export type PredictResponseBody = { + predictions: PredictOutput[] +} & SuccessReponse + +export type DetectLangResponseBody = { + detectedLanguages: string[] +} & SuccessReponse diff --git a/packages/nlu-client/src/typings/index.d.ts b/packages/nlu-client/src/typings/index.d.ts deleted file mode 100644 index d61c3d68..00000000 --- a/packages/nlu-client/src/typings/index.d.ts +++ /dev/null @@ -1,42 +0,0 @@ -import { - TrainRequestBody, - PredictRequestBody, - DetectLangRequestBody, - ErrorResponse, - SuccessReponse, - InfoResponseBody, - TrainResponseBody, - TrainProgressResponseBody, - ListModelsResponseBody, - PruneModelsResponseBody, - PredictResponseBody, - DetectLangResponseBody, - ListTrainingsResponseBody -} from './http' -import { AxiosRequestConfig, AxiosInstance } from 'axios' - -export class Client { - readonly axios: AxiosInstance - - constructor(config: AxiosRequestConfig) - - getInfo(): Promise - startTraining(appId: string, trainRequestBody: TrainRequestBody): Promise - listTrainings(appId: string, lang?: string): Promise - getTrainingStatus(appId: string, modelId: string): Promise - cancelTraining(appId: string, modelId: string): Promise - listModels(appId: string): Promise - pruneModels(appId: string): Promise - detectLanguage( - appId: string, - detectLangRequestBody: DetectLangRequestBody - ): Promise - predict( - appId: string, - modelId: string, - predictRequestBody: PredictRequestBody - ): Promise -} - -export * as http from './http' -export * from './sdk' diff --git a/packages/nlu-client/src/typings/info.ts b/packages/nlu-client/src/typings/info.ts new file mode 100644 index 00000000..33a3643d --- /dev/null 
+++ b/packages/nlu-client/src/typings/info.ts
@@ -0,0 +1,15 @@
+export type ServerInfo = {
+  specs: Specifications
+  languages: string[]
+  version: string
+  modelTransferEnabled: boolean
+}
+
+export type Specifications = {
+  engineVersion: string // semver string
+  languageServer: {
+    dimensions: number
+    domain: string
+    version: string // semver string
+  }
+}
diff --git a/packages/nlu-client/src/typings/linting.ts b/packages/nlu-client/src/typings/linting.ts
new file mode 100644
index 00000000..365b8b98
--- /dev/null
+++ b/packages/nlu-client/src/typings/linting.ts
@@ -0,0 +1,117 @@
+export type DatasetReport = {
+  issues: DatasetIssue[]
+}
+
+export type IssueCode =
+  | 'C_000' // tokens tagged with a nonexistent slot
+  | 'C_001' // slot has a nonexistent entity
+  | 'C_002' // intent has no utterances
+  | 'C_003' // dataset has an unsupported language
+  | 'E_000' // token tagged with slot has incorrect type
+  | 'E_001' // utterance has incorrect language
+  | 'E_002' // duplicated utterances (in one or more intents)
+  | 'E_003' // the whole utterance is tagged as a slot
+  | 'W_000' // intents are overlapping
+  | 'I_000' // utterance contains duplicated or untrimmed spaces
+
+export type Raw<T> = { raw: T }
+export type Clean<T> = { clean: T }
+export type CleanOrRaw<T> = Clean<T> & Raw<T>
+export type Span = { start: number; end: number }
+
+export type IssueData<C extends IssueCode> = C extends 'C_000'
+  ? {
+      intent: string
+      utterance: string
+      slot: string
+    }
+  : C extends 'C_001'
+  ? {
+      intent: string
+      slot: string
+      entity: string
+    }
+  : C extends 'C_002'
+  ? {
+      intent: string
+    }
+  : C extends 'C_003'
+  ? {
+      language: string
+    }
+  : C extends 'E_000'
+  ? {
+      intent: string
+      utterance: Clean<string> & { idx: number }
+      charPos: Clean<Span>
+      slot: string
+      entities: string[]
+      source: string
+    }
+  : C extends 'E_001'
+  ? {
+      intent: string
+      utterance: string
+      detectedLang: string
+      expectedLang: string
+    }
+  : C extends 'E_002'
+  ? {
+      intentA: string
+      intentB: string
+      utterance: string
+    }
+  : C extends 'E_003'
+  ? {
+      intent: string
+      utterance: string
+      slot: string
+    }
+  : C extends 'I_000'
+  ? {
+      intent: string
+      utterance: Raw<string> & { idx: number }
+      charPos: Raw<Span>
+    }
+  : never
+
+export type IssueDefinition<C extends IssueCode = IssueCode> = {
+  code: C
+  severity: IssueSeverity<C>
+  name: string
+}
+
+export type DatasetIssue<C extends IssueCode = IssueCode> = IssueDefinition<C> & {
+  id: string
+  message: string
+  data: IssueData<C>
+}
+
+export type LintingStatus = 'done' | 'linting-pending' | 'linting' | 'canceled' | 'errored'
+export type LintingErrorType = 'lang-server' | 'duckling-server' | 'zombie-linting' | 'internal'
+
+export type LintingError = {
+  type: LintingErrorType
+  message: string
+  stack?: string
+}
+
+export type LintingState = {
+  status: LintingStatus
+  currentCount: number
+  totalCount: number
+  error?: LintingError
+  issues: DatasetIssue[]
+}
+
+export type IssueComputationSpeed = 'fastest' | 'fast' | 'slow' | 'slowest'
+
+export type IssueSeverity<C extends IssueCode> = C extends `C_${infer CodeSufix}`
+  ? 'critical'
+  : C extends `E_${infer CodeSufix}`
+  ? 'error'
+  : C extends `W_${infer CodeSufix}`
+  ? 'warning'
+  : C extends `I_${infer CodeSufix}`
+  ?
'info' + : never diff --git a/packages/nlu-client/src/typings/prediction.ts b/packages/nlu-client/src/typings/prediction.ts new file mode 100644 index 00000000..79866704 --- /dev/null +++ b/packages/nlu-client/src/typings/prediction.ts @@ -0,0 +1,44 @@ +export type PredictOutput = { + entities: EntityPrediction[] + contexts: ContextPrediction[] + spellChecked: string +} + +export type EntityType = 'pattern' | 'list' | 'system' + +export type EntityPrediction = { + name: string + type: string // ex: ['custom.list.fruits', 'system.time'] + value: string + confidence: number + source: string + start: number + end: number + unit?: string + + sensitive?: boolean +} + +export type ContextPrediction = { + name: string + oos: number + confidence: number + intents: IntentPrediction[] +} + +export type IntentPrediction = { + name: string + confidence: number + slots: SlotPrediction[] + extractor: string +} + +export type SlotPrediction = { + name: string + value: string + confidence: number + source: string + start: number + end: number + entity: EntityPrediction | null +} diff --git a/packages/nlu-client/src/typings/sdk.d.ts b/packages/nlu-client/src/typings/sdk.d.ts deleted file mode 100644 index 0dd833cc..00000000 --- a/packages/nlu-client/src/typings/sdk.d.ts +++ /dev/null @@ -1,144 +0,0 @@ -export interface ServerInfo { - specs: Specifications - health: Health - languages: string[] - version: string -} - -export interface Specifications { - nluVersion: string // semver string - languageServer: { - dimensions: number - domain: string - version: string // semver string - } -} - -export interface Health { - isEnabled: boolean - validProvidersCount: number - validLanguages: string[] -} - -/** - * ################################## - * ############ TRAINING ############ - * ################################## - */ - -export interface TrainInput { - language: string - intents: IntentDefinition[] - entities: EntityDefinition[] - seed: number -} - -export interface IntentDefinition { - name: string - contexts: string[] - utterances: string[] - slots: SlotDefinition[] -} - -export interface SlotDefinition { - name: string - entities: string[] -} - -export interface ListEntityDefinition { - name: string - type: 'list' - values: { name: string; synonyms: string[] }[] - fuzzy: number - - sensitive?: boolean -} - -export interface PatternEntityDefinition { - name: string - type: 'pattern' - regex: string - case_sensitive: boolean - examples: string[] - - sensitive?: boolean -} - -export type EntityDefinition = ListEntityDefinition | PatternEntityDefinition - -/** - * done : when a training is complete - * training-pending : when a training was launched, but the training process is not started yet - * training: when a chatbot is currently training - * canceled: when a training was canceled - * errored: when an unhandled error occured during training - */ -export type TrainingStatus = 'done' | 'training-pending' | 'training' | 'canceled' | 'errored' - -export type TrainingErrorType = 'zombie-training' | 'unknown' - -export interface TrainingError { - type: TrainingErrorType - message: string - stackTrace?: string -} - -export interface TrainingState { - status: TrainingStatus - progress: number - error?: TrainingError -} - -export interface Training extends TrainingState { - modelId: string -} - -/** - * #################################### - * ############ PREDICTION ############ - * #################################### - */ -export interface PredictOutput { - entities: EntityPrediction[] - contexts: 
ContextPrediction[]
-  spellChecked: string
-}
-
-export type EntityType = 'pattern' | 'list' | 'system'
-
-export interface EntityPrediction {
-  name: string
-  type: string // ex: ['custom.list.fruits', 'system.time']
-  value: string
-  confidence: number
-  source: string
-  start: number
-  end: number
-  unit?: string
-
-  sensitive?: boolean
-}
-
-export interface ContextPrediction {
-  name: string
-  oos: number
-  confidence: number
-  intents: IntentPrediction[]
-}
-
-export interface IntentPrediction {
-  name: string
-  confidence: number
-  slots: SlotPrediction[]
-  extractor: string
-}
-
-export interface SlotPrediction {
-  name: string
-  value: string
-  confidence: number
-  source: string
-  start: number
-  end: number
-  entity: EntityPrediction | null
-}
diff --git a/packages/nlu-client/src/typings/training.ts b/packages/nlu-client/src/typings/training.ts
new file mode 100644
index 00000000..a7b711b2
--- /dev/null
+++ b/packages/nlu-client/src/typings/training.ts
@@ -0,0 +1,66 @@
+export type TrainInput = {
+  language: string
+  intents: IntentDefinition[]
+  entities: EntityDefinition[]
+  seed: number
+}
+
+export type IntentDefinition = {
+  name: string
+  contexts: string[]
+  utterances: string[]
+  slots: SlotDefinition[]
+}
+
+export type SlotDefinition = {
+  name: string
+  entities: string[]
+}
+
+export type ListEntityDefinition = {
+  name: string
+  type: 'list'
+  values: { name: string; synonyms: string[] }[]
+  fuzzy: number
+
+  sensitive?: boolean
+}
+
+export type PatternEntityDefinition = {
+  name: string
+  type: 'pattern'
+  regex: string
+  case_sensitive: boolean
+  examples: string[]
+
+  sensitive?: boolean
+}
+
+export type EntityDefinition = ListEntityDefinition | PatternEntityDefinition
+
+/**
+ * done : when a training is complete
+ * training-pending : when a training was launched, but the training process is not started yet
+ * training: when a chatbot is currently training
+ * canceled: when a training was canceled
+ * errored: when an unhandled error occurred during training
+ */
+export type TrainingStatus = 'done' | 'training-pending' | 'training' | 'canceled' | 'errored'
+
+export type TrainingErrorType = 'lang-server' | 'duckling-server' | 'zombie-training' | 'internal'
+
+export type TrainingError = {
+  type: TrainingErrorType
+  message: string
+  stack?: string
+}
+
+export type TrainingState = {
+  status: TrainingStatus
+  progress: number
+  error?: TrainingError
+}
+
+export type Training = TrainingState & {
+  modelId: string
+}
diff --git a/packages/nlu-client/src/validation.test.ts b/packages/nlu-client/src/validation.test.ts
index a825604b..b6f0f65d 100644
--- a/packages/nlu-client/src/validation.test.ts
+++ b/packages/nlu-client/src/validation.test.ts
@@ -1,5 +1,7 @@
+import { AxiosResponse } from 'axios'
+import { HTTPCall } from './http-call'
+import { SuccessReponse, ErrorResponse, NLUError } from './typings/http'
 import { validateResponse } from './validation'
-import { SuccessReponse, ErrorResponse } from './typings/http'
 
 const augmentWithExtraKey = (res: Object) => {
   return [
@@ -13,10 +15,18 @@ const augmentWithExtraKey = (res: Object) => {
   ]
 }
 
+const error: NLUError = { code: 500, type: 'internal', message: 'An error' }
+const call: HTTPCall<'GET'> = { verb: 'GET', ressource: '' }
+
+const axiosRes = (data: any): AxiosResponse => {
+  const x: Partial<AxiosResponse> = { data, status: 200 }
+  return x as AxiosResponse
+}
+
 test('validating with absent success key should fail', async () => {
   // arrange && act && assert
-  expect(() => validateResponse({})).toThrow()
diff --git a/packages/nlu-client/src/validation.test.ts b/packages/nlu-client/src/validation.test.ts
index a825604b..b6f0f65d 100644
--- a/packages/nlu-client/src/validation.test.ts
+++ b/packages/nlu-client/src/validation.test.ts
@@ -1,5 +1,7 @@
+import { AxiosResponse } from 'axios'
+import { HTTPCall } from './http-call'
+import { SuccessReponse, ErrorResponse, NLUError } from './typings/http'
 import { validateResponse } from './validation'
-import { SuccessReponse, ErrorResponse } from './typings/http'
 
 const augmentWithExtraKey = (res: Object) => {
   return [
@@ -13,10 +15,18 @@ const augmentWithExtraKey = (res: Object) => {
   ]
 }
 
+const error: NLUError = { code: 500, type: 'internal', message: 'An error' }
+const call: HTTPCall<'GET'> = { verb: 'GET', ressource: '' }
+
+const axiosRes = (data: any): AxiosResponse => {
+  const x: Partial<AxiosResponse<any>> = { data, status: 200 }
+  return x as AxiosResponse
+}
+
 test('validating with absent success key should fail', async () => {
   // arrange && act && assert
-  expect(() => validateResponse({})).toThrow()
-  expect(() => validateResponse({ someKey: 'some text' })).toThrow()
+  expect(() => validateResponse(call, axiosRes({}))).toThrow()
+  expect(() => validateResponse(call, axiosRes({ someKey: 'some text' }))).toThrow()
 })
 
 test('validating a successfull response should pass', async () => {
@@ -24,23 +34,33 @@ test('validating a successfull response should pass', async () => {
   const res: SuccessReponse = { success: true }
 
   // act && assert
-  expect(() => validateResponse(res)).not.toThrow()
+  expect(() => validateResponse(call, axiosRes(res))).not.toThrow()
 })
 
 test('validating an unsuccessfull response with unempty error should pass', async () => {
   // arrange
-  const res: ErrorResponse = { success: false, error: 'an error' }
+  const res: ErrorResponse = { success: false, error }
+
+  // act && assert
+  expect(() => validateResponse(call, axiosRes(res))).not.toThrow()
+})
+
+test('validating an unsuccessfull response with empty error message should pass', async () => {
+  const error: NLUError = { message: '', code: 500, type: 'internal' }
+
+  // arrange
+  const res: ErrorResponse = { success: false, error }
 
   // act && assert
-  expect(() => validateResponse(res)).not.toThrow()
+  expect(() => validateResponse(call, axiosRes(res))).not.toThrow()
 })
 
-test('validating an unsuccessfull response with empty error should still pass', async () => {
+test('validating an unsuccessfull response with empty error should fail', async () => {
   // arrange
-  const res: ErrorResponse = { success: false, error: '' }
+  const res: ErrorResponse = { success: false, error: {} as NLUError }
 
   // act && assert
-  expect(() => validateResponse(res)).not.toThrow()
+  expect(() => validateResponse(call, axiosRes(res))).toThrow()
 })
 
 test('validating an unsuccessfull response with undefined error should fail', async () => {
@@ -48,7 +68,7 @@ test('validating an unsuccessfull response with undefined error should fail', async () => {
   const res: Partial<ErrorResponse> = { success: false }
 
   // act && assert
-  expect(() => validateResponse(res)).toThrow()
+  expect(() => validateResponse(call, axiosRes(res))).toThrow()
 })
 
 test('validating a successfull response with unknown keys should pass', async () => {
@@ -58,17 +78,17 @@ test('validating a successfull response with unknown keys should pass', async () => {
   // act && assert
   const responses = augmentWithExtraKey(res)
   responses.forEach((r) => {
-    expect(() => validateResponse(r)).not.toThrow()
+    expect(() => validateResponse(call, axiosRes(r))).not.toThrow()
   })
 })
 
 test('validating an unsuccessfull response with unknown keys should pass', async () => {
   // arrange
-  const res = { success: false, error: 'some error' }
+  const res = { success: false, error }
 
   // act && assert
   const responses = augmentWithExtraKey(res)
   responses.forEach((r) => {
-    expect(() => validateResponse(r)).not.toThrow()
+    expect(() => validateResponse(call, axiosRes(r))).not.toThrow()
   })
 })
diff --git a/packages/nlu-client/src/validation.ts b/packages/nlu-client/src/validation.ts
index 7c062c1d..5470c2a5 100644
--- a/packages/nlu-client/src/validation.ts
+++ b/packages/nlu-client/src/validation.ts
@@ -1,38 +1,65 @@
+import { AxiosResponse } from 'axios'
 import Joi from 'joi'
+import _ from 'lodash'
+import { ClientResponseError } from './error'
+import { HTTPCall, HTTPVerb } from './http-call'
 import { SuccessReponse, ErrorResponse } from './typings/http'
 
-const allowUnknownKeys = (obj: Joi.ObjectSchema): Joi.ObjectSchema => {
-  return obj.pattern(/./, Joi.any())
-}
+const ERROR_RESPONSE_SCHEMA = Joi.object().keys({
+  message: Joi.string().required().allow(''),
+  stack: Joi.string().optional().allow(''),
+  code: Joi.number().required(),
+  type: Joi.string().required()
+})
+
+/** Manual validation for clean error messages */
+export const validateResponse = <S extends SuccessReponse>(
+  call: HTTPCall<HTTPVerb>,
+  res: AxiosResponse
+): S | ErrorResponse => {
+  const { status, data } = res
+
+  if (_.isNil(data)) {
+    throw new ClientResponseError(call, status, 'Received empty HTTP response.')
+  }
+
+  if (typeof data !== 'object') {
+    const responseType = typeof data
+    throw new ClientResponseError(
+      call,
+      status,
+      `Received ${responseType} HTTP response. Expected response to be an object.`
+    )
+  }
+
+  if (data.success === true) {
+    return data
+  }
+
+  if (data.success === false) {
+    const { error } = data
+    if (_.isNil(error) || typeof error !== 'object') {
+      throw new ClientResponseError(
+        call,
+        status,
+        'Received unsuccessful HTTP response with no error. Expected response.error to be an object.'
+      )
+    }
+
+    const { error: validationError } = ERROR_RESPONSE_SCHEMA.validate(error)
+    if (validationError) {
+      throw new ClientResponseError(
+        call,
+        status,
+        `Received response with incorrect error format: ${validationError.message}`
+      )
+    }
+    return data
+  }
 
-const ERROR_RESPONSE_SCHEMA = allowUnknownKeys(
-  Joi.object().keys({
-    success: Joi.boolean().strict().not(true).required(),
-    error: Joi.string().allow('').required()
-  })
-)
-
-const SUCCESS_RESPONSE_SCHEMA = allowUnknownKeys(
-  Joi.object().keys({
-    success: Joi.boolean().strict().not(false).required()
-  })
-)
-
-const RESPONSE_SCHEMA = Joi.object()
-  .keys({ success: Joi.boolean().required() })
-  .when('.success', {
-    switch: [
-      {
-        is: true,
-        then: SUCCESS_RESPONSE_SCHEMA
-      },
-      {
-        is: false,
-        then: ERROR_RESPONSE_SCHEMA
-      }
-    ]
-  })
-
-export const validateResponse = <S extends SuccessReponse>(res: any): S | ErrorResponse => {
-  return Joi.attempt(res, RESPONSE_SCHEMA)
+  throw new ClientResponseError(
+    call,
+    status,
+    'Received HTTP response body has no attribute "success". Expected response.success to be a boolean.'
+  )
 }
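A minimal sketch of a call site for the new validateResponse. The endpoint argument, the 'info' ressource, and getInfo itself are assumptions for illustration; validateResponse, HTTPCall, and the response typings are the ones from the diff above:

import axios, { AxiosResponse } from 'axios'
import { HTTPCall } from './http-call'
import { ErrorResponse, SuccessReponse } from './typings/http'
import { validateResponse } from './validation'

// Hypothetical client method: perform the HTTP call, then let validateResponse
// either return the typed body or throw a ClientResponseError carrying the
// call descriptor and the HTTP status.
const getInfo = async (endpoint: string): Promise<SuccessReponse | ErrorResponse> => {
  const call: HTTPCall<'GET'> = { verb: 'GET', ressource: 'info' }
  const res: AxiosResponse = await axios.get(`${endpoint}/${call.ressource}`)
  return validateResponse<SuccessReponse>(call, res)
}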
diff --git a/packages/nlu-e2e/package.json b/packages/nlu-e2e/package.json
new file mode 100644
index 00000000..1d0f2919
--- /dev/null
+++ b/packages/nlu-e2e/package.json
@@ -0,0 +1,32 @@
+{
+  "name": "@botpress/nlu-e2e",
+  "version": "0.0.1",
+  "description": "Small e2e test suite for the NLU Server. Its main purpose is to make sure happy paths work well for multiple server configurations.",
+  "main": "./dist/index.js",
+  "license": "MIT",
+  "dependencies": {
+    "@botpress/nlu-client": "*",
+    "cli-progress": "^3.8.2",
+    "@bpinternal/log4bot": "^0.0.4",
+    "chalk": "^2.4.2",
+    "fs-extra": "^9.1.0",
+    "bluebird": "^3.7.2",
+    "lodash": "^4.17.19",
+    "yargs": "^16.0.3"
+  },
+  "devDependencies": {
+    "@types/lodash": "^4.14.116",
+    "@types/fs-extra": "^5.0.4",
+    "@types/node": "^16.11.10",
+    "typescript": "^5.0.4",
+    "chai": "4.3.4",
+    "@types/chai": "4.3.0",
+    "semver": "7.3.5",
+    "@types/semver": "7.3.9"
+  },
+  "scripts": {
+    "start": "node ./dist/index.js",
+    "build": "tsc --build",
+    "clean": "rimraf ./dist && rimraf ./node_modules"
+  }
+}
diff --git a/packages/nlu-e2e/src/app-data.ts b/packages/nlu-e2e/src/app-data.ts
new file mode 100644
index 00000000..775ef4d8
--- /dev/null
+++ b/packages/nlu-e2e/src/app-data.ts
@@ -0,0 +1,16 @@
+import path from 'path'
+
+export function getAppDataPath() {
+  const homeDir = process.env.APP_DATA_PATH || process.env.HOME || process.env.APPDATA
+  if (homeDir) {
+    if (process.platform === 'darwin') {
+      return path.join(homeDir, 'Library', 'Application Support', 'botpress')
+    }
+
+    return path.join(homeDir, 'botpress')
+  }
+
+  const errorMsg = `Could not determine your HOME directory.
+Please set the environment variable "APP_DATA_PATH", then start Botpress`
+  throw new Error(errorMsg)
+}
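The assertions module that follows exposes small, composable checks. As a hedged sketch of how they might chain into a happy-path scenario (runHappyPath is invented; client and logger are left untyped here because their concrete types are wired up elsewhere in the suite):

import {
  assertIntentPredictionWorks,
  assertModelsPrune,
  assertServerIsReachable,
  assertTrainingFinishes,
  assertTrainingStarts
} from './assertions'
import { trainSet } from './datasets/clinc50_42'
import { AssertionArgs } from './typings'

const runHappyPath = async (client: any, logger: any, appId: string) => {
  const args: AssertionArgs = { client, logger, appId }
  await assertServerIsReachable(args, ['en']) // server up, 'en' available
  const modelId = await assertTrainingStarts(args, trainSet) // queue a training, expect it to start
  await assertTrainingFinishes(args, modelId) // poll until status is 'done'
  await assertIntentPredictionWorks(args, modelId, 'how do you say hi in french', 'translate')
  await assertModelsPrune(args) // cleanup: no models left afterwards
}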
diff --git a/packages/nlu-e2e/src/assertions.ts b/packages/nlu-e2e/src/assertions.ts
new file mode 100644
index 00000000..92415a89
--- /dev/null
+++ b/packages/nlu-e2e/src/assertions.ts
@@ -0,0 +1,437 @@
+import {
+  DatasetIssue,
+  http,
+  IssueCode,
+  IssueComputationSpeed,
+  LintingState,
+  LintingStatus,
+  TrainingErrorType,
+  TrainingState,
+  TrainingStatus,
+  TrainInput
+} from '@botpress/nlu-client'
+import chai from 'chai'
+import cliProgress from 'cli-progress'
+import fs from 'fs'
+import _ from 'lodash'
+import ms from 'ms'
+import semver from 'semver'
+import { PrecondtionFailed, UnsuccessfullAPICall, UnsuccessfullModelTransfer } from './errors'
+import { AssertionArgs } from './typings'
+import { pollLintingUntil, pollTrainingUntil } from './utils'
+
+export const assertServerIsReachable = async (args: AssertionArgs, requiredLanguages: string[]) => {
+  const { client, logger, appId } = args
+  logger.debug('assert server is reachable')
+
+  const infoRes = await client.getInfo()
+  if (!infoRes.success) {
+    throw new UnsuccessfullAPICall(infoRes.error, 'Make sure the NLU Server is reachable.')
+  }
+
+  const { info } = infoRes
+  chai.expect(info.version).to.satisfy(semver.valid)
+  chai
+    .expect(info.languages)
+    .to.be.a('array')
+    .and.to.include.any.members(requiredLanguages, 'Test requires nlu server to have some expected languages')
+}
+
+export const assertModelTransferIsEnabled = async (args: AssertionArgs) => {
+  const { client, logger, appId } = args
+  logger.debug('assert model transfer is enabled')
+
+  const infoRes = await client.getInfo()
+  if (!infoRes.success) {
+    throw new UnsuccessfullAPICall(infoRes.error, 'Make sure the NLU Server is reachable.')
+  }
+
+  const { info } = infoRes
+  if (!info.modelTransferEnabled) {
+    throw new PrecondtionFailed('Expected model transfer to be enabled on server.')
+  }
+}
+
+export const assertModelsInclude = async (args: AssertionArgs, expectedModels: string[]) => {
+  const { client, logger, appId } = args
+  logger.debug(`assert models include: [${expectedModels.join(', ')}]`)
+
+  const modelRes = await client.listModels(appId)
+  if (!modelRes.success) {
+    throw new UnsuccessfullAPICall(modelRes.error)
+  }
+
+  const { models } = modelRes
+  chai.expect(models).to.include.members(expectedModels)
+}
+
+export const assertModelsAreEmpty = async (args: AssertionArgs) => {
+  const { client, logger, appId } = args
+  logger.debug('assert models are empty')
+
+  const modelRes = await client.listModels(appId)
+  if (!modelRes.success) {
+    throw new UnsuccessfullAPICall(modelRes.error)
+  }
+
+  const { models } = modelRes
+  chai.expect(models).to.have.length(0)
+}
+
+const _getContexts = (trainSet: TrainInput): string[] =>
+  _(trainSet.intents)
+    .flatMap((i) => i.contexts)
+    .uniq()
+    .value()
+
+export const assertTrainingStarts = async (args: AssertionArgs, trainSet: TrainInput): Promise<string> => {
+  const { client, logger, appId } = args
+  logger.debug('assert training starts')
+
+  const contexts = _getContexts(trainSet)
+  const trainRes = await client.startTraining(appId, { ...trainSet, contexts })
+  if (!trainRes.success) {
+    throw new UnsuccessfullAPICall(trainRes.error)
+  }
+
+  const { modelId } = trainRes
+  chai.expect(modelId).to.be.a('string').and.not.to.be.empty
+
+  const ts = await pollTrainingUntil({
+    nluClient: client,
+    modelId,
+    appId,
+    maxTime: ms('5s'),
+    condition: (ts: TrainingState) => ts.status !== 'training-pending'
+  })
+
+  chai.expect(ts.status).to.equal('training')
+  chai.expect(ts.error).to.be.undefined
+
+  return modelId
+}
+
+export const assertLintingStarts = async (
+  args: AssertionArgs,
+  speed: IssueComputationSpeed,
+  trainSet: TrainInput
+): Promise<string> => {
+  const { client, logger, appId } = args
+  logger.debug('assert linting starts')
+
+  const contexts = _getContexts(trainSet)
+  const trainRes = await client.startLinting(appId, { ...trainSet, contexts, speed })
+  if (!trainRes.success) {
+    throw new UnsuccessfullAPICall(trainRes.error)
+  }
+
+  const { modelId } = trainRes
+  chai.expect(modelId).to.be.a('string').and.not.to.be.empty
+
+  const ts = await pollLintingUntil({
+    nluClient: client,
+    modelId,
+    appId,
+    speed,
+    maxTime: ms('5s'),
+    condition: (ts: LintingState) => ts.status !== 'linting-pending'
+  })
+
+  const allowed: LintingStatus[] = ['linting', 'done'] // linting process is currently too short
+  chai.expect(ts.status).to.be.oneOf(allowed)
+  chai.expect(ts.error).to.be.undefined
+
+  return modelId
+}
+
+export const assertTrainingFails = async (
+  args: AssertionArgs,
+  trainSet: TrainInput,
+  expectedError: TrainingErrorType
+): Promise<void> => {
+  const { client, logger, appId } = args
+  logger.debug('assert training fails')
+
+  const contexts = _getContexts(trainSet)
+  const trainRes = await client.startTraining(appId, { ...trainSet, contexts })
+  if (!trainRes.success) {
+    throw new UnsuccessfullAPICall(trainRes.error)
+  }
+
+  const { modelId } = trainRes
+  chai.expect(modelId).to.be.a('string').and.not.to.be.empty
+
+  const ts = await pollTrainingUntil({
+    nluClient: client,
+    modelId,
+    appId,
+    maxTime: ms('5s'),
+    condition: (ts: TrainingState) => ts.status !== 'training-pending' && ts.status !== 'training'
+  })
+
+  chai.expect(ts.status).to.equal('errored')
+  chai.expect(ts.error?.type).to.equal(expectedError)
+}
+
+export const assertQueueTrainingFails = async (
+  args: AssertionArgs,
+  trainSet: TrainInput,
+  expectedError: http.ErrorType
+): Promise<void> => {
+  const { client, logger, appId } = args
+  logger.debug('assert queue training fails')
+
+  const contexts = _getContexts(trainSet)
+  const trainRes = await client.startTraining(appId, { ...trainSet, contexts })
+
+  if (trainRes.success) {
+    throw new Error(`Expected Queue training to fail with error: "${expectedError}"`)
+  }
+
+  const { error } = trainRes
+  chai.expect(error.type).to.equal(expectedError)
+}
+
+export const assertCancelTrainingFails = async (
+  args: AssertionArgs,
+  modelId: string,
+  expectedError: http.ErrorType
+): Promise<void> => {
+  const { client, logger, appId } = args
+  logger.debug('assert cancel training fails')
+
+  const cancelRes = await client.cancelTraining(appId, modelId)
+  if (cancelRes.success) {
+    throw new Error(`Expected training cancel to fail with error: "${expectedError}"`)
+  }
+
+  const { error } = cancelRes
+  chai.expect(error.type).to.equal(expectedError)
+}
+
+export const assertTrainingCancels = async (args: AssertionArgs, modelId: string): Promise<void> => {
+  const { client, logger, appId } = args
+  logger.debug('assert training cancels')
+
+  const cancelRes = await client.cancelTraining(appId, modelId)
+  if (!cancelRes.success) {
+    throw new UnsuccessfullAPICall(cancelRes.error)
+  }
+
+  const ts = await pollTrainingUntil({
+    nluClient: client,
+    modelId,
+    appId,
+    maxTime: ms('5s'),
+    condition: (ts: TrainingState) => ts.status !== 'training'
+  })
+
+  chai.expect(ts.status).to.equal('canceled')
+  chai.expect(ts.error).to.be.undefined
+}
+
+export const assertTrainingFinishes = async (args: AssertionArgs, modelId: string): Promise<void> => {
+  const { client, logger, appId } = args
+  logger.debug('assert training finishes')
+
+  const trainProgressBar = new cliProgress.Bar({
+    format: 'Training: [{bar}] ({percentage}%), {duration}s',
+    stream: process.stdout,
+    noTTYOutput: true
+  })
+  trainProgressBar.start(100, 0)
+
+  const updateProgress = (p: number) => {
+    if (p === 1) {
+      p = 0.99
+    }
+    trainProgressBar.update(p * 100)
+  }
+
+  try {
+    const ts = await pollTrainingUntil({
+      nluClient: client,
+      modelId,
+      appId,
+      maxTime: -1,
+      condition: (ts: TrainingState) => {
+        updateProgress(ts.progress)
+        return ts.status !== 'training'
+      }
+    })
+    trainProgressBar.update(100)
+
+    chai.expect(ts.status).to.equal('done')
+    chai.expect(ts.error).to.be.undefined
+  } finally {
+    trainProgressBar.stop()
+  }
+}
+
+export const assertLintingFinishes = async (
+  args: AssertionArgs,
+  speed: IssueComputationSpeed,
+  modelId: string
+): Promise<DatasetIssue<IssueCode>[]> => {
+  const { client, logger, appId } = args
+  logger.debug('assert linting finishes')
+
+  const ts = await pollLintingUntil({
+    nluClient: client,
+    modelId,
+    appId,
+    speed,
+    maxTime: -1,
+    condition: (ts: LintingState) => {
+      return ts.status !== 'linting'
+    }
+  })
+
+  chai.expect(ts.status).to.equal('done')
+  chai.expect(ts.error).to.be.undefined
+  return ts.issues
+}
+
+export const assertTrainingsAre = async (args: AssertionArgs, expectedTrainings: TrainingStatus[]) => {
+  const { client, logger, appId } = args
+  logger.debug(`assert trainings are: [${expectedTrainings.join(', ')}]`)
+
+  const lsTrainingRes = await client.listTrainings(appId)
+  if (!lsTrainingRes.success) {
+    throw new UnsuccessfullAPICall(lsTrainingRes.error)
+  }
+  const { trainings } = lsTrainingRes
+  const trainStatuses = trainings.map((ts) => ts.status)
+  chai.expect(trainStatuses).to.include.members(expectedTrainings)
+}
+
+export const assertPredictionFails = async (
+  args: AssertionArgs,
+  modelId: string,
+  utterance: string,
+  expectedError: http.ErrorType
+) => {
+  const { client, logger, appId } = args
+  logger.debug('assert prediction fails')
+
+  const predictRes = await client.predict(appId, modelId, { utterances: [utterance] })
+  if (predictRes.success) {
+    throw new Error(`Expected Prediction to fail with error: "${expectedError}"`)
+  }
+  const { error } = predictRes
+  chai.expect(error.type).to.equal(expectedError)
+}
+
+export const assertLanguageDetectionWorks = async (args: AssertionArgs, utterance: string, expectedLang: string) => {
+  const { client, logger, appId } = args
+  logger.debug('assert language detection works')
+
+  const detectLangRes = await client.detectLanguage(appId, { utterances: [utterance], models: [] })
+  if (!detectLangRes.success) {
+    throw new UnsuccessfullAPICall(detectLangRes.error)
+  }
+  const { detectedLanguages } = detectLangRes
+  chai.expect(detectedLanguages).to.have.length(1)
+  chai.expect(detectedLanguages[0]).to.equal(expectedLang)
+}
+
+export const assertIntentPredictionWorks = async (
+  args: AssertionArgs,
+  modelId: string,
+  utterance: string,
+  expectedIntent: string
+) => {
+  const { client, logger, appId } = args
+  logger.debug('assert intent prediction works')
+
+  const predictRes = await client.predict(appId, modelId, { utterances: [utterance] })
+  if (!predictRes.success) {
+    throw new UnsuccessfullAPICall(predictRes.error)
+  }
+  const { predictions } = predictRes
+  chai.expect(predictions).to.have.length(1)
+
+  chai.expect(predictions[0].contexts).to.have.length.greaterThanOrEqual(1)
+  const mostConfidentCtx = _.maxBy(predictions[0].contexts, (c) => c.confidence)
+
+  const mostConfidentIntent = _.maxBy(mostConfidentCtx?.intents, (i) => i.confidence)
+  chai.expect(mostConfidentIntent?.name).to.equals(expectedIntent)
+}
+
+export const assertModelsPrune = async (args: AssertionArgs) => {
+  const { client, logger, appId } = args
+  logger.debug('assert models can be pruned')
+
+  const pruneRes = await client.pruneModels(appId)
+  if (!pruneRes.success) {
+    throw new UnsuccessfullAPICall(pruneRes.error)
+  }
+
+  const modelRes = await client.listModels(appId)
+  if (!modelRes.success) {
+    throw new UnsuccessfullAPICall(modelRes.error)
+  }
+
+  const { models } = modelRes
+  chai.expect(models).to.have.length(0)
+}
+
+export const assertModelWeightsDownloadFails = async (args: AssertionArgs, modelId: string, expectedStatus: string) => {
+  const { client, logger, appId } = args
+  logger.debug('assert model weights download fails')
+
+  const downloadRes = await client.modelWeights.download(appId, modelId, { responseType: 'stream' })
+  if (downloadRes.status === 'OK') {
+    throw new Error(`Expected Model Download to fail with error: "${expectedStatus}"`)
+  }
+
+  const { status } = downloadRes
+  chai.expect(status).to.be.eq(expectedStatus)
+}
+
+export const assertModelWeightsDownload = async (args: AssertionArgs, modelId: string, fileLocation: string) => {
+  const { client, logger, appId } = args
+  logger.debug('assert model weights download')
+
+  const downloadRes = await client.modelWeights.download(appId, modelId, { responseType: 'stream' })
+  if (downloadRes.status !== 'OK') {
+    throw new UnsuccessfullModelTransfer(downloadRes.status, 'GET')
+  }
+
+  await new Promise((resolve, reject) => {
+    downloadRes.weights.on('end', resolve)
+    downloadRes.weights.on('error', reject)
+    downloadRes.weights.pipe(fs.createWriteStream(fileLocation))
+  })
+}
+
+export const assertModelWeightsUploadFails = async (
+  args: AssertionArgs,
+  fileLocation: string,
+  expectedStatus: string
+) => {
+  const { client, logger, appId } = args
+  logger.debug('assert model weights upload fails')
+
+  const modelWeights = await fs.promises.readFile(fileLocation)
+
+  const uploadRes = await client.modelWeights.upload(appId, modelWeights)
+  if (uploadRes.status === 'OK') {
+    throw new Error(`Expected Model Upload to fail with error: "${expectedStatus}"`)
+  }
+
+  const { status } = uploadRes
+  chai.expect(status).to.be.eq(expectedStatus)
+}
+
+export const assertModelWeightsUpload = async (args: AssertionArgs, fileLocation: string) => {
+  const { client, logger, appId } = args
+  logger.debug('assert model weights upload')
+
+  const modelWeights = await fs.promises.readFile(fileLocation)
+
+  const uploadRes = await client.modelWeights.upload(appId, modelWeights)
+  if (uploadRes.status !== 'OK') {
+    throw new UnsuccessfullModelTransfer(uploadRes.status, 'GET')
+  }
+}
diff --git a/packages/nlu-e2e/src/datasets/clinc50_42.ts b/packages/nlu-e2e/src/datasets/clinc50_42.ts
new file mode 100644
index 00000000..1cfe6bd4
--- /dev/null
+++ b/packages/nlu-e2e/src/datasets/clinc50_42.ts
@@ -0,0 +1,1359 @@
+import { TrainInput } from '@botpress/nlu-client'
+
+export const trainSet: TrainInput = {
+  language: 'en',
+  entities: [],
+  seed: 42,
+  intents: [
+    {
+      name: 'translate',
+      slots: [],
+      contexts: ['main'],
+      utterances: [
+        'what expression would i use to say i love you if i were an italian',
+        "tell me how to say, 'it is a beautiful morning' in italian",
+        'how do you say hi in french',
+        'how should i say hello in french',
+        'how do you say goodbye in spanish',
+        'translate hello english to french',
+        'what phrase means goodbye in hawaii',
+        'what spanish word means hello',
+        'how do you say hello in french',
+        'what is spanish for hello',
+        'how would you say can i borrow five dollars in portuguese',
+        'can you translate hello into swedish for me',
+        'can you translate i love you into french for me',
+        'can you translate good bye into russian for me',
+        'how do they say "where\'s the bathroom" in spanish',
+        'how do they say yes in brazil',
+        'can you translate cat into russian for me',
+        'i wanna know a phrase in spanish',
+        'i wanna learn a phrase in russian',
+        'how do they say hello in germany'
+      ]
+    },
+    {
+      name: 'transfer',
+      slots: [],
+      contexts: ['main'],
+      utterances: [
+        'take $20000 from savings and put it in checking',
+        'send 50 dollars between bank of america and chase accounts',
+        'send 2000 dollars between chase and rabobank accounts',
+        'send 400 dollars between city bank and usaa accounts',
+        'move $40 from account b to account a',
+        'i would like to transfer from one account to my second one',
+        'transfer sixty dollars to dad from my biggest accnt',
+        'move 57 dollars from saving into mom',
+        'i want you to send ten dollars from bank of america to capital one',
+        'please transfer $100 from my checking to my savings account',
+        'i want to transfer funds between accounts',
+        'go ahead and move $200 from amazon to my bank account',
+        'move my money please',
+        'please switch $s checking to mortage',
+        'please transfer $s from saving to checking',
+        'help me transfer $x from credit to debit',
+        'transfer 100 dollars checking to savings',
+        'take 100 dollars from checking and send it to savings',
+        'i need to throw some money into that other account',
+        'i want to initiate a transfer from one account to the other'
+      ]
+    },
+    {
+      name: 'timer',
+      slots: [],
+      contexts: ['main'],
+      utterances: [
+        'i need a 7 minute timer',
+        'please set a timer for 40 minutes',
+        'set timer for (x) minutes',
+        'i need a 10 minute timer',
+        'set a ten second timer',
+        'i must set a timer',
+        'set a timer for twelve seconds',
+        'tell me when two minutes are up',
+        '"can u set a timer
for 5 minutes', + 'you need to set the timer for me', + 'i would like a timer set', + 'i need you to set a time', + 'start a countdown for 20 minutes', + 'please start counting down from 10 minutes', + 'alert me in 20 minutes', + 'timer 10 minutes', + 'please run a timer for me', + 'set a seven minute timer', + 'set timer for 5 minutes', + 'please, can you set the timer for me' + ] + }, + { + name: 'fun_fact', + slots: [], + contexts: ['main'], + utterances: [ + 'lets hear an interesting fact about funko pops', + 'know any interesting facts', + 'lets hear a fact', + 'i want to learn an interesting fact about cats', + 'read me some fun facts', + "can you tell me something i don't know about banks", + 'do you know any fun facts about shampoo', + 'give me some trivia about lebron james', + 'do you know any fun facts about mt everest', + 'explain some trivia about lebron james', + 'tell me a fun trivia bit about artificial intelligence', + "i'd like to hear a fun fact about the nfl", + 'can you share some trivia with me about us history', + 'tell me some trivia about birds', + 'do you know any trivia about ostriches', + 'give me trivia about bats', + "what's a fun fact about mythology", + 'tell me a fun fact about butterflies', + 'what is a fun fact about dogs', + 'tell me some something trivia' + ] + }, + { + name: 'payday', + slots: [], + contexts: ['main'], + utterances: [ + 'on what date do i get paid', + 'what date do i get my next paycheck', + "when's the the last time i was paid", + 'when did i get my last paycheck', + 'do you know when i can expect my next paycheck', + 'i need to know when i got latest paycheck', + 'can you tell me what day my next paycheck will come', + 'when does my paycheck arrive', + 'when will my paycheck be available', + 'when do i get paid again', + 'what is my pay date', + 'when will i get paid next', + 'when should i expect my next paycheck', + 'when is the next time i get paid', + 'when is my paycheck coming', + 'when was i last paid', + 'tell me when my next paycheck will be here', + 'when was i paid last', + 'tell me on what day did i get paid last', + 'when did they deposit my last salary' + ] + }, + { + name: 'what_can_i_ask_you', + slots: [], + contexts: ['main'], + utterances: [ + 'what things can you help me with exactly', + 'what sorts of subjects are you well versed in', + 'tell me what you are capable of answering', + 'what information can i ask you', + 'what are the kinds of things you can help me with', + 'what kind of questions are you good at answering', + 'can you help me with anything i need', + 'what types of things are you able to do', + 'what questions do you respond to', + 'what things can you do', + 'what kinds of things can you do', + 'what subjects are you versed in', + 'can you tell me the subjects you know best', + "tell me the subjects you're aware of", + 'what kinds of questions can i ask of you', + 'are there certain types of questions i can ask you', + 'how can you assist me', + 'please inform me of the types of subjects are you familiar with', + 'i would like to know the types of subjects are you familiar with', + 'what are your capabilities' + ] + }, + { + name: 'confirm_reservation', + slots: [], + contexts: ['main'], + utterances: [ + 'can you check my reservations for mortons under david winters', + 'verify that my reservations at won wons for joe lee are good', + "can you please confirm that i have a 6:00 pm table reserved under michelle solomon at devon's", + 'please confirm my reservation at parc for 8:00 pm, with the name 
denise jack', + 'please confirm my reservation for thursday at 10am', + "can you verify that i have a reservation at o'reilly's for 9", + "do i have reservations at o'neal's at 3 pm", + 'do you know if my reservation for dono is confirmed at 8 pm', + 'i need to know if i have reservations at arufflo at 5 pm', + 'can you confirm my reservation for kevin at red robin', + 'i need you to confirm my reservation for kevin at red robin', + 'i need to confirm that i have a reservation at red robin for josh', + 'please confirm my reservation for red robin at 9', + 'confirm my reservation for red robin at 8', + 'can you confirm my reservation for 6 pm on the 13th of february', + 'can you verify mike has a reservation at black rock at 6 pm', + 'i would like you to confirm my reservation for next saturday at 11 am', + 'please confirm my reservation for march 8th at 12:00', + 'please confirm my reservation for feburary 21st at 6pm', + 'confirm my taco house reservation for jean' + ] + }, + { + name: 'who_made_you', + slots: [], + contexts: ['main'], + utterances: [ + 'what company owns your code', + 'who was the inventor of ai', + 'i want to know who made you', + 'tell me who made you', + 'who invented you', + 'which company is responsible for your design', + 'who did the programming for this ai', + 'are you made in the usa', + 'may i know who made you', + 'who is your creator', + 'who crafted you', + 'the programmer who made you is who', + 'who built you', + 'please identify the name of the creator of this ai', + 'how was this ai created', + 'will you tell me who made the ai', + 'ai who made you', + 'can you tell me who created you', + 'who brought you to life', + 'what company sponsored you' + ] + }, + { + name: 'distance', + slots: [], + contexts: ['main'], + utterances: [ + 'how long will it take to drive to the kroger in westwood', + "how long does it take to get to spago's in los angeles", + 'if i take the bus, how long will it take to get to dubrovnik', + "what's my eta on the bus ride to church", + 'how far is the airport in dallas from my current location', + 'tell me how far i am from home in time and miles', + 'how far is the grand canyon from my current location in phoenix, az', + 'how many hours will it take to get to my destination', + 'how many miles will it take to get to my destination', + 'where is the closest gas station', + "what's the estimated time to get to ppg paints arena in pittsburgh via bus", + 'from here, i take this bus to go to brooklyn and how long is the commute', + 'how long will it take to get to outback in phoenix', + 'how long will it take to get to phoenix', + 'how long does it take to get to virginia by bus', + 'what is the time frame to get to phoenix', + 'what is the length of time it takes to get to phoenix', + 'utilizing an automobile known as a bus, how long will it take to get to detroit', + 'what amount of time will pass in order for a person at our location to take bus to detroit', + 'time taken to reach doctors office' + ] + }, + { + name: 'user_name', + slots: [], + contexts: ['main'], + utterances: [ + 'is there a name that you call me', + 'by what name people call me', + 'say what you think my name is', + 'tell me what you think my name is', + 'what would you like to refer to me as', + 'what are you going to refer to me as', + 'you saved my name how', + 'how did you save my name', + 'by what nym do you think of me', + 'can you tell me what you refer to me as', + 'what is my name saved as in your system', + 'what is the name you associate with me', + 
'what do you refer to me as', + 'how do you address me', + 'tell me what you believe my name to be', + "what's the name that you have for me, please", + 'do you call me a certain name', + 'i need to know what names you have for me', + 'so what is my name saved as', + 'so what is my current name saved as' + ] + }, + { + name: 'credit_score', + slots: [], + contexts: ['main'], + utterances: [ + 'inform me of my credit score', + 'what is my credit score', + 'please get my credit score', + "i'd like to know what my credit rating is", + 'please tell me my credit rating', + 'get my credit score', + 'how do i find information about my credit score', + 'how do i locate my current credit score', + 'i really wanna know my credit score', + 'i wanna know my credit score', + 'tell me what my credit rating is', + "i'd like the number for my credit score", + 'websites that share credit ratings', + 'where can i find my credit score', + 'help me locate my credit score', + 'lets look up my credit score', + 'i would like to look up my credit score please', + 'i want to find out what my credit score is', + 'show me my credit score', + 'is my credit report low' + ] + }, + { + name: 'new_card', + slots: [], + contexts: ['main'], + utterances: [ + 'can i apply for a discover card', + 'what does it take to get a new card', + 'please show me how to apply for a visa card', + 'i need to know how to apply for a visa card', + 'tell me how to apply for a visa card', + 'visa card, i want to apply', + 'can you apply for idaho independent card for me', + 'i wanna apply for a new credit card', + 'i got to apply for a new credit card', + 'i would like to apply for a visa card', + 'can you tell me if i might be able to apply for a new credit card', + 'can you help me apply for a visa card', + 'application for a mastercard', + 'what do i need to do to get an american express card', + "how do i apply for a barclay's card", + 'walk me through applying for a visa card', + "i want to submit an application for a barclay's card", + 'i need to know how to apply for a mastercard', + 'i feel like it is time for me to sign up for a new credit card', + 'could you please let me apply for a new credit card' + ] + }, + { + name: 'repeat', + slots: [], + contexts: ['main'], + utterances: [ + 'one more time please', + 'i did not hear you', + 'can you say that over', + 'repeat what you just said', + "i didn't catch that say it again", + 'please say that one more time', + 'hold on what was that you just said', + 'i did not quite hear you, can you say it again', + 'i would like you to repeat yourself', + "i didn't hear you can you repeat that", + 'could you repeat that, please', + 'i need for you to ask the question again', + 'repeat that please', + 'may you repeat what you said again', + "i didn't hear what you just said, can you repeat it", + 'i didnt understand what you just said, can you say it again', + 'what did you just mention', + 'please say it again', + "sorry, i didn't hear you, can you say that gain", + "i didn't hear you, can you repeat that louder" + ] + }, + { + name: 'todo_list_update', + slots: [], + contexts: ['main'], + utterances: [ + 'i need to add the chore of vacuuming to my task list', + 'put wash the counters down on my list of pending tasks', + 'please take shoveling the car off my todo list', + "i don't want to do anything today so just clear the todo list", + 'please include feeding the fish on my to do list', + 'please clear out my whole to do list', + 'please cross off schedule acupuncture appointment off of the to 
do list', + 'please also list wash laundry on my to do list', + "will you please put remember to drop off at the dry cleaner's to my current to do list", + 'would you kindly remove five mile run from my list of things to do', + 'i need to do dishes put it on my to do list', + 'cleaning needs to be on my to do list', + 'add cleaning to my to do list', + 'help remind me that i need to add laundry to my list of housework', + 'add laundry to my list of shit to do', + 'put laundry on my to do list', + 'cross volunteering off my todo list', + 'take everything off my todo list', + 'can you put mopping on my to do list', + 'place cleaning the backyard on my list of things to do' + ] + }, + { + name: 'uber', + slots: [], + contexts: ['main'], + utterances: [ + 'i need an uber for 6 people to the movies', + 'book a 6 person uber to the movies', + "please book an uber for chima's", + 'can you please book an uber to the zoo', + 'get an uber to pick me up at work and take me home', + 'find an uber xl to take me to the kroger near me', + 'i need an uber with a car seat for 2 adults and one child to take us to wilmington, de', + 'can i get an uber to central park for 5', + 'i want to reserve an uber to go to the airport', + 'help me get an uber to ann arbor', + 'i need an uber to the sears tower', + 'can you get me an uber to the science museum', + 'am i able to get an uber to the movies', + 'i need an uber to the movies', + '3 of us need to get to union station via uber', + 'can i get an uber to union station for 3 riders', + 'get me an uber to the airport', + 'i am stranded and need uber', + 'i need an uber for 2 for orlando', + "i'm going to need an uber to take 4 people to the mall" + ] + }, + { + name: 'calculator', + slots: [], + contexts: ['main'], + utterances: [ + 'what is 7 x 7', + 'what is the sum of 10 plus 5', + 'what is 4 + 4', + 'what is 4 x 4', + 'what is 20+ 5', + 'what is the square root of 10294', + 'what is 213 times 3', + 'what is the square root of 144', + 'what is 78 times 85', + 'help me with math', + 'what is 10 to the 12th power', + 'how many times can 12 go into 600', + 'add twelve and twelve please', + 'what is 10 + 10', + 'what is the square root of 5', + 'what is 38% of 389209', + 'what is 1 fifth times 2 fifths', + "what's 3 plus 3", + 'what’s the answer to 5-6=', + 'what does 6 x 1 equal' + ] + }, + { + name: 'carry_on', + slots: [], + contexts: ['main'], + utterances: [ + 'how my carry ons does spirit airlines allow me for a flight to dallas', + 'could you list out the carry-on restrictions for american airlines', + 'is there are limit of carry ons for my flight with aer lingus to cork', + 'delta has too many carry-on restrictions! 
do you know them', + 'can i carry on a garment bag, a small travel bag, and a back pack on my trip with american airlines', + 'what are the carry-on limits for flying domestically with delta', + 'how many carry ons can i take on a flight with united to lax', + 'what is the carry-on policy say for flights on delta airlines', + 'what are the carry-on restrictions for frontier airlines', + 'how many bags can i carry-on for flights on singapore airlines', + 'what are the carry-on restrictions for southwest airlines', + "what can't i carry-on to delta", + 'what is the carry on limit', + "what are delta's carry-on policies for flights", + 'how strict is spirit when it comes to carry ons', + 'can i have 3 carry ons with delta at jfk', + 'what are the requirements for carry on on flights with united airlines', + 'please find the rules for carry on when flying with spirit airlines', + 'on a flight with allegiant to orlando, how many carry ons can i take', + 'when taking a flight with porter to toronto, how many carry ons can i take' + ] + }, + { + name: 'schedule_maintenance', + slots: [], + contexts: ['main'], + utterances: [ + 'locate someone to look at my car because my check engine light is on', + 'i need to find somewhere to check my tires out', + 'i need to get my tired checked at one of the location', + 'i want to to schedule maintenance on my vehicle', + 'find local deals who schedule maintenance on cars', + 'please schedule an appointment for my oil to get changed', + 'my car needs maintenance scheduled', + 'my check engine light is on and someone needs to look at it', + 'set an appointment for an oil change', + 'book an oil change please', + 'can you please obtain an oil change appointment for me', + 'where can i get my tires checked and how can i schedule it', + 'i need to schedule a car repair', + 'how do i schedule car maintenance', + 'i wanna schedule some car maintenance', + 'can someone look at my car cause the check engine light is on', + 'i got to schedule some car maintenance', + 'who can look at my car, the engine light is on', + "i don't know why my check engine light is on, i need a mechanic", + 'find me a mechanic' + ] + }, + { + name: 'ingredient_substitution', + slots: [], + contexts: ['main'], + utterances: [ + 'instead of pepper, can i use salt', + 'can i take out the olive oil and use lard', + 'can i substitute rice for potatoes', + 'can i substitute skim milk for whole milk', + 'can i use dark brown sugar instead of brown sugar', + 'can i substitute apple juice for wine', + 'can i use flour as a substitute for bread crumbs', + 'i need to swap salt for baking soda', + 'i wanna sub sugar for salt', + 'can i use sour cream in place of creme fraiche', + 'can i swap yogurt for sour cream', + 'is almond milk an acceptable substitute for milk', + 'can i swap coconut milk for condensed milk', + 'can i substitute milk for water', + 'can i substitute salt for pepper', + 'can i use margarine instead of butter', + 'is it possible to use sausage instead of hot dog', + 'is it ok to substitute spinach for lettuce', + "can i use normal flour if i don't have bread flour", + 'can i use mayonnaise instead of miracle whip' + ] + }, + { + name: 'todo_list', + slots: [], + contexts: ['main'], + utterances: [ + 'what is left to do today', + 'the tasks for today, what are they', + 'do i have cleaning the counters on my to-do list', + 'tell me what is on the list of things to do', + 'tell me what is on my to do list', + 'please read my todo list', + 'do i have brush my teeth on my todo list', + 
'what chores do i have waiting on my reminder list', + 'is doing my laundry already on my todo list', + 'did i add purchase tickets to the penguin game to my todo list', + 'when is babysitting on my to do list', + 'can you please tell me what is on my to-do list', + 'i wonder what my to-do list looks like for today', + "go ahead and say all of the points on my list of to-do's please", + 'what have i got on my to-do list', + 'please let me know if giving the dog a bath is on my list of tasks to complete', + 'please inform me of what tasks i have listed on my to do list', + 'can you read my to do list to me please', + 'check my to do list to see if feeding the fish is on it', + 'do i have watering the plants on my to do list' + ] + }, + { + name: 'change_accent', + slots: [], + contexts: ['main'], + utterances: [ + "could you change the accent you're using", + 'i want your accent changed to the male british one', + 'i need for you to change your accent to the male british one', + 'change to a male voice', + 'please change your accent to a british male', + 'is there a way to change the voice to male', + 'go ahead and switch to the female voice', + 'please change to the female voice', + 'i wanna change to the female voice', + 'switch over to the female voice', + "is it possible for you to speak with a british gent's accent", + 'can i get you to talk like a pouty english guy', + 'i want to hear a male british accent', + 'use male voice now', + 'speak as a british male', + 'change to female voice', + 'can you swap to male voice', + 'i want to use the female voice now', + 'i prefer female voice now', + 'switch to male british accent' + ] + }, + { + name: 'bill_due', + slots: [], + contexts: ['main'], + utterances: [ + 'how much time left to pay my bill', + 'when is my xfinity bill due', + 'how many more days before my verizon bill is due', + 'what is the due date of my bill', + 'i would like to know my electric bills date it needs to be payed', + 'i need to know the due date for my credit card', + 'how do i know when to pay my gas bill', + 'give me the date my bill is due', + 'on what day do i have to pay my nordstrom bill', + 'when is my chase visa due', + 'whats the deadline for amex payment', + 'do you know when i need to pay my mastercard', + "when's the next time i have to pay the insurance", + 'when does the electric bill up', + 'what do i do to check when my next credit card payment is', + 'when is my at&t bill due', + 'i want to know when a bill is due', + 'what day is the bill due', + 'when is the bill due', + 'how do i find when my cable bill is due' + ] + }, + { + name: 'calories', + slots: [], + contexts: ['main'], + utterances: [ + "what's the expected calories in a cream filled cookie", + "what's the calorie count for tuna casserole", + "what's the amount of calories in an cream filled oatmeal cookie", + 'how many calories should i plan to be in a donut', + 'how many calories would i estimate for fettuccine alfredo', + "what's the caloric content if you eat roast beef", + 'do cheetos have a lot of calories', + 'how many calories are in a cup of white rice', + 'what is the calorie count for one cookie', + 'if i ate a cookie, how many calories would i be ingesting', + 'how many calories in gum', + 'please tell me the total calories a single serving of chocolate ice cream is expected to contain', + 'please tell me how many calories one chocolate bar contains', + 'what is the calorie count in a cookie', + 'what is the calorie count for muffins', + 'what amount of calories are in one 
muffin', + 'look up the calories in an apple', + 'tell me the calorie content of an apple', + 'how many calories are in an apple', + 'how many calories are in cheerios' + ] + }, + { + name: 'damaged_card', + slots: [], + contexts: ['main'], + utterances: [ + 'what should i do with an unusable and damaged card', + 'my card got melted and i need to report it', + 'my card got melted in the dryer and i need to report it, please', + 'the magnetic strip on my card is scratched, let the card company know', + 'my card is cracked, please tell the card company', + "my card is too scratched and won't read correctly", + 'the sticker on my card is peeling off', + 'my card is partially damaged how do i report this', + 'my card is not working and i need to let them know', + 'the chip on my card is damaged and i need a new one', + 'my card is cracked and i want to report it', + 'report to the company that i damaged my card', + 'call the card company and ask them to replace my card', + 'i need a report form for my damaged, demagnetized card', + 'how to tell the company that my credit card melted', + 'how do i report a melted credit card', + 'i accidentally put my credit card in the shredder', + 'i was using my credit card as a cutting board and accidentally sliced it in half', + 'can you assist me with reporting that the atm nicked my card', + 'my card fell in the toilet and now the chip does not work' + ] + }, + { + name: 'restaurant_reviews', + slots: [], + contexts: ['main'], + utterances: [ + 'does have good reviews have great reviews', + 'so does outback steakhouse have good reviews', + 'what are people saying about chipotle', + 'what are the ratings for chipotle', + 'how good are the ratings for chic-fil-a', + 'does longhorn steakhouse have good reviews', + 'how many stars does la vignette have', + "are the chili's reviews any good", + 'i want to know how the mcdonalds reviews are', + 'how good are the ratings for chez panisse', + 'what are the reviews like for bar tartine', + 'tell me the reviews for bjs', + 'what are the ratings like for chilis', + 'what are the reviews for mountain mikes', + 'i need to hear reviews for panda express', + 'does chillis have good reviews', + "are there any positive reviews for wendy's", + 'pull up the ratings for macaroni grill', + 'has ruby tuesday got good reviews', + "tell me how good ihop's ratings are" + ] + }, + { + name: 'schedule_meeting', + slots: [], + contexts: ['main'], + utterances: [ + 'i want to know if there is meeting room available at 8', + 'meeting room availability from 8:00 please', + 'are there any meetings room available between 8:00 and 9:00 am', + 'i want to check if there is any meeting room available between 8:00 and 10:00 am', + 'meeting room availability at 8:00 please', + 'do you know how do i schedule a meeting', + 'can you schedule a meeting with james at the office, please', + 'how do i create a meeting', + 'can you schedule a meeting with james at the office', + 'do you have a meeting room open from noon until 2:00 pm', + 'i want to schedule a meeting with tom for 6pm', + 'i want you to schedule a meeting with carrie and lisa', + 'could you schedule a meeting with john smith at 1 pm tomorrow', + 'can you schedule a meeting with damon for 1', + 'are any meeting rooms open between 9 and 10', + 'i need to schedule a meeting with mae at 5pm', + 'is it possible to book a meeting room between 10 and 11', + 'check if meeting rooms are free from 6 to 7', + 'are there rooms available between 5 and 530', + 'if i want to schedule a meeting, how 
do i do it' + ] + }, + { + name: 'exchange_rate', + slots: [], + contexts: ['main'], + utterances: [ + "what's the exchange rate between dollars and pesos", + 'whats dollars won in 15', + "what's dollars yen in 10", + 'whats euros kroner in 25', + 'i wanna know five dollars in yen and rubles', + 'what is 5 in yen and rubles', + "what's the currency conversion between usd and pounds", + 'tell me five dollars in yen and rubles', + 'how many dollars can i exchange for 100000 yen', + 'usd to yen is what right now', + 'what is the exchange rate from pounds sterling to us dollars', + 'tell me how many pesos equal 500 dollars', + 'how does 10 pounds convert to euros', + 'how many dollars is one mexican peso', + 'what is the current exchange rate for the dollar and the yen', + 'what is the exchange rate between rubles and us dollars', + 'how many dollars can i exchange for 200 pounds', + 'how many canadian dollars can i exchange for 200 yen', + 'how many dollars can i exchange for 200 yen', + 'what is the yen worth in terms of the peso' + ] + }, + { + name: 'change_volume', + slots: [], + contexts: ['main'], + utterances: [ + 'raise the volume to 4 please', + 'make the speakers louder', + 'go ahead and your volume', + 'you must increase your volume', + 'please increase your speaker volume', + 'please increase the volume to 4 please', + 'i need you to increase your volume', + 'please go ahead and increase your speaker volume', + "i can't hear you", + 'would you please increase your speaker volume', + "i'd like to turn up the volume", + 'turn up the speaker volume', + 'turn your vocals down', + 'turn your voice box down', + 'turn down your volume', + 'change the volume to 4 now please', + 'i want volume set to 4', + 'turn the volume to level 4', + 'turn down volume', + 'increase volume' + ] + }, + { + name: 'accept_reservations', + slots: [], + contexts: ['main'], + utterances: [ + 'will qdoba take reservations', + 'does gramercy tavern in new york accept reservations', + "can you make reservations at steak 'n' shake", + 'will nobu take reservations', + 'do they take reservations at carrabbas', + 'do they take reservations at applebees', + 'does applebees in trenton do reservations', + 'does stanleys take brunch reservations', + 'i need to know if capones does reservations in trenton', + 'does marios in brooklyn take reservations', + 'can you tell me if zeus does reservations', + 'do you know if cheese cake factory does reservations', + 'does iron skillet at the truck stop trake reservations', + 'does moes in la except rerservations', + "does chili's take reservations", + "does michael's accept reservations", + "can you make a reservation at michael's", + "does the restaurant michael's take reservations", + "do they take reservations at mendy's", + 'do you know if outback allows reservations' + ] + }, + { + name: 'account_blocked', + slots: [], + contexts: ['main'], + utterances: [ + "i don't know the reason my account is blocked", + 'find out for me why is my bank account frozen', + 'tell me why am i locked out of my bank account', + 'what is the reason i am locked out of my bank account', + 'why is my account blocked', + 'find out the reason why am i locked out of my bank account', + 'figure out why there is a block on my account please', + 'why is there a block on my account', + 'can you please unlock my bank account', + 'why cannot i take any money out from my bank account', + 'my checking account has a hold on it and i need to know why', + 'did i do something to get my bank account frozen', + 
'what would cause me to be locked out of my bank account', + 'i am very confused about why or how my bank would lock me out of my account', + 'why would the bank have frozen my account', + 'why was a hold placed on my wells fargo account', + 'why is my account locked', + 'why am i seeing a hold on my charles schwab account', + 'do you know why my account is blocked', + 'help, my account is blocked' + ] + }, + { + name: 'report_fraud', + slots: [], + contexts: ['main'], + utterances: [ + "i'm pretty sure this charge from sam's club is fraudulent", + 'can you help me with some fraudulent charges on my card', + 'i suspect fraudulent transaction', + 'i need to report fraudulent activity on my card', + 'i need to make a report due to fraudulent activity on my card', + 'due to fraudulent activity on my card i need to make a report', + 'how do i report a fraudulent charge on my visa', + "i believe there's fraud on my card", + 'i need to report fraudulent activity on my mastercard', + "i have transactions on my card that aren't mine", + 'someone misused my card and put fraudulent transactions on it', + 'i think my chase account has been compromised and fraud committed', + 'i think someone made an illegal charge to my card', + "i'm afraid there is a false transaction on my account", + 'i need help investigating a suspicious transaction', + 'i believe there are fraudulent charges on my card how can i report them', + 'send information about suspicious credit card activity', + 'i want to report fraudulent activity on my visa card', + 'i want to report fraudulent activity on my amex card, please', + 'i need to know how to report fraud on my discover card' + ] + }, + { + name: 'measurement_conversion', + slots: [], + contexts: ['main'], + utterances: [ + 'what amount of miles are in a hundred kilometers', + "what's 8 ounces in cups", + 'how do you convert ounces to grams', + 'how do you convert pounds to kilos', + 'how many inches and centimeters would i need if i had 10 feet of something', + 'what would four inches be in centimeters', + 'how do i convert four inches into centimeters', + 'what are four inches in centimeters', + 'help me to understand the conversion between tablespoons and teaspoons', + 'tell me how to convert grams into ounces', + 'what is the proper way to convert centimeters into inches', + 'how many ounces are in a cup', + 'how many tablespoons are in three cups', + 'how do i convert inches and centimeters', + 'convert 2 inches to meters', + 'how many inches are in 5 feet', + 'how can i change centimeters into inches', + 'how many meters are in 10 millimeters', + 'how can you convert miles to kilometers', + 'how would you convert yards to inches' + ] + }, + { + name: 'min_payment', + slots: [], + contexts: ['main'], + utterances: [ + "what's the smallest amount i can pay on the water bill", + "what is my bill's minimum payment", + 'tell me the minimum payment of my bill', + 'what is the minimum i can pay for my light bill', + 'what do i have to pay on my amex that is the minimum', + 'tell me the lowest amount i can pay for my cable bill', + 'what is the lowest amount i can pay for my cable bill', + "i wanna know the bill's minimum payment", + 'what is the minimum amount of money required as payment for the bill', + 'what is the minimum payment i can make on my xcel energy bill', + 'what is the smallest amount i can pay on my tmobile bill', + 'i would like to know the minimum payment for my credit card', + 'i need to know what the minimum payment is on my electric bill', + 'how much 
is the minimum payment for power bill',
+ 'how low can i reduce my phone bill before paying',
+ 'what is the lowest i can pay on my phone bill',
+ 'what is the minimum i can pay for my phone bill',
+ 'how little can i pay for my water bill',
+ 'tell me minimum to pay on landscape bill',
+ 'show me the minimum payment for my boat bill'
+ ]
+ },
+ {
+ name: 'international_visa',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'i want to visit portugal do i need a travel visa',
+ 'is it necessary to apply for a tourist visa for mexico',
+ 'do i need an international visa to go to hong kong',
+ 'i am going to travel to mexico soon; do i need to get a visa',
+ 'does mexico require me to have a visa to travel there',
+ 'do i need a visa to scotland',
+ 'do i need a visa to russia',
+ 'do i need a travel visa to visit russia',
+ 'do i need an international visa to go to mexico',
+ 'do i need an international visa to go to england',
+ 'do i need an international visa to go to spain',
+ 'is a visa required to visit cancun',
+ 'do i need a visa to travel to canada',
+ 'would i need to get a visa to go to ireland',
+ 'do i need a visa to get into canada later this year',
+ 'does travelling to that place require a visa',
+ 'does laos require a travel visa',
+ 'do i have to have a visa to go to denmark',
+ 'does france have their own version of a visa',
+ 'do i need an international visa to enter canada'
+ ]
+ },
+ {
+ name: 'reset_settings',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'go back the the settings you had originally',
+ 'go back to your original settings',
+ 'reset the factory settings now please',
+ 'change to factory settings now',
+ 'i want you to reset your factory settings',
+ 'please go back to the original settings',
+ 'go back to the original settings now',
+ 'please go to factory settings now',
+ 'go back to the factory settings for this device',
+ 'do a hard reset',
+ 'you need to reset your factory settings',
+ 'reset to your natural settings',
+ 'please change back to initial device settings',
+ 'revert to factory settings please',
+ 'restore your original settings, please',
+ 'revert to your factory settings, please',
+ 'change back to default settings',
+ 'go to factory settings',
+ 'i want you reset to factory settings',
+ 'i want to reset your settings to the original'
+ ]
+ },
+ {
+ name: 'what_is_your_name',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ "what is the ai's name",
+ "i'd like to know what to call you",
+ 'what do people call you',
+ 'what name should i use when talking with you',
+ 'is there a name that you preferred to be called by',
+ 'what name do you like to go by',
+ 'i want to know the name that was given by the person who made you',
+ 'what is you name',
+ "what's your first name",
+ 'how can i get your attention',
+ 'how are you named',
+ "what's your designation",
+ "i'd like your name",
+ 'what name do you want to be called',
+ 'i want to know your name',
+ 'ai, what can i call you',
+ 'could you tell me your name',
+ 'would you like to introduce yourself',
+ 'what name should i use to address you',
+ 'what name should i use when i call you'
+ ]
+ },
+ {
+ name: 'direct_deposit',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'give me instructions to set up direct deposit for my paycheck',
+ "i'd like to set up a direct deposit for my paycheck",
+ 'i want my paycheck to go directly to my bank account',
+ 'set up direct deposit to my money market account for my pay check',
+ 'how do i get direct deposit for my paycheck',
+ 'set up payroll direct deposit to my checking account',
+ 'assist me to set up direct deposit',
+ 'tell me how to set up direct deposit for my paycheck',
+ 'how do i direct deposit my paycheck',
+ 'i would like to set up a direct deposit, please tell me how',
+ 'what do i need to do to start direct deposit',
+ 'how do i go about setting up direct deposit',
+ 'can you show me how to set up my paycheck to be direct deposit to my first hawaiian bank account',
+ 'how do i set up direct deposit to my bank of hawaii account',
+ 'help me set up a direct deposit',
+ 'onpay gives you two convenient ways to pay your employees',
+ 'i want to switch to direct deposit',
+ 'can you show me how to set up direct deposit',
+ "what's needed to direct deposit my paycheck",
+ 'can i get paychecks directly deposited to my bank of america account'
+ ]
+ },
+ {
+ name: 'what_are_your_hobbies',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ "what's your favorite hobby",
+ 'what do you do for fun',
+ 'what are your favorite things to do',
+ 'i want to know if you have hobbies',
+ 'what are you hobbies',
+ "what do you do when you're not working",
+ "what do you like to do when you're not working",
+ 'what hobbies do you like doing',
+ 'what other hobbies do you enjoy',
+ 'what do you enjoy doing while not working',
+ 'i would like to know what hobbies you enjoy',
+ 'what do you do for hobbies',
+ 'what sorts of hobbies do you enjoy',
+ 'share some of your hobbies with me',
+ 'what makes you happy in your free time',
+ 'what do you do in your down time',
+ 'what hobbies do you enjoy in your free time',
+ 'tell me what you do in your free time',
+ 'what you do at the free time',
+ 'i gotta know your hobbies'
+ ]
+ },
+ {
+ name: 'shopping_list',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'i need to know the items on my shopping list',
+ "what are my shopping list's content",
+ 'what things are on my shopping list',
+ 'on the shopping list did i put tomato',
+ 'do i have tomato on the shopping list',
+ 'display shopping list',
+ 'can you read my shopping list to me',
+ 'what all do i have on my shopping list',
+ 'can you tell me what i have on my shopping list',
+ 'my shopping list contains what',
+ 'what do i need to get when i go shopping',
+ 'what do i have on my list for shopping',
+ 'what products are on my shopping list',
+ 'what did i put on my shopping list so far',
+ 'i need to know what is on my shopping list',
+ 'do i need to get milk',
+ 'tell me what is on my shopping list',
+ 'what do i need to go shopping for',
+ 'what do i need when i go shopping',
+ 'do i have a big shopping list'
+ ]
+ },
+ {
+ name: 'text',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'text roderick and tell him im running late',
+ 'can you text arthur and tell them im almost there',
+ 'send a text and tell chris i’m not working today',
+ 'please text nancy and give them the message that i am on vacation',
+ 'give a text to my cousin linda that says happy birthday',
+ 'send sal a text and tell them thanks',
+ 'text sal and let them know the answer is yes',
+ 'please send ann a text and tell her i will be home in 20 minutes',
+ 'i want to send a text message to',
+ 'create a text message to',
+ 'send a message to natalie in the form of a text',
+ "compose a text to mom i'll talk to you later",
+ "text mom and let her know i'll be there soon",
+ 'text alice',
+ 'can you send a text to my mother irene and tell her i love her',
+ 'can you text the sgt adam and tell them i found the missing girl',
+ 'text william and ask him to pick up organic apples tonight',
+ 'text someone',
+ 'text my friend for me and tell them to meet me later',
+ 'text my dad for me and tell them i love you'
+ ]
+ },
+ {
+ name: 'redeem_rewards',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'i am ready now to cash in on the many credit card points i have saved up',
+ "i have credit card points but don't know how to use them",
+ 'i would love to redeem some smart speakers with my credit card points',
+ 'could i redeem my credit card points on a new tv',
+ 'explain to me how to cash in rewards for my mastercard',
+ 'can you help me begin the process of redeeming credit card points',
+ 'how can i cash in rewards on my discover card',
+ 'i want to know if i can redeem my credit card points now',
+ 'are there any steps i need to take to use my credit card points',
+ 'can i redeem my credit card points for anything',
+ 'what steps do i take to redeem my credit card points',
+ 'if i want to cash in my american express rewards, how do i do that',
+ 'what do i need to do for my mastercard rewards',
+ 'how can i go about cashing in rewards with discover',
+ 'please tell me the procedure to cash in my rewards for my citibank card',
+ 'i want to utilize my credit card points',
+ 'can i redeem my credit card points to cash',
+ 'i want to cash my credit card points',
+ 'can i redeem my credit card on safeway',
+ 'how do i get cash back for the rewards on my amex card'
+ ]
+ },
+ {
+ name: 'play_music',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'can x’s music be played',
+ "i'd like to hear my workout playlist",
+ 'put my music on, please',
+ 'rock playlist please',
+ 'turn on the rap playlist',
+ 'i want to listen to my maroon 5 playlist, play it for me',
+ 'can you play music the beatles',
+ 'play help! by the beatles',
+ "please play the song that goes like hey jude don't make it bad",
+ 'play heart shaped box',
+ 'i want to hear some music',
+ 'play my post-rock playlist',
+ 'resume my death metal playlist',
+ 'play me some tunes',
+ 'play some rock',
+ 'find the song with, "baby shark, doo doo',
+ 'that "baby shark, doo doo," song, i want to hear it',
+ 'play songs by the beatles',
+ 'can you play elvis',
+ 'do you have music by elvis'
+ ]
+ },
+ {
+ name: 'are_you_a_bot',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'tell me if you are a real person or an ai',
+ 'am i talking to a real person',
+ 'are you real or fake',
+ 'are you a human or a bot',
+ 'am i talking with real person or ai',
+ 'are you a real human',
+ 'i need to know if you are a bot',
+ 'are you actually a bot',
+ 'are you a computer, or are you a real person',
+ 'are you real or automated',
+ 'are you a human being or a robot',
+ 'are you alive',
+ 'are you a computer',
+ 'are you a person',
+ 'are you an actual person',
+ 'how real are you',
+ 'should i regard you as a human or as a computer',
+ "can you tell me if you're human or if you're a computer",
+ 'do you consider yourself a bot',
+ 'are you a real person or a robot'
+ ]
+ },
+ {
+ name: 'tell_joke',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'please, recite to me some good one liners',
+ 'do you know any good jokes about zebras',
+ 'what are some funny things about food',
+ 'indicate something funny on the topic of food',
+ 'would you tell me a joke',
+ "tell me a joke if you'd like to",
+ 'know any jokes',
+ 'could you please tell me a good joke',
+ 'can you tell me a few funny jokes',
+ 'could you please share a joke with me',
+ "i'd like to hear a joke",
+ 'what’s your favorite funny joke',
+ 'what are some funny jokes',
+ 'let me know something funny',
+ 'tell me a joke about cats',
+ 'tell me a joke about rats',
+ 'tell me something funny about cats',
+ 'mind sharing a joke',
+ 'could you share a joke',
+ 'i would love to hear a joke about the dmv'
+ ]
+ },
+ {
+ name: 'change_ai_name',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'from now, your new name will be lord vader',
+ "from now on i'm going to call you ishmael",
+ 'i want to change your name to another name',
+ 'i would like to change your name to a different one',
+ 'do you mind if call you nikolai',
+ "i'd like to refer to you as my house from now on",
+ 'is it alright with you if i call you scarlet',
+ 'please change the ai name for me',
+ 'i gotta change your name to remy',
+ 'i wanna change your name to audrey',
+ 'i want to set your name to gaffigan',
+ 'set your name to hillary',
+ "i'd like to use another name for you",
+ 'can i change your name to buttercup',
+ 'could i please change your name to alicia',
+ 'i want to change your name',
+ 'i want to change your name, please',
+ 'can i refer to you as mike from here on out',
+ 'can i start calling you chris',
+ "i'm calling you rupert from now on"
+ ]
+ },
+ {
+ name: 'how_old_are_you',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'what is your birthday again',
+ 'what date were you created on',
+ 'can you tell me how old you are',
+ "what is al's age",
+ 'you are how old now',
+ 'how many years are you',
+ 'what would your age be',
+ 'i am wanting to know how old you are',
+ 'would you tell me your age',
+ 'could you let me know how old you are',
+ 'could you tell me when you were born',
+ 'do you have a birth date',
+ "what's your age siri",
+ "what's your age alexa",
+ 'how old are you today',
+ 'on what day and date of what year were you born',
+ "you're how old",
+ "what's your birth date",
+ 'what is the date of your birth',
+ 'tell me your birthday'
+ ]
+ },
+ {
+ name: 'jump_start',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'can you help me with the steps to jump start my car',
+ 'i need your help to jump start this car battery',
+ 'please assist me in figuring out how to handle a dead car battery',
+ 'what is the way to jump start the car',
+ 'closest auto parts store to find replace for dead car battery',
+ 'find repair shop that will diagnosis dead car battery',
+ 'find auto store that tests dead car batteries for free',
+ 'what are some ways to jump start a car',
+ 'if i think my car battery is dead, what steps should i take to fix it',
+ 'how do i jump start my vehicle',
+ 'how do i give my car a jump',
+ 'i need to jump start my car what do i do',
+ 'what do i do to fix a dead car battery',
+ 'what should i do if my car battery is dead',
+ "what's the method of jumping my car",
+ 'do you have instructions on jump starting a chevy',
+ 'how do i properly give my car a jump',
+ "what's next when it seems there's a dead car battery",
+ "how should i proceed if my car won't start and i think it's the battery",
+ 'how can i start my car using a jump start'
+ ]
+ },
+ {
+ name: 'meal_suggestion',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'suggest a meal from laos to me, please',
+ 'can you give me a thai meal suggestion, please',
+ 'can you give me a vietnamese meal suggestion, please',
+ 'can you give me a burmese meal suggestion, please',
+ 'suggest a meal from thailand to me',
+ 'what can i make for dinner using ground beef',
+ "i'd like to get dinner suggestions for thai food",
+ 'would you provide me with italian cuisine dinner suggestions',
+ "what's the best pizza place around here",
+ 'what kind of mexican meal should i get',
+ "i can't decide on dinner, what do you suggest",
+ "what's a good french meal",
+ 'i want you to give me a suggestion for what to make for dinner',
+ 'i need an italian meal suggestion',
+ 'i need you to give me a suggestion for what to make for dinner',
+ 'i would love an italian meal suggestion',
+ 'please suggest a british meal for me',
+ 'please provide an option for dinner',
+ 'i would like to hear what ideas you have about making dinner tonight',
+ 'do you have any suggestions as to what i should cook for dinner'
+ ]
+ },
+ {
+ name: 'recipe',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'how do i fry pork chops',
+ 'how do i cook boiled eggs',
+ 'show me a beef recipe',
+ 'making spaghetti sauce perfectly is done how',
+ 'what are the steps i need to follow to make chewy rice krispies treats',
+ "what's the best way to make chicken stir fry",
+ 'will you give me some guidance on how to make ceviche',
+ 'can you search for the best new york style cheesecake recipes please',
+ 'how do you make pot roast',
+ 'find a good recipe for meatloaf',
+ 'how do you cook meatloaf',
+ 'i need a recipe for chicken pot pie',
+ 'how do i bake a blackberry pie',
+ 'find me a recipe for teriyaki chicken',
+ 'find out how to make chicken tikka masala',
+ 'please show me a recipe for chili',
+ 'can you find me a recipe for salsa',
+ 'can you find me a recipe for honey ham',
+ 'how do you create pork chili',
+ 'can you instruct me on how to make german chocolate cake'
+ ]
+ },
+ {
+ name: 'income',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'i need details on my income',
+ 'tell me information about my income',
+ 'do you know anything about my income',
+ 'i want income data',
+ 'what do i usually earn in a day',
+ 'how much should my income be for the month',
+ 'how much do i make at work',
+ 'how much can i expect on my next check',
+ 'what amount of money do i earn at work',
+ 'i need to know how much i make at my job',
+ 'calculate how much i earn from my job',
+ 'could you tell me about my income',
+ 'i want you to tell me about my income',
+ 'what is my hourly rate',
+ 'can you tell me what my income will be this week',
+ 'how much do i make per day',
+ 'what do i earn on average at my job',
+ 'what do you make in a year',
+ 'i wanna know my income',
+ 'how much cash do i earn'
+ ]
+ }
+ ]
+}
diff --git a/packages/nlu-e2e/src/datasets/clinc50_666.ts b/packages/nlu-e2e/src/datasets/clinc50_666.ts
new file mode 100644
index 00000000..13a04f52
--- /dev/null
+++ b/packages/nlu-e2e/src/datasets/clinc50_666.ts
@@ -0,0 +1,1359 @@
+import { TrainInput } from '@botpress/nlu-client'
+
+export const trainSet: TrainInput = {
+ language: 'en',
+ entities: [],
+ seed: 666,
+ intents: [
+ {
+ name: 'timer',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'i need a 7 minute timer',
+ 'please set a timer for 40 minutes',
+ 'set timer for (x) minutes',
+ 'i need a 10 minute timer',
+ 'set a ten second timer',
+ 'i must set a timer',
+ 'set a timer for twelve seconds',
+ 'tell me when two minutes are up',
+ '"can u set a timer for 5 minutes',
+ 'you need to set the timer for me',
+ 'i would like a timer set',
+ 'i need you to set a time',
+ 'start a countdown for 20 minutes',
+ 'please start counting down from 10 minutes',
+ 'alert me in 20 minutes',
+ 'timer 10 minutes',
+ 'please run a timer for me',
+ 'set a seven minute timer',
+ 'set timer for 5 minutes',
+ 'please, can you set the timer for me'
+ ]
+ },
+ {
+ name: 'meaning_of_life',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'does life have a meaning',
+ "what's the answer to existence",
+ "what's the point of sentience",
+ "what's lifes meaning",
+ 'i wish to know the meaning of life',
+ 'what do you think is the meaning of like',
+ 'what is your version on the meaning of life',
+ 'whats your take on the meaning of life',
+ 'what is life all about',
+ 'do you know the meaning of life',
+ 'what is the underlying purpose to life',
+ 'what does life mean to you',
+ 'is there really a meaning of life',
+ 'can you share the meaning of life',
+ 'research the meaning of life',
+ 'can you tell me what you think the meaning of life is',
+ 'is there a reason people exist',
+ 'what is the secret or meaning to life',
+ 'what do people say life means or the meaning is',
+ 'can you share with me your interpretation of the meaning of life'
+ ]
+ },
+ {
+ name: 'find_phone',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'i left my phone somewhere',
+ 'ai, please help me find my phone',
+ 'ai locate my phone',
+ 'my phone is lost, can you help with that',
+ 'help me find my phone',
+ 'what is the location of my phone',
+ 'my phone is currently lost',
+ "i don't remember where my phone is",
+ 'i need to find my phone',
+ 'where did i last place my phone',
+ 'misplaced my phone',
+ 'can you help locate my phone',
+ "i don't know where my phone is",
+ 'can you help me find my phone',
+ 'do you know where my phone is located now',
+ 'alexa, tell me the location of my phone',
+ 'i lost my phone',
+ 'i need some help finding my phone',
+ 'help find my phone',
+ "i can't locate my phone"
+ ]
+ },
+ {
+ name: 'where_are_you_from',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ "tell me where you're made",
+ 'where did you originate from',
+ 'where do you hail from',
+ 'would you care to let me know where you were made',
+ 'where is the location that you were made',
+ 'in which area were you made',
+ 'where were you created',
+ "what's your country of origin",
+ "what's your place of manufacture",
+ 'where do you come from',
+ 'where were you manufactured',
+ 'where did you used to live',
+ 'are you from the uk',
+ 'what was the location of your birth',
+ 'can you tell me your place of origin',
+ 'where is your home',
+ 'where are you from',
+ 'what city were you born in',
+ 'where is your birth place',
+ 'were you born in a hospital'
+ ]
+ },
+ {
+ name: 'shopping_list_update',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'please take away the fries from the shopping list',
+ 'remove fries from my shopping list',
+ "add flour to my shopping list if it's not already on it",
+ 'are bananas on the list, if not can you add them',
+ 'add bread to my list of groceries',
+ 'add bread to my list of things to buy',
+ 'will you include kleenex on my list for shopping please',
+ 'write apples on my shopping list',
+ 'please add milk on my shopping list, if it is not already on it',
+ 'maybe we can put blank cds on my list for shopping',
+ 'can we put sunscreen on my list for shopping',
+ 'remove corn from my shopping list',
+ 'remove chips from my shopping list',
+ 'i need soda added to my list and carrots removed',
+ 'take carrots off my list for shopping',
+ 'put carrots on my shopping list',
+ 'delete ice cream from my shopping list',
+ 'i need to add cherrios to the grocery list',
+ "i'm low on cherrios can you add that to the shopping list",
+ 'i need you to add creamer to my shopping list'
+ ]
+ },
+ {
+ name: 'restaurant_reservation',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ "please reserve me a table for 2 pm at maxwell's on the 14th",
+ "can you get me a table for 6 at mark's at 11am",
+ "can you get me a table for 4 at steve's",
+ 'reserve a table for 4 at red lobster under the name dave at 7:30pm',
+ "reserve a table for 6 at morton's under the name jaime at 8:30pm",
+ 'book a reservation for 2 at olive garden under the name jim',
+ "i'd like to set up a reservation for kibble at 4",
+ 'i want you to book a reservation for 8 pm at red robin under the name kevin',
+ 'please reserve a table for 4 at red robin under the name kevin at 7',
+ 'i need you to reserve a table for 4 at red robin under the name kevin at 7',
+ 'i think i need to make a reservation for 8 pm at red robin for 3',
+ 'could you make reservations for 2 at olive garden for 5 pm today',
+ 'make a reservation at 5 pm today for 2 at olive garden',
+ 'please make a reservation for 2 at olive garden for 5 pm today',
+ "i need you to reserve a table for 6:00 pm for three at devon's seafood",
+ "i need you to reserve at table for a party of four at devon's for 6:00 pm",
+ 'can i make a reservation for 2 at state bird for 8:30, please',
+ 'can i make a reservation for 3 at state bird for 8:30, please',
+ 'schedule me a table for 2 at red lobster at 8',
+ 'check reservation availability for 2 at red lobster at 8pm'
+ ]
+ },
+ {
+ name: 'confirm_reservation',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'can you check my reservations for mortons under david winters',
+ 'verify that my reservations at won wons for joe lee are good',
+ "can you please confirm that i have a 6:00 pm table reserved under michelle solomon at devon's",
+ 'please confirm my reservation at parc for 8:00 pm, with the name denise jack',
+ 'please confirm my reservation for thursday at 10am',
+ "can you verify that i have a reservation at o'reilly's for 9",
+ "do i have reservations at o'neal's at 3 pm",
+ 'do you know if my reservation for dono is confirmed at 8 pm',
+ 'i need to know if i have reservations at arufflo at 5 pm',
+ 'can you confirm my reservation for kevin at red robin',
+ 'i need you to confirm my reservation for kevin at red robin',
+ 'i need to confirm that i have a reservation at red robin for josh',
+ 'please confirm my reservation for red robin at 9',
+ 'confirm my reservation for red robin at 8',
+ 'can you confirm my reservation for 6 pm on the 13th of february',
+ 'can you verify mike has a reservation at black rock at 6 pm',
+ 'i would like you to confirm my reservation for next saturday at 11 am',
+ 'please confirm my reservation for march 8th at 12:00',
+ 'please confirm my reservation for feburary 21st at 6pm',
+ 'confirm my taco house reservation for jean'
+ ]
+ },
+ {
+ name: 'freeze_account',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'can you block my chase account right away please',
+ 'please block my chase account right away',
+ 'place a hold on my bank account',
+ 'how can i stop transactions on my account',
+ 'freeze my account immediately',
+ 'can you put a stop on my bank account now',
+ 'can you freeze my account, please',
+ 'could you freeze my account now',
+ 'could you put a stop on my bank account',
+ 'please freeze my account',
+ 'i want my account frozen',
+ 'place a block on my capital one account right now',
+ "i'd like a block on my charles schwab account immediately",
+ 'pause my account',
+ 'put a hold on my bank account',
+ 'please do a stop on my bank account',
+ 'dont allow any action on my account',
+ 'please freeze my bank account',
+ 'freeze my account',
+ 'please put a block on my wells fargo account'
+ ]
+ },
+ {
+ name: 'user_name',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'is there a name that you call me',
+ 'by what name people call me',
+ 'say what you think my name is',
+ 'tell me what you think my name is',
+ 'what would you like to refer to me as',
+ 'what are you going to refer to me as',
+ 'you saved my name how',
+ 'how did you save my name',
+ 'by what nym do you think of me',
+ 'can you tell me what you refer to me as',
+ 'what is my name saved as in your system',
+ 'what is the name you associate with me',
+ 'what do you refer to me as',
+ 'how do you address me',
+ 'tell me what you believe my name to be',
+ "what's the name that you have for me, please",
+ 'do you call me a certain name',
+ 'i need to know what names you have for me',
+ 'so what is my name saved as',
+ 'so what is my current name saved as'
+ ]
+ },
+ {
+ name: 'spending_history',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'how much have i spent lately on eating out',
+ 'how much did i spend on christmas gifts in december',
+ 'what did i spend my money on last week',
+ 'have i been spending a lot on shoes lately',
+ 'how much money have i spent this week',
+ 'how much have i spent eating out this week',
+ 'what have car repairs ran me this month',
+ "what's the total amount i've spent eating out in the last two weeks",
+ 'how much have i spent on clothes recently',
+ 'please list my total spending on grocery shopping for the last two months',
+ 'how much have i spent on starbucks since the beginning of 2019',
+ 'how much have i lately spent on eating out',
+ 'can you tell me what i spend on gas for the past two weeks',
+ 'how much did i spend on furniture last week',
+ 'let me know the amount i spent on food last month',
+ 'i have spent a lot on groceries lately',
+ 'i spent a lot on vacations recently',
+ 'i have been spending a lot on insurance lately',
+ 'how much do i spend going out to eat',
+ 'how much did i spend on gas the past month'
+ ]
+ },
+ {
+ name: 'mpg',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'what is the gas mileage for the car that i drive',
+ 'retrieve my current mpg on my car',
+ "can you give me the car's mpg for the city",
+ "what's the mpg for the car when it's on the highway",
+ "what are the estimates for the car's highway mpg",
+ 'how far can the car get per gallon on the highway',
+ "what's my current mpg",
+ 'what is my average mpg in this car',
+ 'whats the cars fuel mileage like',
+ 'how much gas does this use in the city',
+ "what's this car guzzle in gas when driving in town",
+ 'what mpg does this car get on the expressway',
+ "what'll this car do for gas mileage on the open road",
+ 'how good is the fuel usage for this vehicle',
+ "how's the mpg for this on the freeway",
+ 'how many mpg does this get on the highway',
+ 'how many miles per gallon does this car get on the highway',
+ 'whats the cars mpg',
+ 'please tell me what the mpg is for this car',
+ 'my mpg is how much'
+ ]
+ },
+ {
+ name: 'travel_suggestion',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'what should i do for fun in tokyo',
+ 'where should i book my next trip',
+ "what's a good place to travel to",
+ 'where should i travel to next',
+ 'when visiting chicago, what things can i do there',
+ 'what kind of entertainment is available in chicago',
+ 'what are some tourist sites to see in ann arbor',
+ 'what experiences does kentucky offer',
+ 'help me find things to do in nashville',
+ 'what are some touristy things to do in boston',
+ 'what kinds of fun touristy things are there to do in boston',
+ 'provide me with ideas of things we can do in charlotte',
+ 'give me a list of things to do in orlando',
+ "i'd like some suggestions on where to go on my vacation this year",
+ 'in portland, what things are there to do',
+ 'what are some things i can do in portland',
+ 'what should i do in france',
+ 'where do you suggest i plan my next vacation',
+ 'what can a tourist in denver do',
+ 'what sort of things do people do in new york'
+ ]
+ },
+ {
+ name: 'insurance',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'what is my health plan',
+ 'can you tell me my health plan',
+ "is it possible for you to let me know what health plan i'm on",
+ 'list insurance benefits',
+ 'tell me the name of my insurance plan',
+ "what's the healthcare plan i'm on",
+ 'please find the benefits provided by my insurance',
+ 'what benefits do i have with my insurance',
+ 'can you tell me what is included in my health benefits',
+ 'i would like to hear a list of my insurance benefits, please',
+ "what is the insurance plan that i'm signed up for",
+ "can you tell me the name of the insurance plan i'm a member of",
+ 'what benefits does my health coverage give me',
+ 'what is the aid i get from these health benefits',
+ 'what was my health plan again',
+ 'where is the list of my insurance benefits',
+ 'what insurance benefits am i getting',
+ 'what are my insurance perks',
+ 'what are my insurance rewards',
+ 'what are my insurance amenities'
+ ]
+ },
+ {
+ name: 'tire_pressure',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'are my tires in need of air',
+ 'have my tires got adequate amounts of air in them',
+ 'is there air in my tires',
+ 'how much air remains in my tires',
+ "tell me my car's tires' air pressure",
+ "tell me my tires' air pressure",
+ 'can you tell me the tire pressure in my car',
+ "what's the air level in my tires",
+ 'what is my current tire pressure for each tire',
+ 'can you check the tire pressure',
+ 'measure tire pressure',
+ 'are my tires good on pressure',
+ "i would like to know my car's tire pressure please",
+ 'are my tires low',
+ 'should i air up the tires',
+ 'let me know how low the air in my tires are',
+ "what's the tire pressure of my car",
+ 'tire pressure please',
+ 'can you tell me what my tire pressure is',
+ 'how much air do i have in my tires'
+ ]
+ },
+ {
+ name: 'calendar',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'do i have anything to do march 2nd',
+ 'is my calendar free next sunday',
+ 'what is on my calendar next sunday',
+ 'what is on my calander for valentines day',
+ 'i wanna know what is currently on my calendar for friday the 1st',
+ 'let me know what is showing on my calendar for the 17th of march',
+ 'i gotta know what is currently on my calendar for friday the 1st',
+ 'what is on march 2 calendar',
+ 'what do i have scheduled for february 28th',
+ 'what events are on my calendar for april 1st',
+ 'can you give me an overview of what march 23 will look like for me',
+ 'tell me what my calendar looks like for april 13',
+ 'what have i got planned for the date of april 23',
+ 'tell me what is on my calendar for april 1',
+ 'tell me what is on my calendar for may 3',
+ 'do i have anything on my schedule for may 3',
+ 'check my calendar for 30th birthday party',
+ 'read my calendar events',
+ 'what am i supposed to be doing today',
+ 'what do i have planned for tomorrow'
+ ]
+ },
+ {
+ name: 'calculator',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'what is 7 x 7',
+ 'what is the sum of 10 plus 5',
+ 'what is 4 + 4',
+ 'what is 4 x 4',
+ 'what is 20+ 5',
+ 'what is the square root of 10294',
+ 'what is 213 times 3',
+ 'what is the square root of 144',
+ 'what is 78 times 85',
+ 'help me with math',
+ 'what is 10 to the 12th power',
+ 'how many times can 12 go into 600',
+ 'add twelve and twelve please',
+ 'what is 10 + 10',
+ 'what is the square root of 5',
+ 'what is 38% of 389209',
+ 'what is 1 fifth times 2 fifths',
+ "what's 3 plus 3",
+ 'what’s the answer to 5-6=',
+ 'what does 6 x 1 equal'
+ ]
+ },
+ {
+ name: 'carry_on',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'how my carry ons does spirit airlines allow me for a flight to dallas',
+ 'could you list out the carry-on restrictions for american airlines',
+ 'is there are limit of carry ons for my flight with aer lingus to cork',
+ 'delta has too many carry-on restrictions! do you know them',
+ 'can i carry on a garment bag, a small travel bag, and a back pack on my trip with american airlines',
+ 'what are the carry-on limits for flying domestically with delta',
+ 'how many carry ons can i take on a flight with united to lax',
+ 'what is the carry-on policy say for flights on delta airlines',
+ 'what are the carry-on restrictions for frontier airlines',
+ 'how many bags can i carry-on for flights on singapore airlines',
+ 'what are the carry-on restrictions for southwest airlines',
+ "what can't i carry-on to delta",
+ 'what is the carry on limit',
+ "what are delta's carry-on policies for flights",
+ 'how strict is spirit when it comes to carry ons',
+ 'can i have 3 carry ons with delta at jfk',
+ 'what are the requirements for carry on on flights with united airlines',
+ 'please find the rules for carry on when flying with spirit airlines',
+ 'on a flight with allegiant to orlando, how many carry ons can i take',
+ 'when taking a flight with porter to toronto, how many carry ons can i take'
+ ]
+ },
+ {
+ name: 'pto_used',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ "call hr so i can figure out how many days off i've taken",
+ 'about how many vacation days have i taken',
+ "i've used how much pto so far",
+ "i need to know how many days so far i've taken off",
+ 'how many days have i taken off this year',
+ "can you tell me how much pto i've gone through",
+ "what's the amount of time off i've had",
+ "what's the amount of vacation days i've taken",
+ 'please find out how many of my vacation days i have spent',
+ 'check to see how many days off i have used',
+ 'i need to know how many of my days off i have used at this point',
+ 'what is the total number of days i have taken off so far',
+ 'how many vacation days have i spent in total',
+ 'i need to know the number of days off i have taken at this point',
+ 'how many days was i sick or on vacation',
+ 'how much of my time off have i used',
+ 'how many times did i take a day off',
+ 'how many days off have i taken',
+ 'so far, how many days off have i taken',
+ 'how many vacation days have i taken'
+ ]
+ },
+ {
+ name: 'sync_device',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'please desync my current mobile',
+ 'please link up to my cell',
+ 'stop syncing with this phone',
+ 'disconnect from my phone please',
+ 'please connect to my phone',
+ 'disconnect from my phone for me',
+ 'would you disconnect from my phone',
+ 'i need for you to connect to my phone',
+ 'mind connecting to my phone',
+ 'can you link to my phone',
+ 'please disconnect from my phone',
+ 'pair with my phone',
+ 'unsync my phone now please',
+ 'how can i unsync my phone',
+ 'unsync my phone',
+ 'remove the sync from my phone',
+ 'please unsync my phone',
+ 'can we pair this with my new phone',
+ 'you can disconnect from my phone now',
+ 'do you have access to my phone'
+ ]
+ },
+ {
+ name: 'thank_you',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'i want to thank you for helping',
+ 'much obliged',
+ 'thanks!',
+ 'i thank you',
+ "i'm grateful to you",
+ "you've been a great help",
+ "i'm so glad you helped me!",
+ 'i have much gratitude for your help',
+ 'i appreciate you helping me',
+ 'i appreciate it',
+ 'thank you for the information',
+ 'you are so thoughtful and i appreciate your gesture',
+ 'your answer pleased me',
+ "i'm grateful for the answer you just gave me",
+ 'good answer, thanks for providing it',
+ 'i appreciate that answer!',
+ 'i can not thank you enough',
+ 'let me thank you',
+ 'thanks for the help!',
+ 'i appreciate the help'
+ ]
+ },
+ {
+ name: 'cook_time',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'how long does it take to cook roast beef',
+ 'how long will it take to make the wings',
+ "what's the cooking time for the cake",
+ 'if i cook the pizza at 400 degrees how long must it be in for',
+ 'how long do you need to put the cuish in for',
+ 'the sauce must simmer then go in the oven but for how long',
+ 'how long is it acceptable to carmelize onions',
+ 'about how long should you cook a 10 pound turkey',
+ 'how long to i bake the brownies',
+ 'for how long should i bake the brownies',
+ "what do i set the timer for if i'm making gyoza",
+ 'how long to microwave a frozen dinner',
+ 'how long should this be in the oven',
+ 'how long should i expect beef stroganoff to prepare',
+ 'how long should i boil noodles for',
+ 'what time should i cook noodles for',
+ 'how long am i supposed to cook pork loin',
+ 'how long should i spend preparing roast beef',
+ 'how long do i cook pork',
+ 'how long do you need to bake chicken for it to be safe to eat'
+ ]
+ },
+ {
+ name: 'report_lost_card',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ "i can't seem to find my gold card and want it reported as lost",
+ 'what do i do if my visa card got stolen',
+ 'someone stole my mastercade',
+ 'how do i report a lost discover credit card',
+ "i need to report a card that's been lost but i dont know how",
+ 'what must i do to report my card lost',
+ 'my card is lost please report it',
+ 'i need to report that my card has been lost',
+ 'i need to report that my card is lost',
+ 'i need to replace my lost card',
+ 'someone stole my discover credit card',
+ 'my capital one credit card was stolen',
+ 'i need to report my stolen card',
+ 'i need to report my card as stolen, please',
+ "i can't find my mastercard and i want to report it as lost",
+ "i can't locate my mastercard and i want to report it as lost, please",
+ 'i need to report a stolen card, please',
+ 'my card is stolen, i need to report it',
+ 'my bronze card is missing i would like to report it as lost',
+ 'i need to report my card as stolen, who do i tell'
+ ]
+ },
+ {
+ name: 'alarm',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'set my alarm for getting up',
+ 'create an alarm',
+ 'set an alarm for 6am, please',
+ 'set up an alarm',
+ 'how do i set the alarm',
+ 'please turn on the alarm',
+ 'make an alarm 6am',
+ 'set alarm',
+ 'i need an alarm to wake me up at 8 tomorrow morning',
+ 'i need you to set an alarm for 8am tomorrow',
+ 'set an alarm for 9pm please',
+ 'at 9 tomorrow, set off the alarm',
+ 'please set a reminder for me',
+ 'set an alarm to work out and another to go home',
+ 'set an alarm for my meeting and another for my phone call',
+ 'i need an alarm set please',
+ 'i need an alarm set for 5:30 pm',
+ 'please create a new alarm for me',
+ 'please create an alarm for 12 noon',
+ 'help me get up at eight thirty in the morning tomorrow'
+ ]
+ },
+ {
+ name: 'todo_list',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'what is left to do today',
+ 'the tasks for today, what are they',
+ 'do i have cleaning the counters on my to-do list',
+ 'tell me what is on the list of things to do',
+ 'tell me what is on my to do list',
+ 'please read my todo list',
+ 'do i have brush my teeth on my todo list',
+ 'what chores do i have waiting on my reminder list',
+ 'is doing my laundry already on my todo list',
+ 'did i add purchase tickets to the penguin game to my todo list',
+ 'when is babysitting on my to do list',
+ 'can you please tell me what is on my to-do list',
+ 'i wonder what my to-do list looks like for today',
+ "go ahead and say all of the points on my list of to-do's please",
+ 'what have i got on my to-do list',
+ 'please let me know if giving the dog a bath is on my list of tasks to complete',
+ 'please inform me of what tasks i have listed on my to do list',
+ 'can you read my to do list to me please',
+ 'check my to do list to see if feeding the fish is on it',
+ 'do i have watering the plants on my to do list'
+ ]
+ },
+ {
+ name: 'w2',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'where is the w-2 form located',
+ 'where can i locate my w-2 form',
+ 'where do i pick up a w2 form',
+ 'what location can i get a w2 from',
+ 'a w2 form is obtainable where',
+ 'where do i get the form that shows my wages for the year for taxes',
+ 'can you help me find my w-2',
+ 'hey, anyone seen my w-2',
+ 'i need to download my w-2 form',
+ 'where can i get a w2 form from',
+ 'how do i get a w2',
+ 'can you tell me where to get a w2',
+ 'who has the tax form that has my wage informaton',
+ 'where do i get the tax form that shows my wages',
+ 'locate my w-2 form',
+ 'where do i get my w2 form from',
+ 'tell me where to get myy w-2 form',
+ 'is my w2 available at the post office',
+ 'when will i receive my w2',
+ 'can i get my w2 online'
+ ]
+ },
+ {
+ name: 'damaged_card',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'what should i do with an unusable and damaged card',
+ 'my card got melted and i need to report it',
+ 'my card got melted in the dryer and i need to report it, please',
+ 'the magnetic strip on my card is scratched, let the card company know',
+ 'my card is cracked, please tell the card company',
+ "my card is too scratched and won't read correctly",
+ 'the sticker on my card is peeling off',
+ 'my card is partially damaged how do i report this',
+ 'my card is not working and i need to let them know',
+ 'the chip on my card is damaged and i need a new one',
+ 'my card is cracked and i want to report it',
+ 'report to the company that i damaged my card',
+ 'call the card company and ask them to replace my card',
+ 'i need a report form for my damaged, demagnetized card',
+ 'how to tell the company that my credit card melted',
+ 'how do i report a melted credit card',
+ 'i accidentally put my credit card in the shredder',
+ 'i was using my credit card as a cutting board and accidentally sliced it in half',
+ 'can you assist me with reporting that the atm nicked my card',
+ 'my card fell in the toilet and now the chip does not work'
+ ]
+ },
+ {
+ name: 'schedule_meeting',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'i want to know if there is meeting room available at 8',
+ 'meeting room availability from 8:00 please',
+ 'are there any meetings room available between 8:00 and 9:00 am',
+ 'i want to check if there is any meeting room available between 8:00 and 10:00 am',
+ 'meeting room availability at 8:00 please',
+ 'do you know how do i schedule a meeting',
+ 'can you schedule a meeting with james at the office, please',
+ 'how do i create a meeting',
+ 'can you schedule a meeting with james at the office',
+ 'do you have a meeting room open from noon until 2:00 pm',
+ 'i want to schedule a meeting with tom for 6pm',
+ 'i want you to schedule a meeting with carrie and lisa',
+ 'could you schedule a meeting with john smith at 1 pm tomorrow',
+ 'can you schedule a meeting with damon for 1',
+ 'are any meeting rooms open between 9 and 10',
+ 'i need to schedule a meeting with mae at 5pm',
+ 'is it possible to book a meeting room between 10 and 11',
+ 'check if meeting rooms are free from 6 to 7',
+ 'are there rooms available between 5 and 530',
+ 'if i want to schedule a meeting, how do i do it'
+ ]
+ },
+ {
+ name: 'gas_type',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'tell me what kind of gas this car uses',
+ 'what type of gas does this car need',
+ 'can i use any fuel in my car',
+ 'i need to know what kind of gas to put in this car',
+ 'what gas is needed for the car',
+ 'i wanna know the gad i need to fill this car up with',
+ 'i wish to know the gas i need to fill this car up with',
+ 'what kind of fuel does my vehicle run on',
+ 'look up the kind of fuel i use in my car',
+ 'what sort of fuel do i need for my car',
+ 'look up the type of gas needed for this vehicle',
+ 'what kind of gas is needed for this car',
+ 'should i use premium gas with this car',
+ 'can i put normal unleaded in this car',
+ 'what kind of gas does this car use',
+ 'tell me the gasoline type for this car',
+ 'what type of gas do i need to put in this car',
+ 'does this take regular unleaded',
+ 'tell me what type of fuel my car uses',
+ 'what gas should i fill it up with'
+ ]
+ },
+ {
+ name: 'plug_type',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ "do i need a socket converter if i'm going to mexico",
+ 'does england have a different plug type and if so what is it',
+ 'when you go to germany, do you need a special converter for your plugs',
+ 'do visitors to japan need to use a socket converter',
+ 'do they use special outlets in australia',
+ 'is there an outlet plug adapter',
+ "i'm going out of country, what type of outlet plug do i need",
+ 'inform me as to the plug type of the country',
+ 'do i need a plug converter when i visit russia',
+ "do i need a socket converter if i'm going to be in uruguay",
+ 'do i need a plug converter when i visit belgium',
+ 'what plug type is used in spain',
+ "do i need a socket converter if i'm going to be in france",
+ 'do i need a converter in barcelona',
+ 'what kind of hertz outlets do they use in z',
+ 'should i buy a specific plug type when in china',
+ 'what are those european plugs called',
+ 'should i purchase a new plug converter when in france',
+ 'when in europe would i be able to use a regular plug',
+ "do i need a socket converter if i'm going to be in eithiopia"
+ ]
+ },
+ {
+ name: 'next_holiday',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'when is the next scheduled holiday, please',
+ 'when is the next day i have off',
+ 'i need to know how long until my next vacation day',
+ "what's the next day i am scheduled off",
+ 'how many days until my next day off',
+ 'when is the next time off for a holiday here',
+ 'show me my next day off',
+ 'which holiday is next',
+ 'tell me when the next day off will be',
+ 'please look up the next holiday for me',
+ 'i need to know when i will next have the day off',
+ 'i need to know when the next holiday will be',
+ 'what is my next day off',
+ 'whats the next day that i am off',
+ 'what day am i off next',
+ 'tell me what holiday is comming up next',
+ "are there any holiday's comming up",
+ 'what holiday can i expect next',
+ 'what holiday is coming up next',
+ "what's the date of the next closest holiday"
+ ]
+ },
+ {
+ name: 'who_do_you_work_for',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'i need to know who is your boss',
+ 'what is the full name of your boss',
+ 'could you tell me the name of your boss',
+ 'who or what association do you work for',
+ 'describe who it is you work for',
+ 'who do you work for',
+ 'what does your boss go by',
+ 'are you employed by someone else',
+ 'who do you do task for',
+ 'who is it u work for',
+ 'is it correct to say you work for me',
+ 'who are you currently working for',
+ 'are you now working for me',
+ 'who are you working for',
+ "don't you work for your self",
+ 'am i your boss',
+ 'is there someone else you work for',
+ 'who would you say you work for',
+ 'who would you say you are employed by',
+ 'for what person do you work'
+ ]
+ },
+ {
+ name: 'how_busy',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'will i have to wait for long before being seated at the cheese cake factory',
+ 'what is the average wait time at the cheese cake factory',
+ 'how long will i have to wait if i want to eat at panera',
+ 'can you tell me how busy chipotle will be at nine tonight',
+ 'i want a table at texas roadhouse; how long will it be',
+ 'how busy is michel at 9',
+ 'how long do i have to wait for shokudo',
+ 'how busy is shokudo at 12',
+ 'how busy is jiro at 12 lunch time',
+ 'at 5 pm, is kaya very busy',
+ 'around 5 pm, how busy is kaya',
+ "what's the wait at macaroni grill",
+ 'is the restaurant busy at lunchtime',
+ "how busy will chili's be if i go at 6 pm",
+ 'find out how busy ihop usually is around 5 pm',
+ 'tell me how busy macaroni grill will be around 8 pm',
+ 'i want to know how busy ruby tuesday will be at around 8:45 pm',
+ 'do you know how busy outback steakhouse tends to be at 7:00 pm',
+ 'how long will it take to be seated at the flying w',
+ "how many people go to chili's around 9pm"
+ ]
+ },
+ {
+ name: 'pin_change',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'how do i change my pin for number for my abc bank account',
+ 'i forgot the pin number for my college fund account',
+ 'my checking account needs a new pin number',
+ 'i seem to have lost the pin for my retirement account',
+ "i'm afraid i've forgotten the pin for my 401k account",
+ 'help me change my pin number for my money market account',
+ 'how do i get a new pin',
+ 'what is the procedure for getting a new pin number',
+ 'you need to change the pin on my bank of america account to be 1234',
+ 'i need you to change the pin on my bank of america account to be 1234',
+ 'i want to change my pin number for my checking account',
+ "so it turns out i can't remember what my pin is for my bank of america checking account",
+ 'change the pin on my capital one account to be 1234',
+ 'how do i reset my pin number for my account, please',
+ 'please change pin to 1234 on my bank account trailing in 3829',
+ 'change the pin on my bank account first republic to 1234',
+ 'help me change my pin number',
+ 'can you tell me my password checking account',
+ "i'd like to change my pin number for my wells fargo account",
+ 'i need to change my pin number for my checking account'
+ ]
+ },
+ {
+ name: 'goodbye',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'syonara, ai device!',
+ 'goodbye, helpful ai device!',
+ 'bye-bye',
+ "i'm glad i got to talk to you",
+ 'this was a great conversation',
+ 'later, thanks for chatting',
+ 'nice talk, talk to you later',
+ 'thanks for chatting, later',
+ 'goodbye to you',
+ 'i must say goodbye',
+ 'cya later',
+ 'good night',
+ 'peace',
+ 'i have to get going',
+ 'talk to you later!',
+ 'thanks for talking to me',
+ 'it was enjoyable talking with you',
+ 'it was pleasant conversing with you',
+ 'until next time!',
+ 'tootles'
+ ]
+ },
+ {
+ name: 'international_fees',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'are there any transaction fees associated with my chase card if i am in tokyo',
+ 'does my bank of the west card have international transaction fees',
+ 'does my bank of america card have international transaction fees',
+ 'what are the fees for using my card in toronto',
+ 'if i use my card in denver will i get charged extra',
+ 'will i pay extra if i use my card in juarez',
+ 'what are the international transaction fees on my amex',
+ 'what are the foreign transaction fees for cuba',
+ 'i need to know if there are transaction fees for my visa card in canada',
+ 'is there any transaction fees on my citibank card if used in spain',
+ 'can you tell me if i will have any transactions fees for using my discover card in turkey',
+ 'are there any international transaction fees associated with my visa card',
+ 'will i receive a fee if i use my card in ireland',
+ 'if i use my card in mexico do i get an extra fee',
+ 'do i get charged more for using my card in canada',
+ 'will i be charged for using my card in japan',
+ 'what are the international transactions fees for my industrial bank card',
+ 'does the national card have international transactions fees and how much',
+ 'will i be charged if i use my card in mexico',
+ 'will i be charged if i use my card in canada'
+ ]
+ },
+ {
+ name: 'meeting_schedule',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'how many meetings do i have scheduled between 10 and 3',
+ 'do i have meetings from 1 to 4 today',
+ 'tell me when my meeting is scheduled with roger',
+ 'do i have any meetings today between 2 and 3',
+ 'do i have any meetings today between 6 and 7',
+ 'tell me the meeting schedule for the day',
+ 'what meetings are on my schedule today',
+ 'what meetings are scheduled for today',
+ 'do i have any meeting to go to today',
+ 'what meetings are on my calendar',
+ 'what are my meetings today',
+ 'bring up any mettings for today',
+ 'are any meetings scheduled for today',
+ 'do i have any meetings scheduled for today',
+ 'do you know if i have any meetings set up with travis today',
+ 'do you see any meetings with john on my schedule today',
+ 'am i scheduled for any gatherings today',
+ 'is the gang getting together this afternoon',
+ 'i need to know if i have any meetings scheduled for today',
+ 'when is my meeting with john'
+ ]
+ },
+ {
+ name: 'report_fraud',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ "i'm pretty sure this charge from sam's club is fraudulent",
+ 'can you help me with some fraudulent charges on my card',
+ 'i suspect fraudulent transaction',
+ 'i need to report fraudulent activity on my card',
+ 'i need to make a report due to fraudulent activity on my card',
+ 'due to fraudulent activity on my card i need to make a report',
+ 'how do i report a fraudulent charge on my visa',
+ "i believe there's fraud on my card",
+ 'i need to report fraudulent activity on my mastercard',
+ "i have transactions on my card that aren't mine",
+ 'someone misused my card and put fraudulent transactions on it',
+ 'i think my chase account has been compromised and fraud committed',
+ 'i think someone made an illegal charge to my card',
+ "i'm afraid there is a false transaction on my account",
+ 'i need help investigating a suspicious transaction',
+ 'i believe there are fraudulent charges on my card how can i report them',
+ 'send information about suspicious credit card activity',
+ 'i want to report fraudulent activity on my visa card',
+ 'i want to report fraudulent activity on my amex card, please',
+ 'i need to know how to report fraud on my discover card'
+ ]
+ },
+ {
+ name: 'measurement_conversion',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'what amount of miles are in a hundred kilometers',
+ "what's 8 ounces in cups",
+ 'how do you convert ounces to grams',
+ 'how do you convert pounds to kilos',
+ 'how many inches and centimeters would i need if i had 10 feet of something',
+ 'what would four inches be in centimeters',
+ 'how do i convert four inches into centimeters',
+ 'what are four inches in centimeters',
+ 'help me to understand the conversion between tablespoons and teaspoons',
+ 'tell me how to convert grams into ounces',
+ 'what is the proper way to convert centimeters into inches',
+ 'how many ounces are in a cup',
+ 'how many tablespoons are in three cups',
+ 'how do i convert inches and centimeters',
+ 'convert 2 inches to meters',
+ 'how many inches are in 5 feet',
+ 'how can i change centimeters into inches',
+ 'how many meters are in 10 millimeters',
+ 'how can you convert miles to kilometers',
+ 'how would you convert yards to inches'
+ ]
+ },
+ {
+ name: 'book_hotel',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'need a place to crash in elizabeth near the refinery from june 4th until the 9th',
+ 'i want to book a hotel room for 10 people from monday to tuesday in manhattan',
+ 'get me a hotel room for 3 people from march 12th to 14th in chicago',
+ 'book me a hotel in seattle near the space needle from march 3rd to 5th',
+ 'how about any bookings in new york city from april 2 through april 8',
+ 'create a reservation novemeber 11 to 15 in cali',
+ 'show me all hotels in evans with good reviews',
+ 'show me a well reviewed hotel in evans',
+ 'are there any good hotels in salem',
+ "what's a good hotel to stay at in chicago",
+ 'i need a hotel booked in denver near the museum from friday to saturday',
+ 'i required a hotel room for 20 people from march 8th through march 20th in las vegas',
+ "i'd like to find a hotel in tampa that people liked enough to leave good reviews about",
+ 'please help me book a place to stay in pittsburgh from monday to friday',
+ 'help me find a hotel in that has good reviews in columbus',
+ 'do you know of any well reviewed hotels in chicago',
+ 'look up hotels with positive reviews in new york city',
+ 'find a suitable lodging in vancouver on march 16-19',
+ 'can i book a hotel in new york city near times square from 3/1 until 3/5',
+ "i'm going to need a hotel room for 8 people in tallahassee from 3/10 until 3/12"
+ ]
+ },
+ {
+ name: 'weather',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'what are the conditions like in sarasota',
+ "what's the climate like in chicago",
+ 'what will the temperature be tomorrow',
+ 'will it freeze tonight',
+ 'give me the 7 day forecast',
+ 'what is the weather going to be like tomorrow',
+ 'is it snowing in the next two weeks',
+ 'is it raining',
+ 'can you tell me what the weather will be please',
+ "what's the weather looking like",
+ 'what is the weather report for costa mesa',
+ 'costa mesa weather',
+ 'what are the weather conditions in seattle',
+ "what's the weather like right now",
+ 'when is it going to rain again',
+ 'what is the high temperature for tomorrow',
+ 'how much snow will we be getting tomorrow',
+ "what's the current weather",
+ "what's it doing outside right now",
+ "what's today's weather going to be"
+ ]
+ },
+ {
+ name: 'whisper_mode',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'turn off whisper mode',
+ 'activate whisper mode',
+ 'use your indoor voice please',
+ 'can you whisper until i tell you not to',
+ 'i need you to use your whisper voice',
+ 'you need to speak softer',
+ 'change to normal vocals mode',
+ 'turn up your volume',
+ 'switch to quiet mode',
+ "let's use whisper voice",
+ "let's whisper",
+ 'please turn on whisper mode',
+ 'please switch to whisper',
+ 'i want you to use whisper voice now',
+ 'please turn on whisper mode because i am about to be in class',
+ 'i want your setting adjusted to whisper voice now',
+ 'engage whisper mode now',
+ 'can you go to the whisper voice',
+ 'please use whisper',
+ 'please talk quietly'
+ ]
+ },
+ {
+ name: 'direct_deposit',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'give me instructions to set up direct deposit for my paycheck',
+ "i'd like to set up a direct deposit for my paycheck",
+ 'i want my paycheck to go directly to my bank account',
+ 'set up direct deposit to my money market account for my pay check',
+ 'how do i get direct deposit for my paycheck',
+ 'set up payroll direct deposit to my checking account',
+ 'assist me to set up direct deposit',
+ 'tell me how to set up direct deposit for my paycheck',
+ 'how do i direct deposit my paycheck',
+ 'i would like to set up a direct deposit, please tell me how',
+ 'what do i need to do to start direct deposit',
+ 'how do i go about setting up direct deposit',
+ 'can you show me how to set up my paycheck to be direct deposit to my first hawaiian bank account',
+ 'how do i set up direct deposit to my bank of hawaii account',
+ 'help me set up a direct deposit',
+ 'onpay gives you two convenient ways to pay your employees',
+ 'i want to switch to direct deposit',
+ 'can you show me how to set up direct deposit',
+ "what's needed to direct deposit my paycheck",
+ 'can i get paychecks directly deposited to my bank of america account'
+ ]
+ },
+ {
+ name: 'interest_rate',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'i want to know my interest rate',
+ 'how can i find my interest rate for my alliance account',
+ "tell me my current savings account's interest rate",
+ 'how to calculate my interest rate',
+ 'i gotta find the interest rate for my checking account',
+ 'i have to find the interest rate for my checking account',
+ 'i need the interest rate on my capital one account',
+ "what do i need to do to look up my 1st republic account's interest rate",
+ "if i'm using citibank, what is the interest rate",
+ 'i would like to know my chase interest rate',
+ 'how percentage am i earning annually on my account',
+ 'what is the best interest rate i can get from now on',
+ 'can you tell me my current interest rate',
+ "what's my current interest rate, if you could look",
+ 'will you tell me the interest rate',
+ 'how much is the interest rate for the account i have at bluebird',
+ 'let me know my ally interest rate',
+ 'what is the interest rate on my charles schwab checking account',
+ 'what is the interest rate on my chase slate credit card',
+ 'what is the interest rate on my money market account'
+ ]
+ },
+ {
+ name: 'bill_balance',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ "what's my bill for water and electricity",
+ 'read my bill balances',
+ 'find out the cost of my bill payments',
+ 'let me know the amounts i owe for my utilities and tuition bills',
+ 'can you tell me the total cost of my bills please',
+ 'what is the total amount due on my electricity bill',
+ 'let me know how much money i will need to spend on paying bills',
+ 'please let me know how much my gas bill is',
+ 'how much in total will i be paying for bills',
+ 'what do i owe for my utilities bill',
+ 'how much is the total due for my pgw gas bill',
+ 'what is my dte bill this month',
+ 'can you check the balance on the water bill',
+ 'do you know how much i owe on my cable bill',
+ 'how much do i need to pay on my bills',
+ 'add all my bills for this month and tell me the total',
+ "what's the water bill",
+ 'how much is my rent',
+ 'how much is my electric bill for',
+ 'what is the amount on my water bill'
+ ]
+ },
+ {
+ name: 'share_location',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'tell john and sam where i am',
+ 'let cora and nancy know were i am now',
+ 'can you share my location with my girlfriends',
+ 'let shih chieh see my location',
+ 'let coach know where i am',
+ 'tell base camp my gps coordinates',
+ 'let the folks know my gps coordinates',
+ 'send my current location to my superior',
+ 'share my coordinates with melissa and doug',
+ 'i would like ms johannson and ms alba to be informed of my location',
+ 'give adam my gps coordinates',
+ 'share my location with sarah',
+ 'share my location with my sister',
+ 'share my location with mom',
+ 'share location with bill',
+ 'please tell steve my location',
+ 'give my location to bill and craig',
+ 'share my location with ben and jerry',
+ 'let ben and jerry know where i am',
+ 'give tom my location'
+ ]
+ },
+ {
+ name: 'play_music',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'can x’s music be played',
+ "i'd like to hear my workout playlist",
+ 'put my music on, please',
+ 'rock playlist please',
+ 'turn on the rap playlist',
+ 'i want to listen to my maroon 5 playlist, play it for me',
+ 'can you play music the beatles',
+ 'play help! by the beatles',
+ "please play the song that goes like hey jude don't make it bad",
+ 'play heart shaped box',
+ 'i want to hear some music',
+ 'play my post-rock playlist',
+ 'resume my death metal playlist',
+ 'play me some tunes',
+ 'play some rock',
+ 'find the song with, "baby shark, doo doo',
+ 'that "baby shark, doo doo," song, i want to hear it',
+ 'play songs by the beatles',
+ 'can you play elvis',
+ 'do you have music by elvis'
+ ]
+ },
+ {
+ name: 'gas',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'how much gas is left',
+ 'tell me the amount of gas i have',
+ 'in my tank how much gas is there',
+ "what's the status of my gas tank",
+ 'can you tell me the amount of gas i have',
+ 'does my car have enough gas to get to chicago',
+ 'how much gas is in my tank',
+ 'do i have a lot of gas left',
+ 'what is my gas level',
+ 'is there enough fuel to drive to tampa',
+ 'do i have enough fuel to get to tampa',
+ 'is the level of fuel in my car enough to get me to work',
+ 'how much does it cost to get my car filled up',
+ 'how many miles is there left to get to sioux falls',
+ "i'd just like to know how much gas is in my vehicle",
+ 'please check the amount of gas i have',
+ 'how low is my gas tank',
+ 'what is left in my gas tank',
+ 'how much fuel is in the tank',
+ 'how much petrol do i have'
+ ]
+ },
+ {
+ name: 'cancel_reservation',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'remove my reservation at outback for timmy',
+ 'my reservation is no longer needed',
+ 'i need to put in a cancellation for the dinner plans tonight',
+ "get rid of my 2 pm reservation at ruth's steakhouse",
+ 'can you cancelt the reservation i have for dinner',
+ 'cancel the reservation i made for 8 pm at black rock',
+ "i don't need my reservation, cancel it",
+ 'abandon the dinner reservation i made',
+ 'cancel my reservation for me',
+ 'can i cancel my reservations',
+ "i need you to cancel my reservation for smith's at 6:15",
+ "i no longer need a table for four at chili's",
+ "i'd like to cancel the party of 7 we reserved at the olive garden",
+ 'i no longer need the reservation for 20 at ihop',
+ 'i no longer need the dinner reservation',
+ 'cancel my reservation for 4 at red robin',
+ 'i no longer need my reservation with carl at umami',
+ 'joe and i are to cancel the reservation at zephers now',
+ 'please cancel the reservation at zephers for joe and myself right now',
+ 'cancel the reservation for joe and i right now for the zephers reservation we had booked'
+ ]
+ },
+ {
+ name: 'recipe',
+ slots: [],
+ contexts: ['main'],
+ utterances: [
+ 'how do i fry pork chops',
+ 'how do i cook boiled eggs',
+ 'show me a beef recipe',
+ 'making spaghetti sauce perfectly is done how',
+ 'what are the steps i need to follow to make chewy rice krispies treats',
+ "what's the best way to make chicken stir fry",
+ 'will you give me some guidance on how to make ceviche',
+ 'can you search for the best new york style cheesecake recipes please',
+ 'how do you make pot roast',
+ 'find a good recipe for meatloaf',
+ 'how do you cook meatloaf',
+ 'i need a recipe for chicken pot pie',
+ 'how do i bake a blackberry pie',
+ 'find me a recipe for teriyaki chicken',
+ 'find out how to make chicken tikka masala',
+ 'please show me a recipe for chili',
+ 'can you find me a recipe for salsa',
+ 'can you find me a recipe for honey ham',
+ 'how do you create pork chili',
+ 'can you instruct me on how to make german chocolate cake'
+ ]
+ },
+ {
+ name: 'order_checks',
+ slots: [],
+ contexts: ['main'],
+
utterances: [ + 'order more checks', + "i don't have any checks left; please order some", + "i'm in need of checks for bank of america", + 'could you please mail me some checkbooks', + 'i need to order more checks for my savings account', + 'can i please get some more checkbooks mailed to me', + "i don't have checks so can i order some new ones", + 'i need more checkbooks delivered to me by mail', + 'can you show me how to get more checkbooks by mail', + 'how do you order new checks', + 'is it possible to get more checkbooks for my wells fargo checkings', + 'does my great western bank account allow me to order more checkbooks', + 'how do i go about getting more checks since i ran out of them', + 'is is possible to order more checkbooks for account ending in sequence 939392', + 'tell my bank i need new checks', + 'please order me some new checks', + 'how do i place an order for more checks for my bank of america account', + 'can i order for more checkbooks for my chase bank account', + 'ai, where do i go to order checks for my usbank account', + 'make an order for new checbooks on my td checking please' + ] + } + ] +} diff --git a/packages/nlu-e2e/src/datasets/grocery.ts b/packages/nlu-e2e/src/datasets/grocery.ts new file mode 100644 index 00000000..008c2237 --- /dev/null +++ b/packages/nlu-e2e/src/datasets/grocery.ts @@ -0,0 +1,102 @@ +import { TrainInput } from '@botpress/nlu-client' + +export const sample = { + utterance: 'these grapes look moldy!', + intent: 'fruit-is-moldy' +} + +export const trainSet: TrainInput = { + language: 'en', + intents: [ + { + name: 'fruit-is-moldy', + contexts: ['grocery'], + utterances: [ + 'fruit is moldy', + 'this fruit is moldy', + 'this [banana](moldy_fruit) is not good to eat', + 'theses [oranges](moldy_fruit) have passed', + 'theses [grapes](moldy_fruit) look bad', + 'theses [apples](moldy_fruit) look so moldy' + ], + slots: [ + { + name: 'moldy_fruit', + entities: ['fruits'] + } + ] + }, + { + name: 'hello', + contexts: ['global', 'grocery'], + slots: [], + utterances: [ + 'good day!', + 'good morning', + 'holla', + 'bonjour', + 'hey there', + 'hi bot', + 'hey bot', + 'hey robot', + 'hey!', + 'hi', + 'hello' + ] + }, + { + name: 'talk-to-manager', + contexts: ['grocery'], + utterances: [ + 'talk to manager', + 'I want to talk to the manager', + "Who's your boss?", + 'Can I talk to the person in charge?', + "I'd like to speak to your manager", + 'Can I talk to your boss? plz', + 'I wanna speak to manager please', + 'let me speak to your boss or someone', + 'can I meet your boss [at 1pm today](appointment_time) ?', + 'will your manager be available [tomorrow afternoon around 4pm](appointment_time)' + ], + slots: [ + { + name: 'appointment_time', + entities: ['time'] + } + ] + }, + { + name: 'where-is', + contexts: ['grocery'], + utterances: [ + 'where is [milk](thing_to_search) ?', + 'where are [apples](thing_to_search) ?', + 'can you help me find [apples](thing_to_search) ?', + "I'm searching for [pie](thing_to_search) ?", + 'where is the [milk](thing_to_search) ?', + 'where are the [milk](thing_to_search) ?' 
+      ],
+      slots: [
+        {
+          name: 'thing_to_search',
+          entities: ['fruits', 'any']
+        }
+      ]
+    }
+  ],
+  entities: [
+    {
+      name: 'fruits',
+      type: 'list',
+      fuzzy: 0.9,
+      values: [
+        { name: 'banana', synonyms: ['bananas'] },
+        { name: 'apple', synonyms: ['apples'] },
+        { name: 'grape', synonyms: ['grapes'] },
+        { name: 'orange', synonyms: ['oranges'] }
+      ]
+    }
+  ],
+  seed: 42
+}
diff --git a/packages/nlu-e2e/src/datasets/index.ts b/packages/nlu-e2e/src/datasets/index.ts
new file mode 100644
index 00000000..1187428d
--- /dev/null
+++ b/packages/nlu-e2e/src/datasets/index.ts
@@ -0,0 +1,3 @@
+export { trainSet as clinc50_42_dataset } from './clinc50_42'
+export { trainSet as clinc50_666_dataset } from './clinc50_666'
+export { trainSet as grocery_dataset, sample as grocery_test_sample } from './grocery'
diff --git a/packages/nlu-e2e/src/errors.ts b/packages/nlu-e2e/src/errors.ts
new file mode 100644
index 00000000..a1e316c9
--- /dev/null
+++ b/packages/nlu-e2e/src/errors.ts
@@ -0,0 +1,17 @@
+import { http } from '@botpress/nlu-client'
+
+export class UnsuccessfullAPICall extends Error {
+  constructor(nluError: http.NLUError, hint?: string) {
+    const { message } = nluError
+    super(`An error occurred when querying the NLU Server: "${message}".${hint ? `\n${hint}` : ''}`)
+  }
+}
+
+export class UnsuccessfullModelTransfer extends Error {
+  constructor(status: string, verb: 'GET' | 'POST') {
+    const action = verb === 'GET' ? 'downloading' : 'uploading'
+    super(`${action} model weights returned with status: "${status}".`)
+  }
+}
+
+export class PrecondtionFailed extends Error {}
diff --git a/packages/nlu-e2e/src/index.ts b/packages/nlu-e2e/src/index.ts
new file mode 100644
index 00000000..60c09ac8
--- /dev/null
+++ b/packages/nlu-e2e/src/index.ts
@@ -0,0 +1,35 @@
+import chalk from 'chalk'
+import yargs from 'yargs'
+import { listTests } from './ls-tests'
+import { runTests } from './run-tests'
+
+yargs
+  .command(
+    ['test', '$0'],
+    'Launch e2e tests on nlu-server',
+    {
+      nluEndpoint: {
+        type: 'string',
+        alias: 'e',
+        required: true,
+        default: 'http://localhost:3200'
+      },
+      pattern: {
+        type: 'string',
+        alias: 'p',
+        demandOption: false
+      }
+    },
+    (argv) => {
+      void runTests(argv)
+        .then(() => {})
+        .catch((err) => {
+          console.error(chalk.red('Test failed for the following reason:\n'), err)
+          process.exit(1)
+        })
+    }
+  )
+  .command(['list', 'ls'], 'List available tests', {}, () => {
+    listTests()
+  })
+  .help().argv
diff --git a/packages/nlu-e2e/src/ls-tests.ts b/packages/nlu-e2e/src/ls-tests.ts
new file mode 100644
index 00000000..f21e8b53
--- /dev/null
+++ b/packages/nlu-e2e/src/ls-tests.ts
@@ -0,0 +1,13 @@
+import { Logger } from '@bpinternal/log4bot'
+import tests from './tests'
+
+export const listTests = () => {
+  const logger = new Logger('e2e', {
+    level: 'debug'
+  })
+
+  logger.info('Available tests are:')
+  for (const test of tests) {
+    logger.info(`- ${test.name}`)
+  }
+}
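For orientation, a minimal sketch of how the yargs entry point above ends up calling the runner; the endpoint matches the command's default, while the pattern value and the programmatic invocation are illustrative only:

    import { runTests } from './run-tests'

    // same effect as: <e2e-binary> test -e http://localhost:3200 -p 'linting|prediction'
    // (the binary name is hypothetical; only the flags come from the command above)
    void runTests({ nluEndpoint: 'http://localhost:3200', pattern: 'linting|prediction' }).catch((err) => {
      console.error(err)
      process.exit(1)
    })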
diff --git a/packages/nlu-e2e/src/run-tests.ts b/packages/nlu-e2e/src/run-tests.ts
new file mode 100644
index 00000000..fb9c418d
--- /dev/null
+++ b/packages/nlu-e2e/src/run-tests.ts
@@ -0,0 +1,52 @@
+import { Client as NLUClient } from '@botpress/nlu-client'
+import { Logger } from '@bpinternal/log4bot'
+
+import fs from 'fs'
+import _ from 'lodash'
+import { nanoid } from 'nanoid'
+import { assertModelsAreEmpty, assertServerIsReachable } from './assertions'
+import { clinc50_42_dataset, clinc50_666_dataset, grocery_dataset } from './datasets'
+import tests from './tests'
+import { AssertionArgs, Test } from './typings'
+import { getE2ECachePath, syncE2ECachePath } from './utils'
+
+type CommandLineArgs = {
+  nluEndpoint: string
+  pattern?: string
+}
+
+export const runTests = async (cliArgs: CommandLineArgs) => {
+  const { nluEndpoint, pattern } = cliArgs
+
+  const appId = `${nanoid()}/e2e-tests/${nanoid()}`
+  const logger = new Logger('e2e', {
+    level: 'debug'
+  })
+
+  logger.info(`Running e2e tests on server located at "${nluEndpoint}"`)
+  await syncE2ECachePath(logger, appId)
+
+  const client = new NLUClient({
+    baseURL: nluEndpoint
+  })
+  const args: AssertionArgs = { logger, appId, client }
+
+  const requiredLanguages = [clinc50_42_dataset, clinc50_666_dataset, grocery_dataset].map((ts) => ts.language)
+
+  const baseLogger = logger.sub('base')
+  const baseArgs = { ...args, logger: baseLogger }
+  await assertServerIsReachable(baseArgs, requiredLanguages)
+  await assertModelsAreEmpty(baseArgs)
+
+  let testsToRun: Test[] = tests
+  if (pattern) {
+    const rgx = new RegExp(pattern)
+    testsToRun = tests.filter(({ name }) => rgx.test(name))
+  }
+
+  for (const test of testsToRun) {
+    await test.handler(args)
+  }
+
+  fs.rmSync(getE2ECachePath(appId), { recursive: true, force: true })
+}
diff --git a/packages/nlu-e2e/src/tests/index.ts b/packages/nlu-e2e/src/tests/index.ts
new file mode 100644
index 00000000..6b6773c2
--- /dev/null
+++ b/packages/nlu-e2e/src/tests/index.ts
@@ -0,0 +1,17 @@
+import { Test } from '../typings'
+import { langDetectionTest } from './lang-detection'
+import { lintingTest } from './linting'
+import { modelLifecycleTest } from './model-lifecycle'
+import { modelWeightsTransferTest } from './modelweights-transfer'
+import { predictionTest } from './prediction'
+import { trainingErrorsTest } from './training'
+
+const tests: Test[] = [
+  trainingErrorsTest,
+  lintingTest,
+  modelWeightsTransferTest,
+  modelLifecycleTest,
+  langDetectionTest,
+  predictionTest
+]
+export default tests
diff --git a/packages/nlu-e2e/src/tests/lang-detection.ts b/packages/nlu-e2e/src/tests/lang-detection.ts
new file mode 100644
index 00000000..480d05e3
--- /dev/null
+++ b/packages/nlu-e2e/src/tests/lang-detection.ts
@@ -0,0 +1,16 @@
+import { AssertionArgs, Test } from 'src/typings'
+import { assertLanguageDetectionWorks } from '../assertions'
+
+const NAME = 'lang-detection'
+
+export const langDetectionTest: Test = {
+  name: NAME,
+  handler: async (args: AssertionArgs) => {
+    const { logger } = args
+    logger.info(`Running test: ${NAME}`)
+    const langDetectionLogger = logger.sub(NAME)
+    const langDetectionArgs = { ...args, logger: langDetectionLogger }
+    await assertLanguageDetectionWorks(langDetectionArgs, 'I love Botpress', 'en')
+    await assertLanguageDetectionWorks(langDetectionArgs, "J'aime Botpress de tout mon coeur", 'fr')
+  }
+}
diff --git a/packages/nlu-e2e/src/tests/linting.ts b/packages/nlu-e2e/src/tests/linting.ts
new file mode 100644
index 00000000..2ead78d3
--- /dev/null
+++ b/packages/nlu-e2e/src/tests/linting.ts
@@ -0,0 +1,68 @@
+import { DatasetIssue, IntentDefinition, IssueCode, IssueComputationSpeed, TrainInput } from '@botpress/nlu-client'
+import chai from 'chai'
+import _ from 'lodash'
+import { AssertionArgs, Test } from 'src/typings'
+import { assertLintingFinishes, assertLintingStarts } from '../assertions'
+import { grocery_dataset } from '../datasets'
+
+const NAME = 'linting'
+
+const getIntent = (ts: TrainInput, name: string): IntentDefinition => {
+  const intent = ts.intents.find((i) => i.name === name)
+  if (!intent) {
+    throw new Error(`Intent "${name}" does not exist.`)
+  }
+  return intent
+}
+
+const issueGuard = <C extends IssueCode>(code: C) => (i: DatasetIssue<IssueCode>): i is DatasetIssue<C> => {
+  return i.code === code
+}
+
+export const lintingTest: Test = {
+  name: NAME,
+  handler: async (args: AssertionArgs) => {
+    const { logger } = args
+    logger.info(`Running test: ${NAME}`)
+    const lintingLogger = logger.sub(NAME)
+    const lintingArgs = { ...args, logger: lintingLogger }
+    const speed: IssueComputationSpeed = 'slow'
+
+    const modelId = await assertLintingStarts(lintingArgs, speed, grocery_dataset)
+    const dataset_issues = await assertLintingFinishes(lintingArgs, speed, modelId)
+    chai.expect(dataset_issues).to.have.length(0, 'original dataset should have no issues')
+
+    const c_000_dataset = _.cloneDeep(grocery_dataset)
+    getIntent(c_000_dataset, 'fruit-is-moldy').utterances.push('I love [bananas](some_fruit_lol)')
+    const c000_modelId = await assertLintingStarts(lintingArgs, speed, c_000_dataset)
+    const c_000_dataset_issues = await assertLintingFinishes(lintingArgs, speed, c000_modelId)
+    const c000_issues = c_000_dataset_issues.filter(issueGuard('C_000'))
+    chai.expect(c000_issues).to.have.length(1, 'c000 issue count is incorrect')
+    chai.expect(c000_issues[0]).to.have.property('code', 'C_000')
+
+    const c_001_dataset = _.cloneDeep(grocery_dataset)
+    getIntent(c_001_dataset, 'fruit-is-moldy').slots.push({ name: 'some-slot', entities: ['non-existent-entity'] })
+    const c001_modelId = await assertLintingStarts(lintingArgs, speed, c_001_dataset)
+    const c001_dataset_issues = await assertLintingFinishes(lintingArgs, speed, c001_modelId)
+    const c001_issues = c001_dataset_issues.filter(issueGuard('C_001'))
+    chai.expect(c001_issues).to.have.length(1, 'c001 issue count is incorrect')
+    chai.expect(c001_issues[0]).to.have.property('code', 'C_001')
+
+    const e_000_dataset = _.cloneDeep(grocery_dataset)
+    getIntent(e_000_dataset, 'fruit-is-moldy').utterances.push('I love [milk](moldy_fruit)')
+    getIntent(e_000_dataset, 'talk-to-manager').utterances.push(
+      'Can I talk with your boss [in Quebec city](appointment_time)?'
+    )
+    const e000_modelId = await assertLintingStarts(lintingArgs, speed, e_000_dataset)
+    const e_000_dataset_issues = await assertLintingFinishes(lintingArgs, speed, e000_modelId)
+    const e000_issues = _(e_000_dataset_issues)
+      .filter(issueGuard('E_000'))
+      .orderBy((i) => i.data.source)
+      .value()
+    chai.expect(e000_issues).to.have.length(2, 'e000 issue count is incorrect')
+    chai.expect(e000_issues[0]).to.have.property('code', 'E_000')
+    chai.expect(e000_issues[0].data).to.have.property('source', 'in Quebec city')
+    chai.expect(e000_issues[1]).to.have.property('code', 'E_000')
+    chai.expect(e000_issues[1].data).to.have.property('source', 'milk')
+  }
+}
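The curried issueGuard above is what keeps these assertions type-safe: filtering with it narrows the issue array to a single code, so the `data` payload is typed for that code. A standalone sketch, assuming DatasetIssue is parameterized by its issue code as the guard's signature implies:

    import { DatasetIssue, IssueCode } from '@botpress/nlu-client'

    // narrows DatasetIssue<IssueCode> down to DatasetIssue<'E_000'>
    const isE000 = (i: DatasetIssue<IssueCode>): i is DatasetIssue<'E_000'> => i.code === 'E_000'

    const sourcesOfE000 = (issues: DatasetIssue<IssueCode>[]): string[] =>
      issues.filter(isE000).map((i) => i.data.source)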
diff --git a/packages/nlu-e2e/src/tests/model-lifecycle.ts b/packages/nlu-e2e/src/tests/model-lifecycle.ts
new file mode 100644
index 00000000..601533f6
--- /dev/null
+++ b/packages/nlu-e2e/src/tests/model-lifecycle.ts
@@ -0,0 +1,49 @@
+import ms from 'ms'
+import { AssertionArgs, Test } from 'src/typings'
+import {
+  assertCancelTrainingFails,
+  assertModelsInclude,
+  assertModelsPrune,
+  assertPredictionFails,
+  assertQueueTrainingFails,
+  assertTrainingCancels,
+  assertTrainingFinishes,
+  assertTrainingsAre,
+  assertTrainingStarts
+} from '../assertions'
+import { clinc50_42_dataset, clinc50_666_dataset } from '../datasets'
+import { sleep } from '../utils'
+
+const NAME = 'life-cycle'
+
+export const modelLifecycleTest: Test = {
+  name: NAME,
+  handler: async (args: AssertionArgs) => {
+    const { logger } = args
+    logger.info(`Running test: ${NAME}`)
+    const modelLifecycleLogger = logger.sub(NAME)
+    const modelLifecycleArgs = { ...args, logger: modelLifecycleLogger }
+
+    await assertCancelTrainingFails(modelLifecycleArgs, 'my-model-id-lol', 'training_not_found')
+
+    let clinc50_42_modelId = await assertTrainingStarts(modelLifecycleArgs, clinc50_42_dataset)
+
+    await sleep(ms('1s'))
+    await assertTrainingCancels(modelLifecycleArgs, clinc50_42_modelId)
+
+    clinc50_42_modelId = await assertTrainingStarts(modelLifecycleArgs, clinc50_42_dataset)
+    await assertTrainingFinishes(modelLifecycleArgs, clinc50_42_modelId)
+
+    const clinc50_666_modelId = await assertTrainingStarts(modelLifecycleArgs, clinc50_666_dataset)
+
+    await assertQueueTrainingFails(modelLifecycleArgs, clinc50_666_dataset, 'training_already_started')
+    await assertPredictionFails(modelLifecycleArgs, clinc50_666_modelId, 'I love Botpress', 'model_not_found')
+    await assertModelsInclude(modelLifecycleArgs, [clinc50_42_modelId])
+    await assertTrainingsAre(modelLifecycleArgs, ['done', 'training'])
+
+    await sleep(ms('1s'))
+    await assertTrainingCancels(modelLifecycleArgs, clinc50_666_modelId)
+
+    await assertModelsPrune(args)
+  }
+}
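The queue/cancel/finish assertions above all reduce to polling the training session. A condensed sketch of that loop using pollTrainingUntil from src/utils (the five-minute budget is illustrative; only the 'training' status value is taken from the test itself):

    import ms from 'ms'
    import { Client as NLUClient } from '@botpress/nlu-client'
    import { pollTrainingUntil } from '../utils'

    // resolves once the session leaves the 'training' status, or rejects on timeout
    const waitForTrainingEnd = (nluClient: NLUClient, appId: string, modelId: string) =>
      pollTrainingUntil({
        nluClient,
        appId,
        modelId,
        condition: (session) => session.status !== 'training',
        maxTime: ms('5m')
      })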
diff --git a/packages/nlu-e2e/src/tests/modelweights-transfer.ts b/packages/nlu-e2e/src/tests/modelweights-transfer.ts
new file mode 100644
index 00000000..5ec933b6
--- /dev/null
+++ b/packages/nlu-e2e/src/tests/modelweights-transfer.ts
@@ -0,0 +1,89 @@
+import fs from 'fs'
+import _ from 'lodash'
+import path from 'path'
+import { AssertionArgs, Test } from 'src/typings'
+import {
+  assertIntentPredictionWorks,
+  assertModelsInclude,
+  assertModelsPrune,
+  assertModelTransferIsEnabled,
+  assertModelWeightsDownload,
+  assertModelWeightsDownloadFails,
+  assertModelWeightsUpload,
+  assertModelWeightsUploadFails,
+  assertPredictionFails,
+  assertTrainingFinishes,
+  assertTrainingStarts
+} from '../assertions'
+import { grocery_dataset, grocery_test_sample } from '../datasets'
+import { bufferReplace, corruptBuffer, getE2ECachePath } from '../utils'
+
+const NAME = 'modelweights-transfer'
+
+export const modelWeightsTransferTest: Test = {
+  name: NAME,
+  handler: async (args: AssertionArgs) => {
+    const { logger } = args
+    logger.info(`Running test: ${NAME}`)
+    const modelWeightsTransferLogger = logger.sub(NAME)
+    const modelWeightsTransferArgs = { ...args, logger: modelWeightsTransferLogger }
+
+    await assertModelTransferIsEnabled(modelWeightsTransferArgs)
+
+    const modelId = await assertTrainingStarts(modelWeightsTransferArgs, grocery_dataset)
+
+    // ensure model download fails if the model is not created yet
+    await assertModelWeightsDownloadFails(modelWeightsTransferArgs, modelId, 'MODEL_NOT_FOUND')
+
+    // ensure the model works
+    await assertTrainingFinishes(modelWeightsTransferArgs, modelId)
+    await assertModelsInclude(modelWeightsTransferArgs, [modelId])
+    await assertIntentPredictionWorks(
+      modelWeightsTransferArgs,
+      modelId,
+      grocery_test_sample.utterance,
+      grocery_test_sample.intent
+    )
+
+    // download the model locally
+    const cachePath = getE2ECachePath(modelWeightsTransferArgs.appId)
+    const fileLocation = path.join(cachePath, `${modelId}.model`)
+    await assertModelWeightsDownload(modelWeightsTransferArgs, modelId, fileLocation)
+
+    // prune the model remotely and ensure prediction no longer works
+    await assertModelsPrune(modelWeightsTransferArgs)
+    await assertPredictionFails(modelWeightsTransferArgs, modelId, grocery_test_sample.utterance, 'model_not_found')
+
+    // upload the model and ensure prediction works again
+    await assertModelWeightsUpload(modelWeightsTransferArgs, fileLocation)
+    await assertModelsInclude(modelWeightsTransferArgs, [modelId])
+    await assertIntentPredictionWorks(
+      modelWeightsTransferArgs,
+      modelId,
+      grocery_test_sample.utterance,
+      grocery_test_sample.intent
+    )
+
+    // ensure uploading a corrupted buffer fails
+    const originalModelWeights = await fs.promises.readFile(fileLocation)
+    const corruptedWeights = corruptBuffer(originalModelWeights)
+    const corruptedFileLocation = path.join(cachePath, `${modelId}.corrupted.model`)
+    await fs.promises.writeFile(corruptedFileLocation, corruptedWeights)
+    await assertModelWeightsUploadFails(modelWeightsTransferArgs, corruptedFileLocation, 'INVALID_MODEL_FORMAT')
+
+    // ensure uploading a buffer with an older spec version fails
+    const specHash = modelId.split('.')[1]
+    const dummySpecHash = 'ffffff9999999999'
+    const deprecatedWeights = bufferReplace(
+      originalModelWeights,
+      Buffer.from(specHash, 'utf8'),
+      Buffer.from(dummySpecHash, 'utf8')
+    )
+    const deprecatedFileLocation = path.join(cachePath, `${modelId}.deprecated.model`)
+    await fs.promises.writeFile(deprecatedFileLocation, deprecatedWeights)
+    await assertModelWeightsUploadFails(modelWeightsTransferArgs, deprecatedFileLocation, 'UNSUPORTED_MODEL_SPEC')
+
+    // cleanup
+    await assertModelsPrune(modelWeightsTransferArgs)
+  }
+}
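A note on the deprecated-weights trick above: it relies on the model id embedding the spec hash as its second dot-separated segment. That layout is inferred from the split('.')[1] call in the test; the names of the surrounding segments are an assumption:

    // e.g. 'bf2ae1.ffffff9999999999.en' -> 'ffffff9999999999' (example id is made up)
    const specHashOf = (modelId: string): string => modelId.split('.')[1]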
diff --git a/packages/nlu-e2e/src/tests/prediction.ts b/packages/nlu-e2e/src/tests/prediction.ts
new file mode 100644
index 00000000..146bc9f3
--- /dev/null
+++ b/packages/nlu-e2e/src/tests/prediction.ts
@@ -0,0 +1,35 @@
+import { AssertionArgs, Test } from 'src/typings'
+import {
+  assertIntentPredictionWorks,
+  assertModelsInclude,
+  assertModelsPrune,
+  assertTrainingFinishes,
+  assertTrainingStarts
+} from '../assertions'
+import { grocery_dataset, grocery_test_sample } from '../datasets'
+
+const NAME = 'prediction'
+
+export const predictionTest: Test = {
+  name: NAME,
+  handler: async (args: AssertionArgs) => {
+    const { logger } = args
+    logger.info(`Running test: ${NAME}`)
+    const predictionLogger = logger.sub(NAME)
+    const predictionArgs = { ...args, logger: predictionLogger }
+
+    const grocery_modelId = await assertTrainingStarts(predictionArgs, grocery_dataset)
+    await assertTrainingFinishes(predictionArgs, grocery_modelId)
+
+    await assertModelsInclude(predictionArgs, [grocery_modelId])
+
+    await assertIntentPredictionWorks(
+      predictionArgs,
+      grocery_modelId,
+      grocery_test_sample.utterance,
+      grocery_test_sample.intent
+    )
+
+    await assertModelsPrune(predictionArgs)
+  }
+}
diff --git a/packages/nlu-e2e/src/tests/training.ts b/packages/nlu-e2e/src/tests/training.ts
new file mode 100644
index 00000000..6ab24605
--- /dev/null
+++ b/packages/nlu-e2e/src/tests/training.ts
@@ -0,0 +1,22 @@
+import _ from 'lodash'
+import { Test, AssertionArgs } from 'src/typings'
+import { assertModelsPrune, assertQueueTrainingFails } from '../assertions'
+import { grocery_dataset } from '../datasets'
+
+const NAME = 'training-errors'
+
+export const trainingErrorsTest: Test = {
+  name: NAME,
+  handler: async (args: AssertionArgs) => {
+    const { logger } = args
+    logger.info(`Running test: ${NAME}`)
+    const trainingLogger = logger.sub(NAME)
+    const trainingArgs = { ...args, logger: trainingLogger }
+
+    const invalidDataset = _.cloneDeep(grocery_dataset)
+    invalidDataset.intents[0].slots.push({ name: 'some-slot', entities: ['non-existent-entity'] })
+    await assertQueueTrainingFails(trainingArgs, invalidDataset, 'dataset_format')
+    await assertQueueTrainingFails(trainingArgs, { ...grocery_dataset, language: 'ab' }, 'dataset_format')
+    await assertModelsPrune(args)
+  }
+}
diff --git a/packages/nlu-e2e/src/typings.ts b/packages/nlu-e2e/src/typings.ts
new file mode 100644
index 00000000..1ddab9b1
--- /dev/null
+++ b/packages/nlu-e2e/src/typings.ts
@@ -0,0 +1,15 @@
+import { Client as NLUClient } from '@botpress/nlu-client'
+import { Logger } from '@bpinternal/log4bot'
+
+export type AssertionArgs = {
+  client: NLUClient
+  logger: Logger
+  appId: string
+}
+
+export type TestHandler = (args: AssertionArgs) => Promise<void>
+
+export type Test = {
+  name: string
+  handler: TestHandler
+}
diff --git a/packages/nlu-e2e/src/utils.ts b/packages/nlu-e2e/src/utils.ts
new file mode 100644
index 00000000..4c4bfe4d
--- /dev/null
+++ b/packages/nlu-e2e/src/utils.ts
@@ -0,0 +1,157 @@
+import { Client as NLUClient, IssueComputationSpeed, LintingState, TrainingState } from '@botpress/nlu-client'
+import { Logger } from '@bpinternal/log4bot'
+import Bluebird from 'bluebird'
+import crypto from 'crypto'
+import fs from 'fs'
+import path from 'path'
+import { getAppDataPath } from './app-data'
+import { UnsuccessfullAPICall } from './errors'
+
+export type TrainLintPredicate<T extends TrainingState | LintingState> = (state: T) => boolean
+export type PollingArgs<T extends TrainingState | LintingState> = {
+  appId: string
+  modelId: string
+  nluClient: NLUClient
+  condition: TrainLintPredicate<T>
+  maxTime: number
+}
+
+const DEFAULT_POLLING_INTERVAL = 500
+const E2E_CACHE_DIR = 'e2e'
+
+const timeout = (ms: number) =>
+  new Promise<never>((_resolve, reject) =>
+    setTimeout(() => {
+      reject(new Error(`Timeout of ${ms} ms reached`))
+    }, ms)
+  )
+
+export const pollTrainingUntil = async (args: PollingArgs<TrainingState>): Promise<TrainingState> => {
+  const { appId, condition, maxTime, modelId, nluClient } = args
+  const interval = maxTime < 0 ? DEFAULT_POLLING_INTERVAL : maxTime / 20
+
+  const trainUntilPromise = new Promise<TrainingState>((resolve, reject) => {
+    const int = setInterval(async () => {
+      try {
+        const trainStatusRes = await nluClient.getTrainingStatus(appId, modelId)
+        if (!trainStatusRes.success) {
+          clearInterval(int)
+          reject(new UnsuccessfullAPICall(trainStatusRes.error))
+          return
+        }
+
+        const { session } = trainStatusRes
+        if (condition(session)) {
+          clearInterval(int)
+          resolve(session)
+          return
+        }
+      } catch (thrown) {
+        clearInterval(int)
+        reject(thrown)
+      }
+    }, interval)
+  })
+
+  if (maxTime < 0) {
+    return trainUntilPromise
+  }
+  return Bluebird.race([timeout(maxTime), trainUntilPromise])
+}
+
+export const pollLintingUntil = async (
+  args: PollingArgs<LintingState> & { speed: IssueComputationSpeed }
+): Promise<LintingState> => {
+  const { appId, condition, maxTime, modelId, nluClient, speed } = args
+  const interval = maxTime < 0 ? DEFAULT_POLLING_INTERVAL : maxTime / 20
+
+  const lintUntilPromise = new Promise<LintingState>((resolve, reject) => {
+    const int = setInterval(async () => {
+      try {
+        const lintStatusRes = await nluClient.getLintingStatus(appId, modelId, speed)
+        if (!lintStatusRes.success) {
+          clearInterval(int)
+          reject(new UnsuccessfullAPICall(lintStatusRes.error))
+          return
+        }
+
+        const { session } = lintStatusRes
+        if (condition(session)) {
+          clearInterval(int)
+          resolve(session)
+          return
+        }
+      } catch (thrown) {
+        clearInterval(int)
+        reject(thrown)
+      }
+    }, interval)
+  })
+
+  if (maxTime < 0) {
+    return lintUntilPromise
+  }
+  return Bluebird.race([timeout(maxTime), lintUntilPromise])
+}
+
+export const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms))
+
+export const getE2ECachePath = (appId?: string) => {
+  const bpCachePath = getAppDataPath()
+  if (!appId) {
+    return path.join(bpCachePath, E2E_CACHE_DIR)
+  }
+  const uriEncodedAppId = encodeURIComponent(appId)
+  return path.join(bpCachePath, E2E_CACHE_DIR, uriEncodedAppId)
+}
+
+export const syncE2ECachePath = async (logger: Logger, appId?: string) => {
+  const bpCachePath = getAppDataPath()
+  if (!fs.existsSync(bpCachePath)) {
+    throw new Error('APP_DATA_PATH does not exist')
+  }
+
+  const e2eCachePath = getE2ECachePath()
+  if (!fs.existsSync(e2eCachePath)) {
+    logger.info('making e2e cache directory')
+    await fs.promises.mkdir(e2eCachePath)
+  } else {
+    logger.debug('e2e cache directory already exists')
+  }
+
+  if (appId) {
+    const uriEncodedAppId = encodeURIComponent(appId)
+    await fs.promises.mkdir(path.join(e2eCachePath, uriEncodedAppId))
+  }
+}
+
+export const corruptBuffer = (buffer: Buffer): Buffer => {
+  const algorithm = 'aes-256-gcm'
+  const key = crypto.randomBytes(32)
+  const iv = crypto.randomBytes(16)
+
+  const cipher = crypto.createCipheriv(algorithm, key, iv)
+  let encrypted = cipher.update(buffer)
+
+  encrypted = Buffer.concat([encrypted, cipher.final()])
+
+  return encrypted
+}
+
+export const bufferReplace = (buffer: Buffer, from: Buffer, to: Buffer): Buffer => {
+  const patternStart = buffer.indexOf(from)
+  if (patternStart < 0) {
+    return buffer
+  }
+
+  const patternEnd = patternStart + from.length
+  return Buffer.concat([buffer.slice(0, patternStart), to, buffer.slice(patternEnd, buffer.length)])
+}
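A quick illustration of bufferReplace above: only the first occurrence of `from` is swapped for `to`, and the rest of the buffer is left intact (the model-id string is a made-up example):

    import assert from 'assert'
    import { bufferReplace } from './utils'

    const original = Buffer.from('model.abc123.en', 'utf8')
    const patched = bufferReplace(original, Buffer.from('abc123', 'utf8'), Buffer.from('ffffff', 'utf8'))
    assert.strictEqual(patched.toString('utf8'), 'model.ffffff.en')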
diff --git a/packages/nlu-e2e/tsconfig.json b/packages/nlu-e2e/tsconfig.json
new file mode 100644
index 00000000..e5ee8cf1
--- /dev/null
+++ b/packages/nlu-e2e/tsconfig.json
@@ -0,0 +1,16 @@
+{
+  "extends": "../../tsconfig.packages.json",
+  "references": [
+    {
+      "path": "../nlu-client"
+    }
+  ],
+  "compilerOptions": {
+    "outDir": "./dist" /* Redirect output structure to the directory. */,
+    "rootDir": "./src" /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */,
+    "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */,
+    "baseUrl": ".",
+    "composite": true,
+    "typeRoots": ["./node_modules/@types", "../../node_modules/@types"]
+  }
+}
diff --git a/packages/nlu-engine/jest.config.js b/packages/nlu-engine/jest.config.js
deleted file mode 100644
index 61ea9c96..00000000
--- a/packages/nlu-engine/jest.config.js
+++ /dev/null
@@ -1,11 +0,0 @@
-module.exports = {
-  preset: 'ts-jest',
-  testEnvironment: 'node',
-  testPathIgnorePatterns: ['dist', 'node_modules'],
-  rootDir: '.',
-  resetModules: true,
-  verbose: true,
-  modulePaths: ['<rootDir>/src/'],
-  moduleFileExtensions: ['js', 'json', 'jsx', 'ts', 'tsx', 'd.ts'],
-  modulePathIgnorePatterns: ['out']
-}
diff --git a/packages/nlu-engine/package.json b/packages/nlu-engine/package.json
index be5aa904..58f1bc2f 100644
--- a/packages/nlu-engine/package.json
+++ b/packages/nlu-engine/package.json
@@ -1,18 +1,21 @@
 {
   "name": "@botpress/nlu-engine",
-  "version": "0.1.6",
+  "version": "0.1.7",
   "author": "Botpress, Inc.",
   "license": "AGPL-3.0",
   "scripts": {
     "build": "tsc --build",
-    "test": "cross-env jest -i --detectOpenHandles -c jest.config.js"
+    "test": "jest --roots ./dist",
+    "clean": "rimraf ./dist && rimraf ./node_modules"
   },
   "dependencies": {
     "@botpress/node-crfsuite": "*",
     "@botpress/node-fasttext": "*",
     "@botpress/node-sentencepiece": "*",
     "@botpress/node-svm": "*",
+    "@bpinternal/ptb-schema": "^0.0.2",
     "@botpress/worker": "*",
+    "@botpress/lang-client": "*",
     "@microsoft/recognizers-text-suite": "^1.3.0",
     "@types/nanoid": "^3.0.0",
     "axios": "^0.21.1",
@@ -33,10 +36,9 @@
     "numeric": "^1.2.6",
     "object-sizeof": "^1.5.2",
     "seedrandom": "^3.0.5",
-    "semver": "^5.6.0",
     "tmp": "^0.0.33",
     "verror": "^1.10.0",
-    "yn": "^2.0.0"
+    "yn": "^4.0.0"
   },
   "devDependencies": {
     "@types/bluebird-retry": "^0.11.4",
@@ -47,7 +49,7 @@
     "@types/lodash": "^4.14.116",
     "@types/lru-cache": "^5.1.0",
     "@types/ms": "^0.7.30",
-    "@types/node": "^12.13.0",
+    "@types/node": "^16.11.10",
     "@types/numeric": "^1.2.1",
     "@types/seedrandom": "^2.4.28",
     "@types/tmp": "^0.0.33",
@@ -60,8 +62,7 @@
     "eslint-plugin-jsdoc": "^32.3.2",
     "jest": "^24.9.0",
     "prettier": "^2.2.1",
-    "ts-jest": "^26.5.5",
-    "typescript": "^3.9.10"
+    "typescript": "^5.0.4"
   },
   "types": "./src/typings.d.ts",
   "main": "./dist/index.js"
diff --git a/packages/nlu-engine/src/component.ts b/packages/nlu-engine/src/component.ts
new file mode 100644
index 00000000..e6a4739e
--- /dev/null
+++ b/packages/nlu-engine/src/component.ts
@@ -0,0 +1,18 @@
+import * as ptb from '@bpinternal/ptb-schema'
+
+type Predictor<PredictInput, PredictOutput> = {
+  predict: (u: PredictInput) => Promise<PredictOutput>
+}
+
+export type PipelineComponent<TrainInput, Model extends ptb.PTBMessage<any>, PredictInput, PredictOutput> = Predictor<
+  PredictInput,
+  PredictOutput
+> & {
+  readonly name: string
+  readonly modelType: Model
+  train: (input: TrainInput, progress: (p: number) => void) => Promise<ptb.Infer<Model>>
+  load: (model: ptb.Infer<Model>) => Promise<void>
+}
+
+export type PredictorOf<C> = C extends PipelineComponent<any, any, infer I, infer O> ? Predictor<I, O> : never
+export type ModelOf<C extends PipelineComponent<any, ptb.PTBMessage<any>, any, any>> = ptb.Infer<C['modelType']>
diff --git a/packages/nlu-engine/src/constants.ts b/packages/nlu-engine/src/constants.ts
new file mode 100644
index 00000000..7ec5a533
--- /dev/null
+++ b/packages/nlu-engine/src/constants.ts
@@ -0,0 +1,4 @@
+import { DUCKLING_ENTITIES } from './engine/entities/duckling-extractor/enums'
+
+export const SLOT_ANY = 'any'
+export const SYSTEM_ENTITIES = DUCKLING_ENTITIES
diff --git a/packages/nlu-engine/src/engine/clustering.ts b/packages/nlu-engine/src/engine/clustering.ts
index f76f9eeb..1f77c209 100644
--- a/packages/nlu-engine/src/engine/clustering.ts
+++ b/packages/nlu-engine/src/engine/clustering.ts
@@ -1,6 +1,6 @@
 import _ from 'lodash'
 import nearestVector from 'ml-nearest-vector'
-import { MLToolkit } from '../ml/typings'
+import * as MLToolkit from '../ml/toolkit'
 
 import { euclideanDistanceSquared } from './tools/math'
 import { Intent, SerializedKmeansResult, Tools } from './typings'
@@ -14,14 +14,11 @@ const KMEANS_OPTIONS = {
   distanceFunction: euclideanDistanceSquared
 }
 
-const NONE_INTENT = 'none'
-
 export const computeKmeans = (
   intents: Intent<Utterance>[],
   tools: Tools
 ): MLToolkit.KMeans.KmeansResult | undefined => {
   const data = _.chain(intents)
-    .filter((i) => i.name !== NONE_INTENT)
     .flatMap((i) => i.utterances)
     .flatMap((u) => u.tokens)
     .uniqBy((t: UtteranceToken) => t.value)
diff --git a/packages/nlu-engine/src/engine/entities/custom-extractor/index.ts b/packages/nlu-engine/src/engine/entities/custom-extractor/index.ts
index fe30dccd..2a924e82 100644
--- a/packages/nlu-engine/src/engine/entities/custom-extractor/index.ts
+++ b/packages/nlu-engine/src/engine/entities/custom-extractor/index.ts
@@ -1,20 +1,20 @@
 import _ from 'lodash'
+
 import { extractPattern } from '../../tools/patterns-utils'
 import { EntityExtractionResult, ListEntityModel, PatternEntity, WarmedListEntityModel } from '../../typings'
-import Utterance from '../../utterance/utterance'
 import { extractForListModel } from './list-extraction'
-import { serializeUtteranceToken } from './serializable-token'
+import { keepTokenProperties, Utterance } from './token'
 
-interface SplittedModels {
+type SplittedModels = {
   withCacheHit: WarmedListEntityModel[]
   withCacheMiss: WarmedListEntityModel[]
 }
 
 export class CustomEntityExtractor {
   public extractListEntities(utterance: Utterance, list_entities: ListEntityModel[]): EntityExtractionResult[] {
-    const serializedTokens = utterance.tokens.map(serializeUtteranceToken)
+    const tokens = utterance.tokens.map(keepTokenProperties)
     return _(list_entities)
-      .map((model) => extractForListModel(serializedTokens, model))
+      .map((model) => extractForListModel(tokens, model))
      .flatten()
      .value()
   }
@@ -24,7 +24,6 @@ export class CustomEntityExtractor {
     pattern_entities: PatternEntity[]
   ): EntityExtractionResult[] => {
     const input = utterance.toString()
-    // taken from pattern_extractor
     return _.flatMap(pattern_entities, (ent) => {
       const regex = new RegExp(ent.pattern!, ent.matchCase ?
'' : 'i') @@ -82,8 +81,8 @@ export class CustomEntityExtractor { const extractedMatches: EntityExtractionResult[] = _(withCacheMiss) .map((model) => { - const serializedTokens = utterance.tokens.map(serializeUtteranceToken) - const extractions = extractForListModel(serializedTokens, model) + const tokens = utterance.tokens.map(keepTokenProperties) + const extractions = extractForListModel(tokens, model) model.cache.set(cacheKey, extractions) return extractions }) diff --git a/packages/nlu-engine/src/engine/entities/custom-extractor/list-extraction.ts b/packages/nlu-engine/src/engine/entities/custom-extractor/list-extraction.ts index 083c0d2a..1bcd70bf 100644 --- a/packages/nlu-engine/src/engine/entities/custom-extractor/list-extraction.ts +++ b/packages/nlu-engine/src/engine/entities/custom-extractor/list-extraction.ts @@ -1,15 +1,11 @@ import _ from 'lodash' import { jaroWinklerSimilarity, levenshteinSimilarity } from '../../tools/strings' import { EntityExtractionResult, ListEntityModel } from '../../typings' -import { SerializableUtteranceToken, tokenToString } from './serializable-token' +import { ListEntityUtteranceToken, tokenToString } from './token' const ENTITY_SCORE_THRESHOLD = 0.6 -function takeUntil( - arr: SerializableUtteranceToken[], - start: number, - desiredLength: number -): SerializableUtteranceToken[] { +function takeUntil(arr: ListEntityUtteranceToken[], start: number, desiredLength: number): ListEntityUtteranceToken[] { let total = 0 const result = _.takeWhile(arr.slice(start), (t) => { const toAdd = tokenToString(t).length @@ -71,7 +67,7 @@ function computeStructuralScore(a: string[], b: string[]): number { return Math.sqrt(final_charset_score * token_qty_score * token_size_score) } -interface Candidate { +type Candidate = { score: number canonical: string start: number @@ -82,7 +78,7 @@ interface Candidate { } export function extractForListModel( - tokens: SerializableUtteranceToken[], + tokens: ListEntityUtteranceToken[], listModel: ListEntityModel ): EntityExtractionResult[] { const candidates: Candidate[] = [] @@ -94,7 +90,12 @@ export function extractForListModel( if (tokens[i].isSpace) { continue } - const workset = takeUntil(tokens, i, _.sumBy(occurrence, 'length')) + + const workset = takeUntil( + tokens, + i, + _.sumBy(occurrence, (o) => o.length) + ) const worksetStrLow = workset.map((x) => tokenToString(x, { lowerCase: true, realSpaces: true, trim: false })) const worksetStrWCase = workset.map((x) => tokenToString(x, { lowerCase: false, realSpaces: true, trim: false }) @@ -142,7 +143,7 @@ export function extractForListModel( } } - return candidates + const results: EntityExtractionResult[] = candidates .filter((x) => !x.eliminated && x.score >= ENTITY_SCORE_THRESHOLD) .map((match) => ({ confidence: match.score, @@ -157,5 +158,6 @@ export function extractForListModel( }, sensitive: listModel.sensitive, type: listModel.entityName - })) as EntityExtractionResult[] + })) + return results } diff --git a/packages/nlu-engine/src/engine/entities/custom-extractor/multi-thread-extractor.ts b/packages/nlu-engine/src/engine/entities/custom-extractor/multi-thread-extractor.ts index 28e424c3..558d8925 100644 --- a/packages/nlu-engine/src/engine/entities/custom-extractor/multi-thread-extractor.ts +++ b/packages/nlu-engine/src/engine/entities/custom-extractor/multi-thread-extractor.ts @@ -4,40 +4,39 @@ import _ from 'lodash' import os from 'os' import { Logger } from 'src/typings' import { EntityExtractionResult, ListEntityModel, WarmedListEntityModel } from 
'../../typings' -import Utterance from '../../utterance/utterance' import { CustomEntityExtractor } from '.' -import { SerializableUtteranceToken, serializeUtteranceToken } from './serializable-token' import { ENTRY_POINT } from './thread-entry-point' +import { ListEntityUtteranceToken, keepTokenProperties, Utterance } from './token' const maxMLThreads = Math.max(os.cpus().length - 1, 1) // ncpus - webworker const userMlThread = process.env.BP_NUM_ML_THREADS ? Number(process.env.BP_NUM_ML_THREADS) : 4 const numMLThreads = Math.min(maxMLThreads, userMlThread) -interface TaskUnitInput { +type TaskUnitInput = { utt_idx: number entity_idx: number utterance: Utterance list_entity: WarmedListEntityModel } -interface SerializableTaskUnitInput { +type SerializableTaskUnitInput = { utt_idx: number entity_idx: number - tokens: SerializableUtteranceToken[] + tokens: ListEntityUtteranceToken[] list_entity: ListEntityModel } -interface TaskUnitOutput { +type TaskUnitOutput = { utt_idx: number entity_idx: number entities: EntityExtractionResult[] } -export interface TaskInput { +export type TaskInput = { units: SerializableTaskUnitInput[] } -export interface TaskOutput { +export type TaskOutput = { units: TaskUnitOutput[] } @@ -102,7 +101,7 @@ export class MultiThreadCustomEntityExtractor extends CustomEntityExtractor { const { entity_idx, utt_idx, utterance, list_entity: warmedModel } = unit const { cache, ...coldModel } = warmedModel const { tokens } = utterance - return { entity_idx, utt_idx, tokens: tokens.map(serializeUtteranceToken), list_entity: coldModel } + return { entity_idx, utt_idx, tokens: tokens.map(keepTokenProperties), list_entity: coldModel } } private _splitUnitsByCacheHitOrMiss(units: TaskUnitInput[]): [TaskUnitInput[], TaskUnitInput[]] { diff --git a/packages/nlu-engine/src/engine/entities/custom-extractor/serializable-token.ts b/packages/nlu-engine/src/engine/entities/custom-extractor/serializable-token.ts deleted file mode 100644 index 67c10fc4..00000000 --- a/packages/nlu-engine/src/engine/entities/custom-extractor/serializable-token.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { convertToRealSpaces } from '../../tools/token-utils' -import { DefaultTokenToStringOptions, TokenToStringOptions, UtteranceToken } from '../../utterance/utterance' - -export type SerializableUtteranceToken = Omit - -export const serializeUtteranceToken = (token: UtteranceToken): SerializableUtteranceToken => { - const { toString, ...otherFields } = token - return { ...otherFields } -} - -/** - * - * @description Copied from UtteranceToken.toString() - * @returns a string - */ -export const tokenToString = (token: SerializableUtteranceToken, opts: Partial = {}) => { - const options = { ...DefaultTokenToStringOptions, ...opts } - let result = token.value - if (options.lowerCase) { - result = result.toLowerCase() - } - if (options.realSpaces) { - result = convertToRealSpaces(result) - } - if (options.trim) { - result = result.trim() - } - return result -} diff --git a/packages/nlu-engine/src/engine/entities/custom-extractor/thread-entry-point.ts b/packages/nlu-engine/src/engine/entities/custom-extractor/thread-entry-point.ts index 437eaa14..54c03393 100644 --- a/packages/nlu-engine/src/engine/entities/custom-extractor/thread-entry-point.ts +++ b/packages/nlu-engine/src/engine/entities/custom-extractor/thread-entry-point.ts @@ -8,7 +8,7 @@ const threadEntryPoint = makeThreadEntryPoint() const main = async () => { try { - threadEntryPoint.listenForTask(async (taskDef: TaskDefinition) => { + 
threadEntryPoint.listenForTask(async (taskDef: TaskDefinition) => {
       const { input, progress } = taskDef
       let i = 0
       const N = input.units.length
@@ -22,7 +22,8 @@ const main = async () => {
     })
     await threadEntryPoint.initialize()
-  } catch (err) {
+  } catch (thrown) {
+    const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
     threadEntryPoint.logger.error('An unhandled error occured in the thread', err)
     process.exit(1)
   }
diff --git a/packages/nlu-engine/src/engine/entities/custom-extractor/token.ts b/packages/nlu-engine/src/engine/entities/custom-extractor/token.ts
new file mode 100644
index 00000000..b486285c
--- /dev/null
+++ b/packages/nlu-engine/src/engine/entities/custom-extractor/token.ts
@@ -0,0 +1,39 @@
+import { convertToRealSpaces } from '../../tools/token-utils'
+import { DefaultTokenToStringOptions, TokenToStringOptions, UtteranceToStringOptions } from '../../utterance/utterance'
+
+export type ListEntityUtteranceToken = Readonly<{
+  value: string
+  isWord: boolean
+  isSpace: boolean
+  offset: number
+}>
+
+export type Utterance = {
+  tokens: ReadonlyArray<ListEntityUtteranceToken>
+  toString: (opt?: Partial<UtteranceToStringOptions>) => string
+}
+
+export const keepTokenProperties = (token: ListEntityUtteranceToken): ListEntityUtteranceToken => {
+  const { value, isWord, isSpace, offset } = token
+  return { value, isWord, isSpace, offset }
+}
+
+/**
+ * @description Copied from UtteranceToken.toString()
+ * @returns a string
+ */
+export const tokenToString = (token: ListEntityUtteranceToken, opts: Partial<TokenToStringOptions> = {}) => {
+  const options = { ...DefaultTokenToStringOptions, ...opts }
+  let result = token.value
+  if (options.lowerCase) {
+    result = result.toLowerCase()
+  }
+  if (options.realSpaces) {
+    result = convertToRealSpaces(result)
+  }
+  if (options.trim) {
+    result = result.trim()
+  }
+  return result
+}
diff --git a/packages/nlu-engine/src/engine/entities/duckling-extractor/duckling-client.ts b/packages/nlu-engine/src/engine/entities/duckling-extractor/duckling-client.ts
index ae25eeb7..614c1914 100644
--- a/packages/nlu-engine/src/engine/entities/duckling-extractor/duckling-client.ts
+++ b/packages/nlu-engine/src/engine/entities/duckling-extractor/duckling-client.ts
@@ -1,57 +1,50 @@
-import Axios, { AxiosInstance } from 'axios'
+import axios, { AxiosInstance } from 'axios'
 import retry from 'bluebird-retry'
 import httpsProxyAgent from 'https-proxy-agent'
 import _ from 'lodash'
-import { Logger } from '../../../typings'
-
+import { DucklingServerError } from '../../errors'
 import { Duckling } from './typings'
 
-export interface DucklingParams {
+export type DucklingParams = {
   tz: string
   refTime: number
   lang: string
 }
 
-const DISABLED_MSG = `, so it will be disabled.
-For more information (or if you want to self-host it), please check the docs at
-https://botpress.com/docs/build/nlu/#system-entities
-`
-
-const RETRY_POLICY = { backoff: 2, max_tries: 3, timeout: 500 }
+const RETRY_POLICY: retry.Options = { backoff: 2, max_tries: 3, timeout: 500 }
 
 export class DucklingClient {
-  public static client: AxiosInstance
-
-  constructor(private logger?: Logger) {}
+  private _client: AxiosInstance
 
-  public static async init(url: string, logger?: Logger): Promise<boolean> {
+  constructor(url: string) {
     const proxyConfig = process.env.PROXY ? { httpsAgent: new httpsProxyAgent(process.env.PROXY) } : {}
-    this.client = Axios.create({
+    this._client = axios.create({
       baseURL: url,
       headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      ...proxyConfig
    })
+  }
 
+  public async init(): Promise<void> {
     let success = false
     try {
       await retry(async () => {
-        const { data } = await this.client.get('/')
+        const { data } = await this._client.get('/')
         if (data !== 'quack!') {
-          return logger && logger.warning(`Bad response from Duckling server ${DISABLED_MSG}`)
+          throw new DucklingServerError('Bad response from Duckling server')
         }
         success = true
       }, RETRY_POLICY)
-    } catch (err) {
-      logger && logger.warning(`Couldn't reach the Duckling server ${DISABLED_MSG}`, err)
+    } catch (thrown) {
+      const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
+      throw this._mapError(new Error(`Couldn't reach the Duckling server. ${err.message}`))
     }
-
-    return success
   }
 
   public async fetchDuckling(text: string, { lang, tz, refTime }: DucklingParams): Promise<Duckling[]> {
     try {
       return await retry(async () => {
-        const { data } = await DucklingClient.client.post(
+        const { data } = await this._client.post(
           '/parse',
           `lang=${lang}&text=${encodeURI(text)}&reftime=${refTime}&tz=${tz}`
         )
@@ -63,9 +56,16 @@ export class DucklingClient {
         return data
       }, RETRY_POLICY)
     } catch (err) {
-      const error = err.response ? err.response.data : err
-      this.logger && this.logger.warning('Error extracting duckling entities', error)
-      return []
+      throw this._mapError(err)
+    }
+  }
+
+  private _mapError = (thrown: any): Error => {
+    const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
+    if (err instanceof DucklingServerError) {
+      return err
     }
+    const { message, stack } = err
+    return new DucklingServerError(message, stack)
   }
 }
diff --git a/packages/nlu-engine/src/engine/entities/duckling-extractor/duckling-extractor.test.ts b/packages/nlu-engine/src/engine/entities/duckling-extractor/duckling-extractor.test.ts
index 09ada378..aa02d658 100644
--- a/packages/nlu-engine/src/engine/entities/duckling-extractor/duckling-extractor.test.ts
+++ b/packages/nlu-engine/src/engine/entities/duckling-extractor/duckling-extractor.test.ts
@@ -1,25 +1,29 @@
+import { unlinkSync } from 'fs'
 import _ from 'lodash'
 import path from 'path'
-import { unlinkSync } from 'fs'
-import { DucklingEntityExtractor } from '.'
 import { JOIN_CHAR } from '../../tools/token-utils'
-import { SystemEntityCacheManager } from '../entity-cache-manager'
+import { SystemEntityCacheManager } from '../entity-cache'
+import { DucklingEntityExtractor } from '.'
+import { DucklingClient } from './duckling-client' + +class FakeDucklingClient extends DucklingClient { + public async init() {} +} describe('Duckling Extract Multiple', () => { let duck: DucklingEntityExtractor let mockedFetch: jest.SpyInstance - let testCachePath = path.join(' ', 'cache', 'testCache.json') + const testCachePath = path.join(' ', 'cache', 'testCache.json') beforeAll(() => { const duckCache = new SystemEntityCacheManager(testCachePath, false) - duck = new DucklingEntityExtractor(duckCache) + duck = new DucklingEntityExtractor(duckCache, new FakeDucklingClient('')) // @ts-ignore mockedFetch = jest.spyOn(duck, '_fetchDuckling') }) beforeEach(async () => { - await duck.configure(true, '') + await duck.init() duck.resetCache() - duck.enable() }) afterEach(() => { @@ -32,16 +36,6 @@ describe('Duckling Extract Multiple', () => { const dummyProgress = (p: number) => {} - test('When disabled returns empty array for each input', async () => { - duck.disable() - const examples = ['this is one', 'this is two'] - const res = await duck.extractMultiple(examples, 'en', dummyProgress) - expect(mockedFetch).not.toHaveBeenCalled() - res.forEach((r) => { - expect(r).toEqual([]) - }) - }) - test('calls extract with join char', async () => { const examples = ['this is one', 'this is two'] mockedFetch.mockResolvedValue([]) diff --git a/packages/nlu-engine/src/engine/entities/duckling-extractor/index.ts b/packages/nlu-engine/src/engine/entities/duckling-extractor/index.ts index cc77ba95..917dbf47 100644 --- a/packages/nlu-engine/src/engine/entities/duckling-extractor/index.ts +++ b/packages/nlu-engine/src/engine/entities/duckling-extractor/index.ts @@ -1,10 +1,9 @@ import Bluebird from 'bluebird' import _ from 'lodash' -import { Logger } from '../../../typings' import { extractPattern } from '../../tools/patterns-utils' import { JOIN_CHAR } from '../../tools/token-utils' import { EntityExtractionResult, KeyedItem, SystemEntityExtractor } from '../../typings' -import { SystemEntityCacheManager } from '../entity-cache-manager' +import { SystemEntityCacheManager } from '../entity-cache' import { DucklingClient, DucklingParams } from './duckling-client' import { DUCKLING_ENTITIES } from './enums' import { mapDucklingToEntity } from './map-duckling' @@ -15,35 +14,19 @@ const BATCH_SIZE = 10 // 1- in _extractBatch, shift results ==> don't walk whole array n times (nlog(n) vs n2) export class DucklingEntityExtractor implements SystemEntityExtractor { - private _enabled: boolean - private _provider: DucklingClient - - public enable() { - this._enabled = true - } - public disable() { - this._enabled = false - } - - constructor(private _cache: SystemEntityCacheManager, private readonly logger?: Logger) { - this._enabled = false - this.logger = logger - this._provider = new DucklingClient(logger) - } + constructor(private _cache: SystemEntityCacheManager, private _ducklingClient: DucklingClient) {} public resetCache() { this._cache.reset() } public get entityTypes(): string[] { - return this._enabled ? 
DUCKLING_ENTITIES : []
+    return DUCKLING_ENTITIES
   }
 
-  public async configure(enabled: boolean, url: string) {
-    if (enabled) {
-      this._enabled = await DucklingClient.init(url, this.logger)
-      await this._cache.restoreCache()
-    }
+  public async init() {
+    await this._ducklingClient.init()
+    await this._cache.restoreCache()
   }
 
   public async extractMultiple(
@@ -52,10 +35,6 @@ export class DucklingEntityExtractor implements SystemEntityExtractor {
     progress: (p: number) => void,
     useCache?: boolean
   ): Promise<EntityExtractionResult[][]> {
-    if (!this._enabled) {
-      return Array(inputs.length).fill([])
-    }
-
     const options = {
       lang,
       tz: this._getTz(),
@@ -75,7 +54,7 @@ export class DucklingEntityExtractor implements SystemEntityExtractor {
     return _.chain(batchedRes)
       .flatten()
       .concat(cached)
-      .orderBy('idx')
+      .orderBy((x) => x.idx)
       .map((x) => x.entities!)
       .value()
   }
@@ -111,7 +90,7 @@ export class DucklingEntityExtractor implements SystemEntityExtractor {
   }
 
   private async _fetchDuckling(text: string, params: DucklingParams): Promise<EntityExtractionResult[]> {
-    const duckReturn = await this._provider.fetchDuckling(text, params)
+    const duckReturn = await this._ducklingClient.fetchDuckling(text, params)
     return duckReturn.map(mapDucklingToEntity)
   }
diff --git a/packages/nlu-engine/src/engine/entities/duckling-extractor/typings.ts b/packages/nlu-engine/src/engine/entities/duckling-extractor/typings.ts
index f857c4e1..fba1cea0 100644
--- a/packages/nlu-engine/src/engine/entities/duckling-extractor/typings.ts
+++ b/packages/nlu-engine/src/engine/entities/duckling-extractor/typings.ts
@@ -1,6 +1,10 @@
+/**
+ * Approximate typings of Duckling Server response
+ */
+
 export type Duckling = DucklingReturn<Dim>
 
-export interface DucklingReturn<D extends Dim> {
+export type DucklingReturn<D extends Dim> = {
   start: number
   end: number
   dim: D
@@ -36,7 +40,6 @@
 type DucklingValueInfo = D ? Value : ValueUnit
 
-// Not sure yet, but I feel like if property `values` is defined, then root properties are also...
 type DucklingTimeValue<T extends 'interval' | 'value'> = T extends 'interval'
   ? TimeInterval & { values?: ({ type: 'interval' } & TimeInterval)[] }
   : ValueGrain & { values?: ({ type: 'value' } & ValueGrain)[] }
@@ -54,6 +57,6 @@
 export type ValueUnit = Value & {
   unit: string
 }
 
-export interface Value {
+export type Value = {
   value: string | number
 }
diff --git a/packages/nlu-engine/src/engine/entities/dummy-system-extractor.ts b/packages/nlu-engine/src/engine/entities/dummy-system-extractor.ts
new file mode 100644
index 00000000..c1d6d0f8
--- /dev/null
+++ b/packages/nlu-engine/src/engine/entities/dummy-system-extractor.ts
@@ -0,0 +1,16 @@
+import { SystemEntityExtractor, EntityExtractionResult } from '../typings'
+
+export class DummySystemEntityExtractor implements SystemEntityExtractor {
+  public async extractMultiple(
+    input: string[],
+    lang: string,
+    progress: (p: number) => void,
+    useCache?: boolean | undefined
+  ): Promise<EntityExtractionResult[][]> {
+    return Array(input.length).fill([])
+  }
+
+  public async extract(input: string, lang: string): Promise<EntityExtractionResult[]> {
+    return []
+  }
+}
diff --git a/packages/nlu-engine/src/engine/entities/entity-cache-manager.test.ts b/packages/nlu-engine/src/engine/entities/entity-cache.test.ts
similarity index 95%
rename from packages/nlu-engine/src/engine/entities/entity-cache-manager.test.ts
rename to packages/nlu-engine/src/engine/entities/entity-cache.test.ts
index 75ccc978..e0a6d9ab 100644
--- a/packages/nlu-engine/src/engine/entities/entity-cache-manager.test.ts
+++ b/packages/nlu-engine/src/engine/entities/entity-cache.test.ts
@@ -1,11 +1,11 @@
 import { unlinkSync } from 'fs'
 import path from 'path'
 import { EntityExtractionResult } from '../typings'
-import { SystemEntityCacheManager } from './entity-cache-manager'
+import { SystemEntityCacheManager } from './entity-cache'
 
 describe('System Entity Cache', () => {
   let testCache: SystemEntityCacheManager
-  let testCachePath = path.join(' ', 'cache', 'testCache.json')
+  const testCachePath = path.join(' ', 'cache', 'testCache.json')
   let inputExemples: string[]
   beforeEach(async () => {
     testCache = new SystemEntityCacheManager(testCachePath, false)
@@ -112,7 +112,7 @@ describe('System Entity Cache', () => {
   })
 
   test('Cache is restored', async () => {
-    testCache.restoreCache()
+    await testCache.restoreCache()
 
     const [testCached, testToFetch] = testCache.splitCacheHitFromCacheMiss(inputExemples, true)
diff --git a/packages/nlu-engine/src/engine/entities/entity-cache-manager.ts b/packages/nlu-engine/src/engine/entities/entity-cache.ts
similarity index 79%
rename from packages/nlu-engine/src/engine/entities/entity-cache-manager.ts
rename to packages/nlu-engine/src/engine/entities/entity-cache.ts
index c478e63e..17177f28 100644
--- a/packages/nlu-engine/src/engine/entities/entity-cache-manager.ts
+++ b/packages/nlu-engine/src/engine/entities/entity-cache.ts
@@ -4,11 +4,7 @@ import LRUCache from 'lru-cache'
 import ms from 'ms'
 import sizeof from 'object-sizeof'
 import { Logger } from '../../typings'
-import { ColdListEntityModel, EntityCache, EntityCacheDump, EntityExtractionResult, KeyedItem } from '../typings'
-
-interface CacheByName {
-  [name: string]: EntityCacheDump
-}
+import { EntityCache, EntityCacheDump, EntityExtractionResult, KeyedItem } from '../typings'
 
 export function warmEntityCache(coldCache: EntityCacheDump): EntityCache {
   const warmedCache = new LRUCache<string, EntityExtractionResult[]>(1000)
@@ -16,27 +12,6 @@ export function warmEntityCache(coldCache: EntityCacheDump): EntityCache {
-export class EntityCacheManager {
-  private cache: CacheByName = {}
-
-  getCache(listEntity: string): EntityCacheDump {
-    if (!this.cache[listEntity]) {
-      this.cache[listEntity] = []
-    }
-    return this.cache[listEntity]
-  }
-
-  loadFromData(listEntities: ColdListEntityModel[]) {
-    for (const e of listEntities) {
-      this.setCache(e.entityName, e.cache)
-    }
-  }
-
-  private setCache(listEntity: string, cache: EntityCacheDump) {
-    this.cache[listEntity] = cache
-  }
-}
-
 export class SystemEntityCacheManager {
   private _path: string
   private _cache: LRUCache<string, EntityExtractionResult[]>
@@ -100,7 +75,8 @@ export class SystemEntityCacheManager {
     if (this._dumpEnabled) {
       try {
         await this._dumpCache()
-      } catch (err) {
+      } catch (thrown) {
+        const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
         this._logger?.error(`Could not persist system entities cache, error ${err.message}`, err)
         this._dumpEnabled = false
       }
diff --git a/packages/nlu-engine/src/engine/entities/list-entity-model.ts b/packages/nlu-engine/src/engine/entities/list-entity-model.ts
new file mode 100644
index 00000000..c526cd3f
--- /dev/null
+++ b/packages/nlu-engine/src/engine/entities/list-entity-model.ts
@@ -0,0 +1,28 @@
+import _ from 'lodash'
+import { convertToRealSpaces } from '../tools/token-utils'
+import { ListEntity, ListEntityModel, Tools } from '../typings'
+
+export async function makeListEntityModel(entity: ListEntity, languageCode: string, tools: Tools) {
+  const allValues = _.uniq(Object.keys(entity.synonyms).concat(..._.values(entity.synonyms))).map((t) => t.trim())
+  const allTokens = (await tools.tokenize_utterances(allValues, languageCode)).map((toks) =>
+    toks.map(convertToRealSpaces)
+  )
+
+  const mappingsTokens = _.mapValues(entity.synonyms, (synonyms, name) =>
+    [...synonyms, name].map((syn) => {
+      const idx = allValues.indexOf(syn)
+      return allTokens[idx]
+    })
+  )
+
+  const model: ListEntityModel = {
+    type: 'custom.list',
+    id: `custom.list.${entity.name}`,
+    entityName: entity.name,
+    fuzzyTolerance: entity.fuzzyTolerance,
+    sensitive: entity.sensitive,
+    mappingsTokens
+  }
+
+  return model
+}
diff --git a/packages/nlu-engine/src/engine/entities/list-extractor.test.ts b/packages/nlu-engine/src/engine/entities/list-extractor.test.ts
index 7a0b6994..daadfcaf 100644
--- a/packages/nlu-engine/src/engine/entities/list-extractor.test.ts
+++ b/packages/nlu-engine/src/engine/entities/list-extractor.test.ts
@@ -22,7 +22,6 @@ const list_entities: ListEntityModel[] = [
     entityName: 'fruit',
     fuzzyTolerance: FuzzyTolerance.Medium,
     id: 'custom.list.fruit',
-    languageCode: 'en',
     mappingsTokens: {
       Blueberry: ['blueberries', 'blueberry', 'blue berries', 'blue berry', 'poisonous blueberry'].map(T),
       Strawberry: ['strawberries', 'strawberry', 'straw berries', 'straw berry'].map(T),
@@ -36,7 +35,6 @@ const list_entities: ListEntityModel[] = [
     entityName: 'company',
     fuzzyTolerance: FuzzyTolerance.Medium,
     id: 'custom.list.company',
-    languageCode: 'en',
     mappingsTokens: {
       Apple: ['Apple', 'Apple Computers', 'Apple Corporation', 'Apple Inc'].map(T)
     },
@@ -47,7 +45,6 @@ const list_entities: ListEntityModel[] = [
     entityName: 'airport',
     fuzzyTolerance: FuzzyTolerance.Medium,
     id: 'custom.list.airport',
-    languageCode: 'en',
     mappingsTokens: {
       JFK: ['JFK', 'New-York', 'NYC'].map(T),
       SFO: ['SFO', 'SF', 'San-Francisco'].map(T),
@@ -108,7 +105,6 @@ describe('list entity extractor', () => {
     entityName: 'state',
     fuzzyTolerance: FuzzyTolerance.Medium,
     id: 'custom.list.state',
-    languageCode: 'en',
     mappingsTokens: {
       NewYork: ['New York'].map(T)
     },
@@ -119,7 +115,6 @@ describe('list entity extractor', () => {
     entityName: 'city',
     fuzzyTolerance: FuzzyTolerance.Medium,
     id:
'custom.list.city', - languageCode: 'en', mappingsTokens: { NewYork: ['New York'].map(T) }, @@ -137,7 +132,7 @@ describe('list entity extractor', () => { // assert expect(results.length).toEqual(3) - for (let result of results) { + for (const result of results) { const { entityId } = result.metadata test(`Expect ${result.value} to be one of ${expectedIds.join(' ')}`, () => { expectedIds.includes(entityId) diff --git a/packages/nlu-engine/src/engine/entities/microsoft-extractor/index.ts b/packages/nlu-engine/src/engine/entities/microsoft-extractor/index.ts index f2588866..d108fe37 100644 --- a/packages/nlu-engine/src/engine/entities/microsoft-extractor/index.ts +++ b/packages/nlu-engine/src/engine/entities/microsoft-extractor/index.ts @@ -2,7 +2,7 @@ import _ from 'lodash' import { Logger } from '../../../typings' import { EntityExtractionResult, SystemEntityExtractor, KeyedItem } from '../../typings' -import { SystemEntityCacheManager } from '../entity-cache-manager' +import { SystemEntityCacheManager } from '../entity-cache' import { GlobalRecognizers, LanguageDependantRecognizers, @@ -20,7 +20,7 @@ import { MicrosoftValue, MicrosoftEntity } from './typings' -interface MicrosoftParams { +type MicrosoftParams = { lang: MicrosoftSupportedLanguage recognizers: any[] } diff --git a/packages/nlu-engine/src/engine/entities/microsoft-extractor/microsoft-extractor.test.ts b/packages/nlu-engine/src/engine/entities/microsoft-extractor/microsoft-extractor.test.ts deleted file mode 100644 index 9cd4073c..00000000 --- a/packages/nlu-engine/src/engine/entities/microsoft-extractor/microsoft-extractor.test.ts +++ /dev/null @@ -1,344 +0,0 @@ -import { unlinkSync } from 'fs' -import _ from 'lodash' -import path from 'path' -import { MicrosoftEntityExtractor } from '.' -import { SystemEntityCacheManager } from '../entity-cache-manager' -import { createSpyObject, MockObject } from '../../../utils/mock-extra' -import { Logger } from '../../../typings' -import { KeyedItem } from '../../typings' - -const dummyProgess = (p: number) => {} - -describe('Microsoft Extract Multiple', () => { - let microsoft: MicrosoftEntityExtractor - let testCachePath = path.join(' ', 'cache', 'testCache.json') - beforeAll(() => { - const microsoftCache = new SystemEntityCacheManager(testCachePath, false) - microsoft = new MicrosoftEntityExtractor(microsoftCache) - }) - - beforeEach(async () => { - microsoft.resetCache() - }) - - afterAll(() => { - unlinkSync(testCachePath) - }) - - test('Return nothing for unsupported lang', async () => { - const examples = ['один два три четыре пять', 'Я говорю по русски сегодня, но и завтра вечером'] - const res = await microsoft.extractMultiple(examples, 'ru', dummyProgess) - res.forEach((r) => { - expect(r).toEqual([]) - }) - }) - - // Note we could add numbers as global recognizers as well. 
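// Editor's note (not part of the diff): the rename above (entity-cache-manager.ts -> entity-cache.ts)
// keeps the SystemEntityCacheManager API intact. A rough sketch of the split/fetch/merge pattern the
// extractors and the tests in this section rely on; the exact cacheBatchResults signature is assumed:
//
//   const cache = new SystemEntityCacheManager(cachePath, false) // (path, dumpEnabled) as in the tests
//   await cache.restoreCache() // reload previously dumped entries from disk
//   const [hits, misses] = cache.splitCacheHitFromCacheMiss(utterances, useCache)
//   const fetched = await recognize(misses.map((m) => m.input)) // stand-in for a Duckling/Microsoft call
//   await cache.cacheBatchResults(misses.map((m) => m.input), fetched)
//
// Cached and freshly fetched results can then be merged back in the original utterance order using
// the idx carried by each KeyedItem.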
- test('Return phone number, ip address, mention, hashtag, email, url for unsupported lang', async () => { - const expected = [ - [ - { - confidence: 1, - type: 'phoneNumber', - value: '+33 6 66 66 66 66', - start: 19, - end: 36, - metadata: { - source: '+33 6 66 66 66 66', - entityId: 'system.phoneNumber', - extractor: 'system', - unit: 'phonenumber' - } - } - ], - [ - { - confidence: 1, - type: 'ip', - value: '135.19.84.102', - start: 19, - end: 32, - metadata: { - source: '135.19.84.102', - entityId: 'system.ip', - extractor: 'system', - unit: 'ip' - } - } - ], - [ - { - confidence: 1, - type: 'mention', - value: '@pedro', - start: 8, - end: 14, - metadata: { - source: '@pedro', - entityId: 'system.mention', - extractor: 'system', - unit: 'mention' - } - } - ], - [ - { - confidence: 1, - type: 'hashtag', - value: '#sport', - start: 24, - end: 30, - metadata: { - source: '#sport', - entityId: 'system.hashtag', - extractor: 'system', - unit: 'hashtag' - } - } - ], - [ - { - confidence: 1, - type: 'email', - value: 'hello@helloworld.com', - start: 25, - end: 45, - metadata: { - source: 'hello@helloworld.com', - entityId: 'system.email', - extractor: 'system', - unit: 'email' - } - } - ], - [ - { - confidence: 1, - type: 'url', - value: 'www.thecuteboys.com', - start: 9, - end: 28, - metadata: { - source: 'www.thecuteboys.com', - entityId: 'system.url', - extractor: 'system', - unit: 'url' - } - } - ] - ] - - const examples = [ - 'мой номер телефона +33 6 66 66 66 66', - 'Можете попробовать 135.19.84.102, пожалуйста?', - 'Привет, @pedro, можешь помочь?', - 'Вы смотрели новый канал #sport? Это потрясающе', - 'пн электронная почта est hello@helloworld.com', - 'Мой сайт www.thecuteboys.com, пожалуйста, напишите отзыв' - ] - const res = await microsoft.extractMultiple(examples, 'ru', dummyProgess) - - for (const [prem, hyp] of _.zip(res, expected)) { - expect(prem).toHaveLength(1) - expect(prem![0].type).toEqual(hyp![0].type) - expect(prem![0].value).toEqual(hyp![0].value) - } - }) - - test('returns as many results as n examples', async () => { - const examples = [ - "Today it's my birthday. I'm one hundred percent happy !", - 'I have one little girl born the 4 december 2020. 
She was 6 pounds', - 'You can reach me at pierre.snell@botpress.com or @ierezell on github #botpress4ever', - "I wish I would have one million dollars before I'm 30 years old", - 'Nothing none null undefined', - 'Is it five degrees outside ?', - 'Oh yes please' - ] - const res = await microsoft.extractMultiple(examples, 'en', dummyProgess) - expect(res.length).toEqual(examples.length) - }) - - test('good results for each examples', async () => { - const examples = ['one two three', 'nothing', 'the 3rd of december 2022 at 4:56pm and then'] - // expected 0123456789012____0123456____012345678901 - // with JoinChar one two three::_::nothing::_::now and then - // 012345678901234567890123456789012345678901 - const result = [ - [ - { - confidence: 1, - type: 'number', - value: '1', - start: 0, - end: 3, - metadata: { - source: 'one', - entityId: 'system.number', - extractor: 'system', - unit: 'number' - } - }, - { - confidence: 1, - type: 'number', - value: '2', - start: 4, - end: 7, - metadata: { - source: 'two', - entityId: 'system.number', - extractor: 'system', - unit: 'number' - } - }, - { - confidence: 1, - type: 'number', - value: '3', - start: 8, - end: 13, - metadata: { - source: 'three', - entityId: 'system.number', - extractor: 'system', - unit: 'number' - } - } - ], - [], - [ - { - confidence: 1, - type: 'ordinal', - value: '3', - start: 4, - end: 7, - metadata: { - source: '3rd', - entityId: 'system.ordinal', - extractor: 'system', - unit: 'ordinal' - } - }, - { - confidence: 1, - type: 'number', - value: '2022', - start: 20, - end: 24, - metadata: { - source: '2022', - entityId: 'system.number', - extractor: 'system', - unit: 'number' - } - }, - { - confidence: 1, - type: 'number', - value: '4', - start: 28, - end: 29, - metadata: { - source: '4', - entityId: 'system.number', - extractor: 'system', - unit: 'number' - } - }, - { - confidence: 1, - type: 'time', - value: '2022-12-03 16:56:00', - start: 0, - end: 34, - metadata: { - source: 'the 3rd of december 2022 at 4:56pm', - entityId: 'system.time', - extractor: 'system', - unit: 'datetime' - } - } - ] - ] - - const res = await microsoft.extractMultiple(examples, 'en', dummyProgess) - - expect(res).toEqual(result) - }) -}) - -describe('Microsft Entity extractor cache usage', () => { - let mem: KeyedItem[] - const fakeSplitCache = (inputs: string[], useCache: boolean) => { - const toItem = (input: string, idx: number): KeyedItem => ({ idx, input, entities: [] }) - - if (!useCache) { - return [[], inputs.map(toItem)] - } - - const knownInputs = mem.map((i) => i.input) - const newInputs = inputs.filter((i) => !knownInputs.includes(i)) - const newItems: KeyedItem[] = newInputs.map(toItem) - const ret: KeyedItem[][] = [[...mem], [...newItems]] - mem.push(...newItems) - return ret - } - - let recognizeDateTimeSpy: any - let cache: MockObject - let extractor: MicrosoftEntityExtractor - - beforeEach(async () => { - mem = [] - - const recognizers = jest.requireActual('@microsoft/recognizers-text-suite') - jest.doMock('@microsoft/recognizers-text-suite', () => { - recognizeDateTimeSpy = jest.spyOn(recognizers, 'recognizeDateTime') - return { - ...recognizers - } - }) - - cache = createSpyObject() - cache.splitCacheHitFromCacheMiss.mockImplementation(fakeSplitCache) - - const stubLogger = createSpyObject() - - const extractorModule = require('.') - extractor = new extractorModule.MicrosoftEntityExtractor(cache.T, stubLogger.T) as MicrosoftEntityExtractor - await extractor.configure() - }) - - test('when extracting multiple time with the same 
value and cache enabled, extractor uses cache', async () => { - // arrange - const utt = 'a really really really small mice at 4 pm stole my grandma' - - // act - await extractor.extract(utt, 'en', true) - await extractor.extract(utt, 'en', true) - await extractor.extract(utt, 'en', true) - await extractor.extract(utt, 'en', true) - - // assert - expect(recognizeDateTimeSpy).toHaveBeenCalledTimes(1) - expect(cache.splitCacheHitFromCacheMiss).toHaveBeenCalledTimes(4) - expect(cache.cacheBatchResults).toHaveBeenCalledTimes(1) - }) - - test('when extracting multiple time with the same value and cache disabled, extractor doesnt use cache', async () => { - // arrange - const utt = 'a really really really big boston Bruins player ate 5 gallons of of cookie at 4am' - - // act - await extractor.extract(utt, 'en', false) - await extractor.extract(utt, 'en', false) - await extractor.extract(utt, 'en', false) - await extractor.extract(utt, 'en', false) - - // assert - expect(recognizeDateTimeSpy).toHaveBeenCalledTimes(4) - expect(cache.splitCacheHitFromCacheMiss).toHaveBeenCalledTimes(4) - }) -}) diff --git a/packages/nlu-engine/src/engine/entities/microsoft-extractor/typings.ts b/packages/nlu-engine/src/engine/entities/microsoft-extractor/typings.ts index ec71d0e4..6453d064 100644 --- a/packages/nlu-engine/src/engine/entities/microsoft-extractor/typings.ts +++ b/packages/nlu-engine/src/engine/entities/microsoft-extractor/typings.ts @@ -1,6 +1,6 @@ import { ModelResult } from '@microsoft/recognizers-text' -export interface MicrosoftValue { +export type MicrosoftValue = { value: string unit?: string type?: string @@ -8,7 +8,7 @@ export interface MicrosoftValue { otherResults?: any[] } -export interface MicrosoftTimeValues { +export type MicrosoftTimeValues = { timex: string type: string start?: string @@ -18,18 +18,18 @@ export interface MicrosoftTimeValues { sourceEntity?: string } -export interface MicrosoftValues { +export type MicrosoftValues = { values: MicrosoftTimeValues[] } export type MicrosoftResolution = MicrosoftValue | MicrosoftValues -export interface MicrosoftEntity extends ModelResult { +export type MicrosoftEntity = { start: number end: number resolution: MicrosoftResolution text: string typeName: string -} +} & ModelResult export type MicrosoftSupportedLanguage = 'zh' | 'nl' | 'en' | 'fr' | 'de' | 'it' | 'ja' | 'pt' | 'es' diff --git a/packages/nlu-engine/src/engine/errors.ts b/packages/nlu-engine/src/engine/errors.ts new file mode 100644 index 00000000..b667ee4d --- /dev/null +++ b/packages/nlu-engine/src/engine/errors.ts @@ -0,0 +1,45 @@ +import { LangError as SerializedError, ErrorType as LangServerErrorType } from '@botpress/lang-client' + +export class TrainingCanceledError extends Error {} +export class TrainingAlreadyStartedError extends Error {} +export class TrainingExitedUnexpectedlyError extends Error { + constructor(srcWorkerId: number, info: { exitCode: number; signal: string }) { + const { exitCode, signal } = info + super(`Training worker ${srcWorkerId} exited with exit code ${exitCode} and signal ${signal}.`) + } +} + +export class LintingCanceledError extends Error {} +export class LintingAlreadyStartedError extends Error {} +export class LintingExitedUnexpectedlyError extends Error { + constructor(srcWorkerId: number, info: { exitCode: number; signal: string }) { + const { exitCode, signal } = info + super(`Linting worker ${srcWorkerId} exited with exit code ${exitCode} and signal ${signal}.`) + } +} + +export class ModelLoadingError extends Error { + 
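// Editor's note (not part of the diff): these dedicated error classes replace generic string errors.
// A minimal sketch of how a caller might branch on them (engine, trainId, trainSet and logger are
// assumed to be in scope):
//
//   try {
//     await engine.train(trainId, trainSet)
//   } catch (thrown) {
//     if (thrown instanceof TrainingCanceledError) {
//       logger.info('training was canceled')
//     } else if (thrown instanceof TrainingAlreadyStartedError) {
//       logger.warning('a training with this id is already running')
//     } else {
//       throw thrown
//     }
//   }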
constructor(component: string, innerError: Error | undefined) { + super(`${component} could not load model. Inner error is: "${innerError?.message}"`) + } +} + +export class LangServerError extends Error { + public code: number + public type: LangServerErrorType + + constructor(serializedError: SerializedError) { + super(serializedError.message) + const { code, type, stack } = serializedError + this.stack = stack + this.code = code + this.type = type + } +} + +export class DucklingServerError extends Error { + constructor(message: string, stack?: string) { + super(message) + this.stack = stack + } +} diff --git a/packages/nlu-engine/src/engine/index.ts b/packages/nlu-engine/src/engine/index.ts index 40cd4a36..1500783f 100644 --- a/packages/nlu-engine/src/engine/index.ts +++ b/packages/nlu-engine/src/engine/index.ts @@ -3,73 +3,89 @@ import bytes from 'bytes' import _ from 'lodash' import LRUCache from 'lru-cache' import ms from 'ms' -import sizeof from 'object-sizeof' -import { PredictOutput, TrainInput } from 'src/typings' - import v8 from 'v8' import { isListEntity, isPatternEntity } from '../guards' +import { DatasetIssue, IssueCode, IssueDefinition } from '../linting' import modelIdService from '../model-id-service' -import { TrainingOptions, LanguageConfig, Logger, ModelId, Model, Engine as IEngine } from '../typings' +import { + TrainingOptions, + LanguageConfig, + Logger, + ModelId, + Model, + Engine as IEngine, + PredictOutput, + TrainInput, + Specifications, + LintingOptions +} from '../typings' import { deserializeKmeans } from './clustering' -import { EntityCacheManager } from './entities/entity-cache-manager' import { initializeTools } from './initialize-tools' import { getCtxFeatures } from './intents/context-featurizer' import { OOSIntentClassifier } from './intents/oos-intent-classfier' import { SvmIntentClassifier } from './intents/svm-intent-classifier' +import { LintingProcessPool } from './linting-process-pool' +import { allIssues } from './linting/definitions' +import { lintingPipeline } from './linting/linting-pipeline' import { deserializeModel, PredictableModel, serializeModel } from './model-serializer' import { Predict, Predictors } from './predict-pipeline' -import SlotTagger from './slots/slot-tagger' +import { SlotTagger } from './slots/slot-tagger' import { isPatternValid } from './tools/patterns-utils' -import { TrainInput as TrainingPipelineInput, TrainOutput as TrainingPipelineOutput } from './training-pipeline' +import { TrainInput as TrainingPipelineInput } from './training-pipeline' import { TrainingProcessPool } from './training-process-pool' import { EntityCacheDump, ListEntity, PatternEntity, Tools } from './typings' -import { getModifiedContexts, mergeModelOutputs } from './warm-training-handler' - -interface LoadedModel { - model: PredictableModel - predictors: Predictors - entityCache: EntityCacheManager -} const DEFAULT_CACHE_SIZE = '850mb' const DEFAULT_ENGINE_OPTIONS: EngineOptions = { - cacheSize: DEFAULT_CACHE_SIZE, - legacyElection: false + cacheSize: DEFAULT_CACHE_SIZE } const DEFAULT_TRAINING_OPTIONS: TrainingOptions = { progressCallback: () => {}, - previousModel: undefined, minProgressHeartbeat: ms('10s') } -interface EngineOptions { +const DEFAULT_LINTING_OPTIONS: LintingOptions = { + progressCallback: () => {}, + minSpeed: 'slow', + minSeverity: 'warning', + runInMainProcess: false +} + +type EngineOptions = { cacheSize: string - legacyElection: boolean +} + +type ModelCacheEntry = { + predictors: Predictors + size: number } export 
default class Engine implements IEngine {
   private _tools!: Tools
   private _trainingWorkerQueue!: TrainingProcessPool
+  private _lintingWorkerQueue!: LintingProcessPool
 
   private _options: EngineOptions
 
-  private modelsById: LRUCache<string, LoadedModel>
+  private modelsById: LRUCache<string, ModelCacheEntry>
 
   private _trainLogger: Logger
+  private _lintLogger: Logger
   private _predictLogger: Logger
 
-  constructor(private _logger: Logger, opt: Partial<EngineOptions> = {}) {
+  constructor(private version: string, private _logger: Logger, opt: Partial<EngineOptions> = {}) {
     this._trainLogger = _logger.sub('training')
+    this._lintLogger = _logger.sub('linting')
     this._predictLogger = _logger.sub('predict')
 
     this._options = { ...DEFAULT_ENGINE_OPTIONS, ...opt }
 
     this.modelsById = new LRUCache({
       max: this._parseCacheSize(this._options.cacheSize),
-      length: sizeof // ignores size of functions, but let's assume it's small
+      length: (m) => m.size
     })
 
     const debugMsg =
@@ -94,26 +110,22 @@ export default class Engine implements IEngine {
     return Math.abs(parsedCacheSize)
   }
 
-  public getHealth() {
-    return this._tools.getHealth()
-  }
-
   public getLanguages() {
     return this._tools.getLanguages()
   }
 
-  public getSpecifications() {
-    return this._tools.getSpecifications()
+  public getSpecifications(): Specifications {
+    const languageServer = this._tools.getLangServerSpecs()
+    return {
+      engineVersion: this.version,
+      languageServer
+    }
   }
 
   public async initialize(config: LanguageConfig & { assetsPath: string }): Promise<void> {
     this._tools = await initializeTools(config, this._logger)
-    const { nluVersion, languageServer } = this._tools.getSpecifications()
-    if (!_.isString(nluVersion) || !this._dictionnaryIsFilled(languageServer)) {
-      this._logger.warning('Either the nlu version or the lang server version is not set correctly.')
-    }
-
     this._trainingWorkerQueue = new TrainingProcessPool(this._trainLogger, config)
+    this._lintingWorkerQueue = new LintingProcessPool(this._lintLogger, config)
   }
 
   public hasModel(modelId: ModelId) {
@@ -121,27 +133,27 @@ export default class Engine implements IEngine {
     return !!this.modelsById.get(stringId)
   }
 
-  async train(trainId: string, trainSet: TrainInput, opt: Partial<TrainingOptions> = {}): Promise<Model> {
+  public async train(trainId: string, trainSet: TrainInput, opt: Partial<TrainingOptions> = {}): Promise<Model> {
     const { language, seed, entities, intents } = trainSet
     this._trainLogger.debug(`[${trainId}] Started ${language} training`)
 
     const options = { ...DEFAULT_TRAINING_OPTIONS, ...opt }
-    const { previousModel: previousModelId, progressCallback, minProgressHeartbeat } = options
-    const previousModel = previousModelId && this.modelsById.get(modelIdService.toString(previousModelId))
-
-    const list_entities = entities.filter(isListEntity).map((e) => {
-      return {
-        name: e.name,
-        fuzzyTolerance: e.fuzzy,
-        sensitive: e.sensitive,
-        synonyms: _.chain(e.values)
-          .keyBy((e) => e.name)
-          .mapValues((e) => e.synonyms)
-          .value(),
-        cache: previousModel?.entityCache.getCache(e.name) || []
-      }
-    })
+    const { progressCallback, minProgressHeartbeat } = options
+
+    const list_entities = entities.filter(isListEntity).map((e) => ({
+      name: e.name,
+      fuzzyTolerance: e.fuzzy,
+      sensitive: !!e.sensitive,
+      synonyms: _.chain(e.values)
+        .keyBy((e) => e.name)
+        .mapValues((e) => e.synonyms)
+        .value(),
+      cache: [] // TODO: bring back list entity caching
+    }))
 
     const pattern_entities: PatternEntity[] = entities
       .filter(isPatternEntity)
@@ -168,18 +180,6 @@ export default class Engine implements IEngine {
       slot_definitions: x.slots
     }))
 
-    let ctxToTrain = contexts
-    if (previousModel) {
-      const previousIntents =
previousModel.model.data.input.intents - const contextChangeLog = getModifiedContexts(pipelineIntents, previousIntents) - ctxToTrain = [...contextChangeLog.createdContexts, ...contextChangeLog.modifiedContexts] - } - - const debugMsg = previousModel - ? `Retraining only contexts: [${ctxToTrain}] for language: ${language}` - : `Training all contexts for language: ${language}` - this._trainLogger.debug(`[${trainId}] ${debugMsg}`) - const input: TrainingPipelineInput = { trainId, nluSeed: seed, @@ -188,13 +188,22 @@ export default class Engine implements IEngine { pattern_entities, contexts, intents: pipelineIntents, - ctxToTrain, minProgressHeartbeat } const startedAt = new Date() const output = await this._trainingWorkerQueue.startTraining(input, progressCallback) + const { + list_entities: coldEntities, + tfidf, + vocab, + kmeans, + ctx_model, + intent_model_by_ctx, + slots_model_by_intent + } = output + const modelId = modelIdService.makeId({ ...trainSet, specifications: this.getSpecifications() @@ -205,25 +214,34 @@ export default class Engine implements IEngine { startedAt, finishedAt: new Date(), data: { - input, - output + intents: pipelineIntents, + languageCode: language, + pattern_entities, + contexts, + tfidf, + vocab, + kmeans, + ctx_model, + intent_model_by_ctx, + slots_model_by_intent, + list_entities: coldEntities.map(({ cache, ...e }) => e) // rm cache to get smaller model } } - if (previousModel) { - model.data.output = mergeModelOutputs(model.data.output, previousModel.model.data.output, contexts) - } - this._trainLogger.debug(`[${trainId}] Successfully finished ${language} training`) return serializeModel(model) } - cancelTraining(trainSessionId: string): Promise { + public cancelTraining(trainSessionId: string): Promise { return this._trainingWorkerQueue.cancelTraining(trainSessionId) } - async loadModel(serialized: Model) { + public validateModel(serialized: Model): void { + deserializeModel(serialized) // try to deserialize a model to see if it throws + } + + public async loadModel(serialized: Model) { const stringId = modelIdService.toString(serialized.id) this._logger.debug(`Load model ${stringId}`) @@ -232,16 +250,10 @@ export default class Engine implements IEngine { return } + const modelSize = serialized.data.length const model = deserializeModel(serialized) - const { input, output } = model.data - - const modelCacheItem: LoadedModel = { - model, - predictors: await this._makePredictors(input, output), - entityCache: this._makeCacheManager(output) - } + const predictors = await this._makePredictors(model.data) - const modelSize = sizeof(modelCacheItem) const bytesModelSize = bytes(modelSize) this._logger.debug(`Size of model ${stringId} is ${bytesModelSize}`) @@ -252,7 +264,7 @@ export default class Engine implements IEngine { throw new Error(`${msg} (${details}). 
${solution}`)
     }
 
-    this.modelsById.set(stringId, modelCacheItem)
+    this.modelsById.set(stringId, { predictors, size: modelSize })
     this._logger.debug(`Model cache entries are: [${this.modelsById.keys().join(', ')}]`)
 
     const debug = this._getMemoryUsage()
@@ -272,7 +284,7 @@
     )
   }
 
-  unloadModel(modelId: ModelId) {
+  public unloadModel(modelId: ModelId) {
     const stringId = modelIdService.toString(modelId)
     this._logger.debug(`Unload model ${stringId}`)
@@ -285,25 +297,56 @@
     this._logger.debug('Model unloaded with success')
   }
 
-  private _makeCacheManager(output: TrainingPipelineOutput) {
-    const cacheManager = new EntityCacheManager()
-    const { list_entities } = output
-    cacheManager.loadFromData(list_entities)
-    return cacheManager
+  public lint = async (lintingId: string, trainSet: TrainInput, opts?: Partial<LintingOptions>) => {
+    const options = { ...DEFAULT_LINTING_OPTIONS, ...opts }
+
+    let lintOutput: { issues: DatasetIssue<IssueCode>[] }
+    if (!options.runInMainProcess) {
+      lintOutput = await this._lintingWorkerQueue.startLinting(
+        {
+          lintId: lintingId,
+          trainSet,
+          minSpeed: options.minSpeed
+        },
+        options.progressCallback
+      )
+    } else {
+      const issues = await lintingPipeline(trainSet, { ...this._tools, logger: this._lintLogger }, options)
+      lintOutput = { issues }
+    }
+    return lintOutput
+  }
+
+  public cancelLinting = async (lintingId: string) => {
+    return this._lintingWorkerQueue.cancelLinting(lintingId)
+  }
+
+  public getIssueDetails<C extends IssueCode>(code: C): IssueDefinition<C> | undefined {
+    return allIssues[code] as IssueDefinition<C> | undefined
   }
 
-  private async _makePredictors(input: TrainingPipelineInput, output: TrainingPipelineOutput): Promise<Predictors> {
+  private async _makePredictors(modelData: PredictableModel['data']): Promise<Predictors> {
     const tools = this._tools
 
-    const { intents, languageCode, pattern_entities, contexts } = input
-    const { ctx_model, intent_model_by_ctx, kmeans, slots_model_by_intent, tfidf, vocab, list_entities } = output
+    const {
+      intents,
+      languageCode,
+      pattern_entities,
+      contexts,
+      list_entities,
+      tfidf,
+      vocab,
+      kmeans,
+      ctx_model,
+      intent_model_by_ctx,
+      slots_model_by_intent
+    } = modelData
 
     const warmKmeans = kmeans && deserializeKmeans(kmeans)
 
     const intent_classifier_per_ctx: _.Dictionary<OOSIntentClassifier> = await Bluebird.props(
       _.mapValues(intent_model_by_ctx, async (model) => {
-        const { legacyElection } = this._options
-        const intentClf = new OOSIntentClassifier(tools, this._predictLogger, { legacyElection })
+        const intentClf = new OOSIntentClassifier(tools, this._predictLogger)
         await intentClf.load(model)
         return intentClf
       })
@@ -335,7 +378,7 @@
     }
   }
 
-  async predict(text: string, modelId: ModelId): Promise<PredictOutput> {
+  public async predict(text: string, modelId: ModelId): Promise<PredictOutput> {
     this._predictLogger.debug(`Predict for input: "${text}"`)
 
     const stringId = modelIdService.toString(modelId)
@@ -344,7 +387,7 @@
       throw new Error(`model ${stringId} not loaded`)
     }
 
-    const language = loaded.model.id.languageCode
+    const language = modelId.languageCode
     return Predict(
       {
         language,
@@ -355,26 +398,22 @@
     )
   }
 
-  async detectLanguage(text: string, modelsByLang: _.Dictionary<ModelId>): Promise<string> {
+  public async detectLanguage(text: string, modelsByLang: _.Dictionary<ModelId>): Promise<string> {
     this._predictLogger.debug(`Detecting language for input: "${text}"`)
 
     const predictorsByLang = _.mapValues(modelsByLang, (id) => {
const stringId = modelIdService.toString(id) - return this.modelsById.get(stringId)?.predictors + const entry = this.modelsById.get(stringId) + return entry?.predictors }) - if (!this._dictionnaryIsFilled(predictorsByLang)) { + if (Object.values(predictorsByLang).some(_.isUndefined)) { const missingLangs = _(predictorsByLang) .pickBy((pred) => _.isUndefined(pred)) .keys() .value() throw new Error(`No models loaded for the following languages: [${missingLangs.join(', ')}]`) } - return this._tools.identify_language(text, predictorsByLang) - } - - // TODO: this should go someplace else, but I find it very handy - private _dictionnaryIsFilled = (dictionnary: { [key: string]: T | undefined }): dictionnary is _.Dictionary => { - return !Object.values(dictionnary).some(_.isUndefined) + return this._tools.identify_language(text, predictorsByLang as _.Dictionary) } } diff --git a/packages/nlu-engine/src/engine/initialize-tools.ts b/packages/nlu-engine/src/engine/initialize-tools.ts index efbabfb9..b537f462 100644 --- a/packages/nlu-engine/src/engine/initialize-tools.ts +++ b/packages/nlu-engine/src/engine/initialize-tools.ts @@ -1,73 +1,48 @@ +import Bluebird from 'bluebird' import path from 'path' -import { Health, Specifications } from 'src/typings' +import { LangServerSpecs } from 'src/typings' import yn from 'yn' -// eslint-disable-next-line import/order -const { version: nluVersion } = require('../../package.json') - -import MLToolkit from '../ml/toolkit' +import * as MLToolkit from '../ml/toolkit' import { LanguageConfig, Logger } from '../typings' import { DucklingEntityExtractor } from './entities/duckling-extractor' -import { SystemEntityCacheManager } from './entities/entity-cache-manager' +import { DucklingClient } from './entities/duckling-extractor/duckling-client' +import { DummySystemEntityExtractor } from './entities/dummy-system-extractor' +import { SystemEntityCacheManager } from './entities/entity-cache' import { MicrosoftEntityExtractor } from './entities/microsoft-extractor' import languageIdentifier, { FastTextLanguageId } from './language/language-identifier' -import LangProvider from './language/language-provider' +import { LanguageProvider } from './language/language-provider' import { getPOSTagger, tagSentence } from './language/pos-tagger' +import { nonSpaceSeparatedLanguages } from './language/space-separated' import { getStopWordsForLang } from './language/stopWords' import SeededLodashProvider from './tools/seeded-lodash' -import { LanguageProvider, SystemEntityExtractor, Tools } from './typings' -import { nonSpaceSeparatedLanguages } from './language/space-separated' +import { SystemEntityExtractor, Tools } from './typings' const PRE_TRAINED_DIR = 'pre-trained' const STOP_WORDS_DIR = 'stop-words' const LANG_ID_MODEL = 'lid.176.ftz' -const healthGetter = (languageProvider: LanguageProvider) => (): Health => { - const { validProvidersCount, validLanguages } = languageProvider.getHealth() - return { - isEnabled: validProvidersCount! > 0 && validLanguages!.length > 0, - validProvidersCount: validProvidersCount!, - validLanguages: validLanguages! 
- } -} +const MICROSOFT_CACHE_FILE = 'microsoft_sys_entities.json' +const DUCKLING_CACHE_FILE = 'duckling_sys_entities.json' -const versionGetter = (languageProvider: LanguageProvider) => (): Specifications => { +const versionGetter = (languageProvider: LanguageProvider) => (): LangServerSpecs => { const { langServerInfo } = languageProvider const { dim, domain, version } = langServerInfo - return { - nluVersion, - languageServer: { - dimensions: dim, - domain, - version - } + dimensions: dim, + domain, + version } } -const initializeLanguageProvider = async ( - config: LanguageConfig, - logger: Logger, - seededLodashProvider: SeededLodashProvider -) => { - try { - const languageProvider = await LangProvider.initialize( - config.languageSources, - logger, - nluVersion, - path.join(config.cachePath, 'cache'), - seededLodashProvider - ) - const getHealth = healthGetter(languageProvider) - return { languageProvider, health: getHealth() } - } catch (e) { - if (e.failure && e.failure.code === 'ECONNREFUSED') { - const errMsg = `Language server can't be reached at address ${e.failure.address}:${e.failure.port}` - logger.error(errMsg) - throw new Error(errMsg) - } - throw e - } +const initializeLanguageProvider = async (config: LanguageConfig, logger: Logger): Promise => { + const { languageURL, languageAuthToken, cachePath } = config + const langProviderCachePath = path.join(cachePath, 'cache') + return LanguageProvider.create(logger, { + languageURL, + languageAuthToken, + cacheDir: langProviderCachePath + }) } const makeSystemEntityExtractor = async (config: LanguageConfig, logger: Logger): Promise => { @@ -78,16 +53,22 @@ const makeSystemEntityExtractor = async (config: LanguageConfig, logger: Logger) logger.warning( 'You are using Microsoft Recognizer entity extractor which is experimental. This feature can disappear at any time.' ) - const msCache = makeCacheManager('microsoft_sys_entities.json') + const msCache = makeCacheManager(MICROSOFT_CACHE_FILE) const extractor = new MicrosoftEntityExtractor(msCache, logger) await extractor.configure() return extractor } - const duckCache = makeCacheManager('duckling_sys_entities.json') - const extractor = new DucklingEntityExtractor(duckCache, logger) - await extractor.configure(config.ducklingEnabled, config.ducklingURL) - return extractor + if (config.ducklingEnabled) { + const duckCache = makeCacheManager(DUCKLING_CACHE_FILE) + const ducklingClient = new DucklingClient(config.ducklingURL) + const extractor = new DucklingEntityExtractor(duckCache, ducklingClient) + await extractor.init() + return extractor + } + + logger.warning('Duckling is disabled. 
No system entities available.') + return new DummySystemEntityExtractor() } const isSpaceSeparated = (lang: string) => { @@ -95,34 +76,36 @@ const isSpaceSeparated = (lang: string) => { } export async function initializeTools(config: LanguageConfig & { assetsPath: string }, logger: Logger): Promise { - const seededLodashProvider = new SeededLodashProvider() - const { languageProvider } = await initializeLanguageProvider(config, logger, seededLodashProvider) + const languageProvider = await initializeLanguageProvider(config, logger) + const fastTextLanguageIdModelPath = path.resolve(config.assetsPath, PRE_TRAINED_DIR, LANG_ID_MODEL) const fastTextLanguageId = new FastTextLanguageId(MLToolkit) - await fastTextLanguageId.initializeModel(path.resolve(config.assetsPath, PRE_TRAINED_DIR, LANG_ID_MODEL)) - const languageId = languageIdentifier(fastTextLanguageId) + await fastTextLanguageId.initializeModel(fastTextLanguageIdModelPath) + + const stopWordsDirPath = path.resolve(config.assetsPath, STOP_WORDS_DIR) + + const posModelDirPath = path.resolve(config.assetsPath, PRE_TRAINED_DIR) return { - identify_language: languageId, + identify_language: languageIdentifier(fastTextLanguageId), - partOfSpeechUtterances: async (tokenUtterances: string[][], lang: string) => { - const tagger = await getPOSTagger(path.resolve(config.assetsPath, PRE_TRAINED_DIR), lang, MLToolkit) - return tokenUtterances.map((u) => tagSentence(tagger, u)) + pos_utterances: async (tokenUtterances: string[][], lang: string) => { + const tagger = await getPOSTagger(posModelDirPath, lang, MLToolkit, logger) + return Bluebird.map(tokenUtterances, (u) => tagSentence(tagger, u)) }, + tokenize_utterances: (utterances: string[], lang: string, vocab?: string[]) => languageProvider.tokenize(utterances, lang, vocab), vectorize_tokens: async (tokens, lang) => { const a = await languageProvider.vectorize(tokens, lang) return a.map((x) => Array.from(x.values())) }, - generateSimilarJunkWords: (vocab: string[], lang: string) => languageProvider.generateSimilarJunkWords(vocab, lang), - getStopWordsForLang: getStopWordsForLang(path.resolve(config.assetsPath, STOP_WORDS_DIR)), + getStopWordsForLang: getStopWordsForLang(stopWordsDirPath), isSpaceSeparated, - getHealth: healthGetter(languageProvider), getLanguages: () => languageProvider.languages, - getSpecifications: versionGetter(languageProvider), - seededLodashProvider, + getLangServerSpecs: versionGetter(languageProvider), + seededLodashProvider: new SeededLodashProvider(), mlToolkit: MLToolkit, systemEntityExtractor: await makeSystemEntityExtractor(config, logger) } diff --git a/packages/nlu-engine/src/engine/intents/exact-intent-classifier.test.ts b/packages/nlu-engine/src/engine/intents/exact-intent-classifier.test.ts index e76fca01..a5ff9149 100644 --- a/packages/nlu-engine/src/engine/intents/exact-intent-classifier.test.ts +++ b/packages/nlu-engine/src/engine/intents/exact-intent-classifier.test.ts @@ -1,11 +1,11 @@ import _ from 'lodash' +import { ModelLoadingError } from '../errors' import { makeTestUtterance } from '../test-utils/fake-utterance' import { Intent } from '../typings' import Utterance from '../utterance/utterance' import { ExactIntenClassifier } from './exact-intent-classifier' -import { ModelLoadingError } from '../../errors' const u1 = 'Hi my name is Alex W and I try to make NLU for a living' const u2 = "Hi I'm Justine and I'm a smart bot with very scoped skills" @@ -37,7 +37,7 @@ const isOneHot = (x: number[]) => { describe('Exact match intent classifier', () => { 
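// Editor's note (not part of the diff): the test edits below follow the new PipelineComponent
// contract, where train() resolves directly to the serializable model instead of requiring a
// separate serialize() call. A condensed sketch of the new flow (intents, utterance and
// dummyProgress as defined in this test file):
//
//   const clf = new ExactIntenClassifier()
//   const model = await clf.train(
//     { intents, languageCode: 'en', list_entities: [], pattern_entities: [], nluSeed: 42 },
//     dummyProgress
//   )
//   const fresh = new ExactIntenClassifier()
//   await fresh.load(model) // load() now takes the structured model, not a JSON string
//   const prediction = await fresh.predict(utterance)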
test('when no match clf returns all confidence 0 and oos 1', async () => { let exactMatchIntentClf = new ExactIntenClassifier() - await exactMatchIntentClf.train( + const model = await exactMatchIntentClf.train( { intents, languageCode: 'en', @@ -47,8 +47,6 @@ describe('Exact match intent classifier', () => { }, dummyProgress ) - - const model = exactMatchIntentClf.serialize() exactMatchIntentClf = new ExactIntenClassifier() await exactMatchIntentClf.load(model) @@ -60,7 +58,7 @@ describe('Exact match intent classifier', () => { test('when match clf returns one hot vector', async () => { let exactMatchIntentClf = new ExactIntenClassifier() - await exactMatchIntentClf.train( + const model = await exactMatchIntentClf.train( { intents, languageCode: 'en', @@ -70,8 +68,6 @@ describe('Exact match intent classifier', () => { }, dummyProgress ) - - const model = exactMatchIntentClf.serialize() exactMatchIntentClf = new ExactIntenClassifier() await exactMatchIntentClf.load(model) @@ -93,7 +89,7 @@ describe('Exact match intent classifier', () => { // This test is dependant of utterance.toString() implementation. Ideally we would mock the utterance class. test('clf matches even when casing or special characters', async () => { let exactMatchIntentClf = new ExactIntenClassifier() - await exactMatchIntentClf.train( + const model = await exactMatchIntentClf.train( { intents, languageCode: 'en', @@ -103,8 +99,6 @@ describe('Exact match intent classifier', () => { }, dummyProgress ) - - const model = exactMatchIntentClf.serialize() exactMatchIntentClf = new ExactIntenClassifier() await exactMatchIntentClf.load(model) @@ -126,32 +120,4 @@ describe('Exact match intent classifier', () => { expect(isOneHot(confs)).toBe(true) } }) - - test('When model is corrupted, loading a model throws', async () => { - // arrange - const exactMatchIntentClf = new ExactIntenClassifier() - await exactMatchIntentClf.train( - { - intents, - languageCode: 'en', - list_entities: [], - pattern_entities: [], - nluSeed: 42 - }, - dummyProgress - ) - const model = exactMatchIntentClf.serialize() - - // act && asert - await expect(exactMatchIntentClf.load(`${model} heyhey I will kill this model`)).rejects.toThrowError( - ModelLoadingError - ) - - const parsed = JSON.parse(model) - parsed['someKey'] = 'someValue' - await expect(exactMatchIntentClf.load(JSON.stringify(parsed))).rejects.toThrowError(ModelLoadingError) - - const undef: unknown = undefined - await expect(exactMatchIntentClf.load(undef as string)).rejects.toThrowError(ModelLoadingError) - }) }) diff --git a/packages/nlu-engine/src/engine/intents/exact-intent-classifier.ts b/packages/nlu-engine/src/engine/intents/exact-intent-classifier.ts index 32074737..64060079 100644 --- a/packages/nlu-engine/src/engine/intents/exact-intent-classifier.ts +++ b/packages/nlu-engine/src/engine/intents/exact-intent-classifier.ts @@ -1,51 +1,66 @@ -import Joi, { validate } from 'joi' +import * as ptb from '@bpinternal/ptb-schema' import _ from 'lodash' -import { ModelLoadingError } from '../../errors' +import { ModelLoadingError } from '../errors' import { Intent } from '../typings' import Utterance, { UtteranceToStringOptions } from '../utterance/utterance' import { IntentTrainInput, NoneableIntentClassifier, NoneableIntentPredictions } from './intent-classifier' -export interface Model { +type Model = { intents: string[] exact_match_index: ExactMatchIndex } +type Predictors = Model + type ExactMatchIndex = _.Dictionary<{ intent: string }> +const PTBExactIndexValue = new 
ptb.PTBMessage('ExactIndexValue', {
+  intent: { type: 'string', id: 1, rule: 'required' }
+})
+
+const PTBExactIntentModel = new ptb.PTBMessage('ExactIntentModel', {
+  intents: { type: 'string', id: 1, rule: 'repeated' },
+  exact_match_index: { keyType: 'string', type: PTBExactIndexValue, id: 2, rule: 'map' }
+})
+
 const EXACT_MATCH_STR_OPTIONS: UtteranceToStringOptions = {
   lowerCase: true,
   onlyWords: true,
   strategy: 'replace-entity-name'
 }
 
-const schemaKeys: Record<keyof Model, Joi.AnySchema> = {
-  intents: Joi.array().items(Joi.string()).required(),
-  exact_match_index: Joi.object()
-    .pattern(/^/, Joi.object().keys({ intent: Joi.string() }))
-    .required()
-}
-export const modelSchema = Joi.object().keys(schemaKeys).required()
-
-export class ExactIntenClassifier implements NoneableIntentClassifier {
+export class ExactIntenClassifier implements NoneableIntentClassifier<typeof PTBExactIntentModel> {
   private static _displayName = 'Exact Intent Classifier'
   private static _name = 'exact-matcher'
 
-  private model: Model | undefined
+  private predictors: Predictors | undefined
 
-  get name() {
+  public get name() {
     return ExactIntenClassifier._name
   }
 
-  async train(trainInput: IntentTrainInput, progress: (p: number) => void) {
+  public static get modelType() {
+    return PTBExactIntentModel
+  }
+
+  public get modelType() {
+    return PTBExactIntentModel
+  }
+
+  public async train(
+    trainInput: IntentTrainInput,
+    progress: (p: number) => void
+  ): Promise<ptb.Infer<typeof PTBExactIntentModel>> {
     const { intents } = trainInput
 
     const exact_match_index = this._buildExactMatchIndex(intents)
 
-    this.model = {
+    progress(1)
+
+    return {
       intents: intents.map((i) => i.name),
       exact_match_index
     }
-
-    progress(1)
   }
 
   private _buildExactMatchIndex = (intents: Intent<Utterance>[]): ExactMatchIndex => {
@@ -65,29 +80,26 @@ export class ExactIntenClassifier implements NoneableIntentClassifier {
       .value()
   }
 
-  serialize() {
-    if (!this.model) {
-      throw new Error(`${ExactIntenClassifier._displayName} must be trained before calling serialize.`)
-    }
-    return JSON.stringify(this.model)
-  }
-
-  async load(serialized: string) {
+  public async load(serialized: ptb.Infer<typeof PTBExactIntentModel>) {
     try {
-      const raw = JSON.parse(serialized)
-      const model: Model = await validate(raw, modelSchema)
-      this.model = model
-    } catch (err) {
+      const { intents, exact_match_index } = serialized
+      const model: Model = {
+        intents: intents ?? [],
+        exact_match_index
+      }
+      this.predictors = model
+    } catch (thrown) {
+      const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
       throw new ModelLoadingError(ExactIntenClassifier._displayName, err)
     }
   }
 
-  async predict(utterance: Utterance): Promise<NoneableIntentPredictions> {
-    if (!this.model) {
-      throw new Error(`${ExactIntenClassifier._displayName} must be trained before calling predict.`)
+  public async predict(utterance: Utterance): Promise<NoneableIntentPredictions> {
+    if (!this.predictors) {
+      throw new Error(`${ExactIntenClassifier._displayName} must load model before calling predict.`)
     }
 
-    const { exact_match_index, intents: intentNames } = this.model
+    const { exact_match_index, intents: intentNames } = this.predictors
 
     const exactPred = this._findExactIntent(exact_match_index, utterance)
diff --git a/packages/nlu-engine/src/engine/intents/intent-classifier.d.ts b/packages/nlu-engine/src/engine/intents/intent-classifier.d.ts
deleted file mode 100644
index fe154ab5..00000000
--- a/packages/nlu-engine/src/engine/intents/intent-classifier.d.ts
+++ /dev/null
@@ -1,32 +0,0 @@
-import { Intent, ListEntityModel, PatternEntity } from '../typings'
-import Utterance from '../utterance/utterance'
-
-export interface IntentTrainInput {
-  languageCode: string
-  list_entities: ListEntityModel[]
-  pattern_entities: PatternEntity[]
-  intents: Intent<Utterance>[]
-  nluSeed: number
-}
-
-export interface IntentPrediction {
-  name: string
-  confidence: number
-  extractor: string
-}
-export interface IntentPredictions {
-  intents: IntentPrediction[]
-}
-export interface NoneableIntentPredictions extends IntentPredictions {
-  oos: number
-}
-
-export interface IntentClassifier {
-  train(trainInput: IntentTrainInput, progress: (p: number) => void): Promise<void>
-  serialize(): string
-  load(model: string): Promise<void>
-  predict(utterance: Utterance): Promise<IntentPredictions>
-}
-export interface NoneableIntentClassifier {
-  predict(utterance: Utterance): Promise<NoneableIntentPredictions>
-}
diff --git a/packages/nlu-engine/src/engine/intents/intent-classifier.ts b/packages/nlu-engine/src/engine/intents/intent-classifier.ts
new file mode 100644
index 00000000..36136b2e
--- /dev/null
+++ b/packages/nlu-engine/src/engine/intents/intent-classifier.ts
@@ -0,0 +1,43 @@
+import * as ptb from '@bpinternal/ptb-schema'
+import { PipelineComponent } from 'src/component'
+import { Intent, ListEntityModel, PatternEntity } from '../typings'
+import Utterance from '../utterance/utterance'
+
+export type IntentTrainInput = {
+  languageCode: string
+  list_entities: ListEntityModel[]
+  pattern_entities: PatternEntity[]
+  intents: Intent<Utterance>[]
+  nluSeed: number
+}
+
+export type IntentPrediction = {
+  name: string
+  confidence: number
+  extractor: string
+}
+export type IntentPredictions = {
+  intents: IntentPrediction[]
+}
+
+export type NoneableIntentTrainInput = {
+  allUtterances: Utterance[]
+} & IntentTrainInput
+
+export type NoneableIntentPredictions = {
+  oos: number
+} & IntentPredictions
+
+export type IntentClassifier<Model extends ptb.PTBMessage<any>> = PipelineComponent<
+  IntentTrainInput,
+  Model,
+  Utterance,
+  IntentPredictions
+>
+
+export type NoneableIntentClassifier<Model extends ptb.PTBMessage<any>> = PipelineComponent<
+  NoneableIntentTrainInput,
+  Model,
+  Utterance,
+  NoneableIntentPredictions
+>
diff --git a/packages/nlu-engine/src/engine/intents/intent-vocab.test.ts b/packages/nlu-engine/src/engine/intents/intent-vocab.test.ts
index 62169eec..50549a44 100644
--- a/packages/nlu-engine/src/engine/intents/intent-vocab.test.ts
+++ b/packages/nlu-engine/src/engine/intents/intent-vocab.test.ts
@@ -17,7 +17,6 @@ const LIST_ENTITIES: ListEntityModel[] = [
         ['air', 'can']
       ]
     },
-    languageCode: 'en',
     sensitive: false
   }
 ]
diff --git
a/packages/nlu-engine/src/engine/intents/intent-vocab.ts b/packages/nlu-engine/src/engine/intents/intent-vocab.ts index 952e8a0e..c4b1d3c7 100644 --- a/packages/nlu-engine/src/engine/intents/intent-vocab.ts +++ b/packages/nlu-engine/src/engine/intents/intent-vocab.ts @@ -1,9 +1,10 @@ import _ from 'lodash' +import { SLOT_ANY } from '../../constants' import { SPACE } from '../tools/token-utils' import { Intent, ListEntityModel } from '../typings' import Utterance from '../utterance/utterance' -interface IntentVocab { +type IntentVocab = { name: string vocab: string[] slot_entities: string[] @@ -26,7 +27,7 @@ export const buildIntentVocab = (utterances: Utterance[], intentEntities: ListEn export const getEntitiesAndVocabOfIntent = (intent: Intent, entities: ListEntityModel[]): IntentVocab => { const allowedEntities = _.chain(intent.slot_definitions) .flatMap((s) => s.entities) - .filter((e) => e !== 'any') + .filter((e) => e !== SLOT_ANY) .uniq() .value() as string[] diff --git a/packages/nlu-engine/src/engine/intents/model-validation.test.ts b/packages/nlu-engine/src/engine/intents/model-validation.test.ts deleted file mode 100644 index 7ffcb7ed..00000000 --- a/packages/nlu-engine/src/engine/intents/model-validation.test.ts +++ /dev/null @@ -1,269 +0,0 @@ -import Bluebird from 'bluebird' -import Joi, { validate } from 'joi' -import { modelSchema as exactMatchModelSchema, Model as ExactMatchModel } from './exact-intent-classifier' -import { modelSchema as oosModelSchema, Model as OOSModel } from './oos-intent-classfier' -import { modelSchema as svmModelSchema, Model as SVMModel } from './svm-intent-classifier' - -import { modelSchema as slotModelSchema, Model as SlotModel } from '../slots/slot-tagger' - -const expectValidates = async (model: any, schema: Joi.ObjectSchema) => { - await expect(validate(model, schema)).resolves.not.toThrow() -} - -const expectThrows = async (model: any, schema: Joi.ObjectSchema) => { - await expect(validate(model, schema)).rejects.toThrow() -} - -test('exact-match intent clf model validation', async () => { - const shouldPass: ExactMatchModel[] = [ - { - intents: [], - exact_match_index: {} - }, - { - intents: ['some_name', 'another_name'], - exact_match_index: { - aaaaaaaaa: { intent: 'some_name' }, - bbbbbbbbb: { intent: 'some_name' }, - ccccccccc: { intent: 'another_name' } - } - } - ] - - const shouldFail = [ - undefined, - null, - {}, - { - intents: [] - }, - { - exact_match_index: {} - }, - { - intents: undefined, - exact_match_index: {} - }, - { - intents: [], - exactMatchIndex: {} - }, - { - intents: [], - exact_match_index: {}, - exactMatchIndex: {} - }, - { - intents: [], - exact_match_index: { - aaaaaaaaa: { intent: '' } // empty intent name - } - } - ] - - await Bluebird.map(shouldPass, (m) => expectValidates(m, exactMatchModelSchema)) - await Bluebird.map(shouldFail, (m) => expectThrows(m, exactMatchModelSchema)) -}) - -test('oos intent clf model validation', async () => { - const shouldPass: OOSModel[] = [ - { - baseIntentClfModel: '', - exactMatchModel: '', - oosSvmModel: '', - trainingVocab: [] - }, - { - baseIntentClfModel: '', - exactMatchModel: '', - oosSvmModel: undefined, - trainingVocab: [] - } - ] - - const shouldFail = [ - undefined, - null, - {}, - { - baseIntentClfModel: undefined, - exactMatchModel: '', - oosSvmModel: '', - trainingVocab: [] - }, - { - baseIntentClfModel: '', - exactMatchModel: undefined, - oosSvmModel: undefined, - trainingVocab: [] - }, - { - baseIntentClfModel: '', - exactMatchModel: '', - oosSvmModel: undefined, - 
trainingVocab: undefined - }, - { - baseIntentClfModel: '', - exactMatchModel: '', - oosSvmModel: undefined, - trainingVocab: undefined - }, - { - baseIntentClfModel: '', - exactMatchModel: '', - oosSvmModel: undefined - // missing key - } - ] - await Bluebird.map(shouldPass, (m) => expectValidates(m, oosModelSchema)) - await Bluebird.map(shouldFail, (m) => expectThrows(m, oosModelSchema)) -}) - -test('svm intent clf model validation', async () => { - const shouldPass: SVMModel[] = [ - { - svmModel: '', - intentNames: [], - entitiesName: [] - }, - { - svmModel: undefined, - intentNames: [], - entitiesName: [] - } - ] - - const shouldFail = [ - undefined, - null, - {}, - { - svmModel: '', - intentNames: undefined, - list_entities: [], - pattern_entities: [] - }, - { - svmModel: undefined, - intentNames: [], - list_entities: undefined, - pattern_entities: [] - }, - { - svmModel: '', - intentNames: [], - list_entities: [], - pattern_entities: undefined - }, - { - svmModel: undefined, - intentNames: [], - list_entities: [], - pattern_entities: [], - someExtraKey: 42 - }, - { - svmModel: undefined, - intentNames: [], - list_entities: [] - // missing key - } - ] - - await Bluebird.map(shouldPass, (m) => expectValidates(m, svmModelSchema)) - await Bluebird.map(shouldFail, (m) => expectThrows(m, svmModelSchema)) -}) - -test('slot tagger model validation', async () => { - const shouldPass: SlotModel[] = [ - { - crfModel: Buffer.from(''), - intentFeatures: { - name: 'someIntent', - slot_entities: [], - vocab: [] - }, - slot_definitions: [] - }, - { - crfModel: undefined, - intentFeatures: { - name: 'someIntent', - slot_entities: [], - vocab: [] - }, - slot_definitions: [] - }, - { - crfModel: undefined, - intentFeatures: { - name: 'someIntent', - slot_entities: ['entity'], - vocab: [''] - }, - slot_definitions: [{ name: 'some-name', entities: ['entity'] }] - }, - { - crfModel: undefined, - intentFeatures: { - name: 'someIntent', - slot_entities: ['entity'], - vocab: [''] - }, - slot_definitions: [{ name: 'some-name', entities: ['entity'] }] - } - ] - - const shouldFail = [ - undefined, - null, - {}, - { - crfModel: undefined, - intentFeatures: { - name: 'someIntent', - slot_entities: [] - // missing key - }, - slot_definitions: [] - }, - { - crfModel: undefined, - intentFeatures: {}, - slot_definitions: [] - }, - { - crfModel: undefined, - intentFeatures: { - name: 'someIntent', - slot_entities: ['entity'], - vocab: [''] - }, - slot_definitions: undefined - }, - { - crfModel: undefined, - intentFeatures: { - name: 'someIntent', - slot_entities: [], - vocab: [] - }, - slot_definitions: [], - someExtraKey: 42 - }, - { - crfModel: undefined, - intentFeatures: { - name: 'someIntent', - slot_entities: [], - vocab: [] - }, - slot_definitions: [undefined], - someExtraKey: 42 - } - ] - - await Bluebird.map(shouldPass, (m) => expectValidates(m, slotModelSchema)) - await Bluebird.map(shouldFail, (m) => expectThrows(m, slotModelSchema)) -}) diff --git a/packages/nlu-engine/src/engine/intents/oos-intent-classfier.ts b/packages/nlu-engine/src/engine/intents/oos-intent-classfier.ts index 93bf8e47..d3a907a7 100644 --- a/packages/nlu-engine/src/engine/intents/oos-intent-classfier.ts +++ b/packages/nlu-engine/src/engine/intents/oos-intent-classfier.ts @@ -1,47 +1,43 @@ -import Joi, { validate } from 'joi' +import * as ptb from '@bpinternal/ptb-schema' import _ from 'lodash' -import { ModelLoadingError } from '../../errors' -import { MLToolkit } from '../../ml/typings' +import { ModelOf } from 'src/component' +import 
* as MLToolkit from '../../ml/toolkit' import { Logger } from '../../typings' +import { ModelLoadingError } from '../errors' import { isPOSAvailable } from '../language/pos-tagger' +import { vocabNGram } from '../tools/strings' import { SMALL_TFIDF } from '../tools/tfidf' import { SPACE } from '../tools/token-utils' import { Intent, Tools } from '../typings' import Utterance, { buildUtteranceBatch } from '../utterance/utterance' import { ExactIntenClassifier } from './exact-intent-classifier' -import { - IntentTrainInput, - NoneableIntentClassifier, - NoneableIntentPredictions, - IntentPredictions -} from './intent-classifier' +import { NoneableIntentClassifier, NoneableIntentPredictions, NoneableIntentTrainInput } from './intent-classifier' import { getIntentFeatures } from './intent-featurizer' import { featurizeInScopeUtterances, featurizeOOSUtterances, getUtteranceFeatures } from './out-of-scope-featurizer' import { SvmIntentClassifier } from './svm-intent-classifier' -interface TrainInput extends IntentTrainInput { - allUtterances: Utterance[] -} - -interface Options { - legacyElection: boolean -} +const JUNK_VOCAB_SIZE = 500 +const JUNK_TOKEN_MIN = 1 +const JUNK_TOKEN_MAX = 20 -const DEFAULT_OPTIONS: Options = { - legacyElection: false -} - -export interface Model { +type Model = { trainingVocab: string[] - baseIntentClfModel: string - oosSvmModel: string | undefined - exactMatchModel: string + baseIntentClfModel: ModelOf + oosSvmModel: ModelOf | undefined + exactMatchModel: ModelOf } -interface Predictors { +const PTBOOSIntentModel = new ptb.PTBMessage('OOSIntentModel', { + trainingVocab: { type: 'string', id: 1, rule: 'repeated' }, + baseIntentClfModel: { type: SvmIntentClassifier.modelType, id: 2, rule: 'required' }, + oosSvmModel: { type: MLToolkit.SVM.Classifier.modelType, id: 3, rule: 'optional' }, + exactMatchModel: { type: ExactIntenClassifier.modelType, id: 4, rule: 'required' } +}) + +type Predictors = { baseIntentClf: SvmIntentClassifier - oosSvm: MLToolkit.SVM.Predictor | undefined + oosSvm: MLToolkit.SVM.Classifier | undefined trainingVocab: string[] exactIntenClassifier: ExactIntenClassifier } @@ -53,15 +49,6 @@ const NONE_UTTERANCES_BOUNDS = { MAX: 200 } -export const modelSchema = Joi.object() - .keys({ - trainingVocab: Joi.array().items(Joi.string().allow('')).required(), - baseIntentClfModel: Joi.string().allow('').required(), - oosSvmModel: Joi.string().allow('').optional(), - exactMatchModel: Joi.string().allow('').required() - }) - .required() - /** * @description Intent classfier composed of 3 smaller components: * 1 - an SVM intent classifier @@ -70,24 +57,29 @@ export const modelSchema = Joi.object() * * @returns A confidence level for all possible labels including none */ -export class OOSIntentClassifier implements NoneableIntentClassifier { +export class OOSIntentClassifier implements NoneableIntentClassifier { private static _displayName = 'OOS Intent Classifier' private static _name = 'classifier' - - private model: Model | undefined private predictors: Predictors | undefined - private _options: Options + constructor(private tools: Tools, private _logger: Logger) {} - constructor(private tools: Tools, private _logger: Logger, opt: Partial = {}) { - this._options = { ...DEFAULT_OPTIONS, ...opt } + public get name() { + return OOSIntentClassifier._name } - get name() { - return OOSIntentClassifier._name + public static get modelType() { + return PTBOOSIntentModel + } + + public get modelType() { + return PTBOOSIntentModel } - public async train(trainInput: 
TrainInput, progress: (p: number) => void): Promise<void> {
+  public async train(
+    trainInput: NoneableIntentTrainInput,
+    progress: (p: number) => void
+  ): Promise<ModelOf<typeof OOSIntentClassifier>> {
     const { languageCode, allUtterances } = trainInput
     const noneIntent = await this._makeNoneIntent(allUtterances, languageCode)
@@ -111,10 +103,9 @@ export class OOSIntentClassifier implements NoneableIntentClassifier {
     const exactIntenClassifier = new ExactIntenClassifier()
     const dummyProgress = () => {}
-    await exactIntenClassifier.train(trainInput, dummyProgress)
-    const exactMatchModel = exactIntenClassifier.serialize()
+    const exactMatchModel = await exactIntenClassifier.train(trainInput, dummyProgress)
-    this.model = {
+    return {
       oosSvmModel: ooScopeModel,
       baseIntentClfModel: inScopeModel,
       trainingVocab: this.getVocab(trainInput.allUtterances),
@@ -132,12 +123,7 @@ export class OOSIntentClassifier implements NoneableIntentClassifier {
     const lo = this.tools.seededLodashProvider.getSeededLodash()
-    const vocabWithDupes = lo(allTokens)
-      .map((t) => t.value)
-      .flattenDeep()
-      .value()
-
-    const junkWords = await this.tools.generateSimilarJunkWords(vocab, languageCode)
+    const junkWords = this.generateSimilarJunkWords(vocab)
     const avgTokens = lo.meanBy(allUtterances, (x) => x.tokens.length)
     const nbOfNoneUtterances = lo.clamp(
       (allUtterances.length * 2) / 3,
@@ -175,17 +161,39 @@ export class OOSIntentClassifier implements NoneableIntentClassifier {
       utterances: await buildUtteranceBatch(
         [...mixedUtts, ...vocabUtts, ...junkWordsUtts, ...stopWords],
         languageCode,
-        this.tools
+        this.tools,
+        []
       ),
       contexts: []
     }
   }
+  private generateSimilarJunkWords(subsetVocab: string[]) {
+    const gramset = vocabNGram(subsetVocab)
+
+    const realWords = _.uniq(subsetVocab)
+    const meanWordSize = _.meanBy(realWords, (w) => w.length)
+    const minJunkSize = Math.max(JUNK_TOKEN_MIN, meanWordSize / 2) // Twice as short
+    const maxJunkSize = Math.min(JUNK_TOKEN_MAX, meanWordSize * 1.5) // A bit longer. Those numbers are discretionary and are not expected to make a big impact on the models.
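+    // e.g. (illustrative values only): with a mean real-word length of 6,
+    // junk words end up 3 to 9 chars long, assembled below from random
+    // n-grams of the real vocab so they resemble the training language
+    // while carrying no meaning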
+    const lo = this.tools.seededLodashProvider.getSeededLodash()
+
+    const junks = _.range(0, JUNK_VOCAB_SIZE).map(() => {
+      const finalSize = lo.random(minJunkSize, maxJunkSize, false)
+      let word = ''
+      while (word.length < finalSize) {
+        word += lo.sample(gramset)
+      }
+      return word
+    }) // randomly generated words
+
+    return junks
+  }
+
   private async _trainOOScopeSvm(
-    trainInput: TrainInput,
+    trainInput: NoneableIntentTrainInput,
     noneIntent: Omit<Intent<Utterance>, 'contexts'>,
     progress: (p: number) => void
-  ): Promise<string | undefined> {
+  ): Promise<ModelOf<typeof MLToolkit.SVM.Classifier> | undefined> {
     const { allUtterances, nluSeed, intents, languageCode } = trainInput
     const trainingOptions: MLToolkit.SVM.SVMOptions = {
@@ -211,17 +219,18 @@ export class OOSIntentClassifier implements NoneableIntentClassifier {
       .flatMap((i) => featurizeInScopeUtterances(i.utterances, i.name))
       .value()
-    const svm = new this.tools.mlToolkit.SVM.Trainer(this._logger)
+    const svm = new this.tools.mlToolkit.SVM.Classifier(this._logger)
-    const model = await svm.train([...in_scope_points, ...oos_points], trainingOptions, progress)
+    const points = [...in_scope_points, ...oos_points]
+    const model = await svm.train({ points, options: trainingOptions }, progress)
     return model
   }
   private async _trainInScopeSvm(
-    trainInput: TrainInput,
+    trainInput: NoneableIntentTrainInput,
     noneIntent: Omit<Intent<Utterance>, 'contexts'>,
     progress: (p: number) => void
-  ): Promise<string> {
+  ) {
     const baseIntentClf = new SvmIntentClassifier(this.tools, getIntentFeatures, this._logger)
     const noneUtts = noneIntent.utterances.filter((u) => u.tokens.filter((t) => t.isWord).length >= 3)
@@ -246,28 +255,25 @@
       }
     ]
-    await baseIntentClf.train({ ...trainInput, intents }, progress)
-    return baseIntentClf.serialize()
+    return baseIntentClf.train({ ...trainInput, intents }, progress)
   }
   private getVocab(utts: Utterance[]) {
     return _.flatMap(utts, (u) => u.tokens.map((t) => t.toString({ lowerCase: true })))
   }
-  public serialize(): string {
-    if (!this.model) {
-      throw new Error(`${OOSIntentClassifier._displayName} must be trained before calling serialize.`)
-    }
-    return JSON.stringify(this.model)
-  }
-
-  public async load(serialized: string): Promise<void> {
+  public async load(serialized: ptb.Infer<typeof PTBOOSIntentModel>): Promise<void> {
     try {
-      const raw = JSON.parse(serialized)
-      const model: Model = await validate(raw, modelSchema)
+      const { baseIntentClfModel, exactMatchModel, oosSvmModel, trainingVocab } = serialized
+      const model: Model = {
+        baseIntentClfModel,
+        exactMatchModel,
+        oosSvmModel,
+        trainingVocab: trainingVocab ?? []
+      }
       this.predictors = await this._makePredictors(model)
-      this.model = model
-    } catch (err) {
+    } catch (thrown) {
+      const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
       throw new ModelLoadingError(OOSIntentClassifier._displayName, err)
     }
   }
@@ -278,14 +284,11 @@ export class OOSIntentClassifier implements NoneableIntentClassifier {
     const baseIntentClf = new SvmIntentClassifier(this.tools, getIntentFeatures, this._logger)
     await baseIntentClf.load(baseIntentClfModel)
-    const exactMatcher = new ExactIntenClassifier()
-    await exactMatcher.load(exactMatchModel)
-
     const exactIntenClassifier = new ExactIntenClassifier()
     await exactIntenClassifier.load(exactMatchModel)
-    const oosSvm = oosSvmModel ? new this.tools.mlToolkit.SVM.Predictor(oosSvmModel) : undefined
-    await oosSvm?.initialize()
+    const oosSvm = oosSvmModel ? 
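+    // oosSvmModel is optional by design: _trainOOScopeSvm may yield
+    // undefined when the dataset cannot support an out-of-scope SVM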
await this._makeSvmClf(oosSvmModel) : undefined return { oosSvm, @@ -295,13 +298,15 @@ export class OOSIntentClassifier implements NoneableIntentClassifier { } } + private async _makeSvmClf(svmModel: ModelOf): Promise { + const svm = new this.tools.mlToolkit.SVM.Classifier(this._logger) + await svm.load(svmModel) + return svm + } + public async predict(utterance: Utterance): Promise { if (!this.predictors) { - if (!this.model) { - throw new Error(`${OOSIntentClassifier._displayName} must be trained before calling predict.`) - } - - this.predictors = await this._makePredictors(this.model) + throw new Error(`${OOSIntentClassifier._displayName} must load model before calling predict.`) } const { oosSvm, baseIntentClf, trainingVocab, exactIntenClassifier } = this.predictors @@ -323,50 +328,16 @@ export class OOSIntentClassifier implements NoneableIntentClassifier { } catch (err) {} } - if (this._options.legacyElection) { - const exactMatchPredictions = { - oos: exactPredictions.oos, - intents: [ - ...exactPredictions.intents, - { name: NONE_INTENT, confidence: 0, extractor: exactIntenClassifier.name } - ] - } - return this._returnLegacy(svmPredictions, exactMatchPredictions, oosPrediction) - } - return this._returnNatural(svmPredictions, exactPredictions, oosPrediction) - } - - private _returnLegacy = ( - svmPredictions: IntentPredictions, - exactMatchPredictions: NoneableIntentPredictions, - oos: number - ) => { - // No election between none intent and oos - if (exactMatchPredictions.oos === 0) { - return exactMatchPredictions - } - - return { - intents: svmPredictions.intents, - oos - } - } - - private _returnNatural = ( - svmPredictions: IntentPredictions, - exactMatchPredictions: NoneableIntentPredictions, - oos: number - ) => { - if (exactMatchPredictions.oos === 0) { - return exactMatchPredictions + if (exactPredictions.oos === 0) { + return exactPredictions } return OOSIntentClassifier._removeNoneIntent({ intents: svmPredictions.intents, - oos + oos: oosPrediction }) } - static _removeNoneIntent(preds: NoneableIntentPredictions): NoneableIntentPredictions { + public static _removeNoneIntent(preds: NoneableIntentPredictions): NoneableIntentPredictions { const noneIdx = preds.intents.findIndex((i) => i.name === NONE_INTENT) if (noneIdx < 0) { return preds diff --git a/packages/nlu-engine/src/engine/intents/oos-intent-classifier.test.ts b/packages/nlu-engine/src/engine/intents/oos-intent-classifier.test.ts index ae7034bd..f9052d2c 100644 --- a/packages/nlu-engine/src/engine/intents/oos-intent-classifier.test.ts +++ b/packages/nlu-engine/src/engine/intents/oos-intent-classifier.test.ts @@ -59,9 +59,7 @@ test('predict with exact match returns confidence 1 for exact-match', async () = utterances: [u2, u6, u7, u8, u9] } ] - await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) - - const model = oosIntentClassifier.serialize() + const model = await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) oosIntentClassifier = new OOSIntentClassifier(fakeTools, dummyLogger as Logger) await oosIntentClassifier.load(model) @@ -91,9 +89,7 @@ test('predict with no exact match returns confidence that sums up to 1', async ( utterances: [u2, u6, u7, u8, u9] } ] - await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) - - const model = oosIntentClassifier.serialize() + const model = await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) oosIntentClassifier = new OOSIntentClassifier(fakeTools, dummyLogger as Logger) await 
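  // train() now returns the model directly; loading it into a fresh
  // instance replaces the old serialize() / JSON round-trip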
oosIntentClassifier.load(model) @@ -124,9 +120,7 @@ test('predict with less than min utterances for ml should not match', async () = utterances: [u2] } ] - await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) - - const model = oosIntentClassifier.serialize() + const model = await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) oosIntentClassifier = new OOSIntentClassifier(fakeTools, dummyLogger as Logger) await oosIntentClassifier.load(model) @@ -158,9 +152,7 @@ test('predict with available oos should give oos prediction', async () => { utterances: [u2] } ] - await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) - - const model = oosIntentClassifier.serialize() + const model = await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) oosIntentClassifier = new OOSIntentClassifier(fakeTools, dummyLogger as Logger) await oosIntentClassifier.load(model) @@ -171,38 +163,6 @@ test('predict with available oos should give oos prediction', async () => { expect(oos).toBeGreaterThan(0) }) -test('When model is corrupted, loading a model throws', async () => { - // arrange - const oosIntentClassifier = new OOSIntentClassifier(fakeTools, dummyLogger as Logger) - - const intentsDefs = [ - { - name: 'A', - contexts: [], - slot_definitions: [], - utterances: [u1, u3, u5] - }, - { - name: 'B', - contexts: [], - slot_definitions: [], - utterances: [u2, u6, u7, u8, u9] - } - ] - await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) - const model = oosIntentClassifier.serialize() - - // act & assert - await expect(oosIntentClassifier.load(`${model} good and bad are relative concepts`)).rejects.toThrowError() - - const parsed = JSON.parse(model) - parsed['someKey'] = 'someValue' - await expect(oosIntentClassifier.load(JSON.stringify(parsed))).rejects.toThrowError() - - const undef: unknown = undefined - await expect(oosIntentClassifier.load(undef as string)).rejects.toThrowError() -}) - test('Classifier always pick between exact match or svm', async () => { // arrange let oosIntentClassifier = new OOSIntentClassifier(fakeTools, dummyLogger as Logger) @@ -215,8 +175,7 @@ test('Classifier always pick between exact match or svm', async () => { utterances: ['k', 'K'].map(makeTestUtterance) // no ml } ] - await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) - const model = oosIntentClassifier.serialize() + const model = await oosIntentClassifier.train(makeTrainset(intentsDefs), dummyProgress) oosIntentClassifier = new OOSIntentClassifier(fakeTools, dummyLogger as Logger) await oosIntentClassifier.load(model) diff --git a/packages/nlu-engine/src/engine/intents/out-of-scope-featurizer.ts b/packages/nlu-engine/src/engine/intents/out-of-scope-featurizer.ts index f4856fa7..1c9d9fdb 100644 --- a/packages/nlu-engine/src/engine/intents/out-of-scope-featurizer.ts +++ b/packages/nlu-engine/src/engine/intents/out-of-scope-featurizer.ts @@ -1,4 +1,4 @@ -import { MLToolkit } from '../../ml/typings' +import * as MLToolkit from '../../ml/toolkit' import { POSClass } from '../language/pos-tagger' import { averageVectors, scalarMultiply, zeroes } from '../tools/math' diff --git a/packages/nlu-engine/src/engine/intents/remove-none.test.ts b/packages/nlu-engine/src/engine/intents/remove-none.test.ts index 3693dab9..8d7a3f67 100644 --- a/packages/nlu-engine/src/engine/intents/remove-none.test.ts +++ b/packages/nlu-engine/src/engine/intents/remove-none.test.ts @@ -1,7 +1,7 @@ import _ from 'lodash' -import { 
OOSIntentClassifier } from './oos-intent-classfier' import { NoneableIntentPredictions } from './intent-classifier' +import { OOSIntentClassifier } from './oos-intent-classfier' test('remove none intent', () => { // arrange diff --git a/packages/nlu-engine/src/engine/intents/svm-intent-classifier.test.ts b/packages/nlu-engine/src/engine/intents/svm-intent-classifier.test.ts index 794cdd96..34f5455e 100644 --- a/packages/nlu-engine/src/engine/intents/svm-intent-classifier.test.ts +++ b/packages/nlu-engine/src/engine/intents/svm-intent-classifier.test.ts @@ -1,19 +1,18 @@ import _ from 'lodash' +import { Logger } from 'src/typings' import { makeFakeTools } from '../test-utils/fake-tools' import { makeTestUtterance } from '../test-utils/fake-utterance' +import { Intent } from '../typings' import Utterance from '../utterance/utterance' import { SvmIntentClassifier } from './svm-intent-classifier' -import { Intent } from '../typings' -import { ModelLoadingError } from '../../errors' -import { Logger } from 'src/typings' const languageDimension = 10 const languages = ['en'] const fakeTools = makeFakeTools(languageDimension, languages) const fakeFeaturizer = (utt: Utterance) => [..._.range(languageDimension)] const dummyProgress = (p: number) => {} -const dummyLogger: Partial = { debug: () => {} } +const dummyLogger: Partial = { debug: () => {}, warning: () => {} } const emptyDataset = { languageCode: 'en', @@ -51,9 +50,7 @@ const helloILoveYou = makeTestUtterance("hello, I love you won't you tell me you test('predict with no data points returns empty array', async () => { // arrange let intentClassifier = new SvmIntentClassifier(fakeTools, fakeFeaturizer, dummyLogger as Logger) - await intentClassifier.train(emptyDataset, dummyProgress) - - const model = intentClassifier.serialize() + const model = await intentClassifier.train(emptyDataset, dummyProgress) intentClassifier = new SvmIntentClassifier(fakeTools, fakeFeaturizer, dummyLogger as Logger) await intentClassifier.load(model) @@ -67,9 +64,8 @@ test('predict with no data points returns empty array', async () => { test('predict with only one class returns the only class with confidence 1', async () => { // arrange let intentClassifier = new SvmIntentClassifier(fakeTools, fakeFeaturizer, dummyLogger as Logger) - await intentClassifier.train(makeTrainset([intentA]), dummyProgress) + const model = await intentClassifier.train(makeTrainset([intentA]), dummyProgress) - const model = intentClassifier.serialize() intentClassifier = new SvmIntentClassifier(fakeTools, fakeFeaturizer, dummyLogger as Logger) await intentClassifier.load(model) @@ -86,9 +82,8 @@ test('predict with only one class returns the only class with confidence 1', asy test('predict with multiple class returns svm prediction', async () => { // arrange let intentClassifier = new SvmIntentClassifier(fakeTools, fakeFeaturizer, dummyLogger as Logger) - await intentClassifier.train(makeTrainset([intentA, intentB, intentC]), dummyProgress) + const model = await intentClassifier.train(makeTrainset([intentA, intentB, intentC]), dummyProgress) - const model = intentClassifier.serialize() intentClassifier = new SvmIntentClassifier(fakeTools, fakeFeaturizer, dummyLogger as Logger) await intentClassifier.load(model) @@ -103,22 +98,3 @@ test('predict with multiple class returns svm prediction', async () => { const totalConf = confs.reduce((sum, x) => sum + x, 0) expect(totalConf).toEqual(1) }) - -test('When model is corrupted, loading a model throws', async () => { - // arrange - const 
intentClassifier = new SvmIntentClassifier(fakeTools, fakeFeaturizer, dummyLogger as Logger) - await intentClassifier.train(makeTrainset([intentA, intentB, intentC]), dummyProgress) - const model = intentClassifier.serialize() - - // act && asert - await expect(intentClassifier.load(`${model} I'm about to end this model's whole career`)).rejects.toThrowError( - ModelLoadingError - ) - - const parsed = JSON.parse(model) - parsed['someKey'] = 'someValue' - await expect(intentClassifier.load(JSON.stringify(parsed))).rejects.toThrowError(ModelLoadingError) - - const undef: unknown = undefined - await expect(intentClassifier.load(undef as string)).rejects.toThrowError(ModelLoadingError) -}) diff --git a/packages/nlu-engine/src/engine/intents/svm-intent-classifier.ts b/packages/nlu-engine/src/engine/intents/svm-intent-classifier.ts index 0019be40..3cda9d70 100644 --- a/packages/nlu-engine/src/engine/intents/svm-intent-classifier.ts +++ b/packages/nlu-engine/src/engine/intents/svm-intent-classifier.ts @@ -1,47 +1,57 @@ -import Joi, { validate } from 'joi' +import * as ptb from '@bpinternal/ptb-schema' import _ from 'lodash' -import { ModelLoadingError } from '../../errors' -import { MLToolkit } from '../../ml/typings' +import { ModelOf } from 'src/component' +import * as MLToolkit from '../../ml/toolkit' import { Logger } from '../../typings' +import { ModelLoadingError } from '../errors' import { ListEntityModel, PatternEntity, Tools } from '../typings' import Utterance from '../utterance/utterance' import { IntentClassifier, IntentPredictions, IntentTrainInput } from './intent-classifier' type Featurizer = (u: Utterance, entities: string[]) => number[] -export interface Model { - svmModel: string | undefined +type Model = { + svmModel: ModelOf | undefined intentNames: string[] entitiesName: string[] } -interface Predictors { - svm: MLToolkit.SVM.Predictor | undefined +const PTBSvmIntentModel = new ptb.PTBMessage('SvmIntentModel', { + svmModel: { type: MLToolkit.SVM.Classifier.modelType, id: 1, rule: 'optional' }, + intentNames: { type: 'string', id: 2, rule: 'repeated' }, + entitiesName: { type: 'string', id: 3, rule: 'repeated' } +}) + +type Predictors = { + svm: MLToolkit.SVM.Classifier | undefined intentNames: string[] entitiesName: string[] } -const keys: Record = { - svmModel: Joi.string().allow('').optional(), - intentNames: Joi.array().items(Joi.string()).required(), - entitiesName: Joi.array().items(Joi.string()).required() -} -export const modelSchema = Joi.object().keys(keys).required() - -export class SvmIntentClassifier implements IntentClassifier { +export class SvmIntentClassifier implements IntentClassifier { private static _displayName = 'SVM Intent Classifier' private static _name = 'svm-classifier' - private model: Model | undefined private predictors: Predictors | undefined constructor(private tools: Tools, private featurizer: Featurizer, private _logger: Logger) {} - get name() { + public get name() { return SvmIntentClassifier._name } - async train(input: IntentTrainInput, progress: (p: number) => void): Promise { + public static get modelType() { + return PTBSvmIntentModel + } + + public get modelType() { + return PTBSvmIntentModel + } + + public async train( + input: IntentTrainInput, + progress: (p: number) => void + ): Promise> { const { intents, nluSeed, list_entities, pattern_entities } = input const entitiesName = this._getEntitiesName(list_entities, pattern_entities) @@ -59,50 +69,45 @@ export class SvmIntentClassifier implements IntentClassifier { const classCount 
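  // guard just below: with no data points or a single label there is
  // nothing for an SVM to separate, so training returns svmModel: undefined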
= _.uniqBy(points, (p) => p.label).length if (points.length === 0 || classCount <= 1) { this._logger.debug('No SVM to train because there is less than two classes.') - this.model = { + progress(1) + return { svmModel: undefined, intentNames: intents.map((i) => i.name), entitiesName } - progress(1) - return } - const svm = new this.tools.mlToolkit.SVM.Trainer(this._logger) + const svm = new this.tools.mlToolkit.SVM.Classifier(this._logger) - const seed = nluSeed - const svmModel = await svm.train(points, { kernel: 'LINEAR', classifier: 'C_SVC', seed }, progress) + const options: MLToolkit.SVM.SVMOptions = { kernel: 'LINEAR', classifier: 'C_SVC', seed: nluSeed } + const svmModel = await svm.train({ points, options }, progress) - this.model = { + return { svmModel, intentNames: intents.map((i) => i.name), entitiesName } } - serialize(): string { - if (!this.model) { - throw new Error(`${SvmIntentClassifier._displayName} must be trained before calling serialize.`) - } - return JSON.stringify(this.model) - } - - async load(serialized: string): Promise { + public async load(serialized: ptb.Infer): Promise { try { - const raw = JSON.parse(serialized) - const model: Model = await validate(raw, modelSchema) + const { entitiesName, intentNames, svmModel } = serialized + const model: Model = { + svmModel, + entitiesName: entitiesName ?? [], + intentNames: intentNames ?? [] + } + this.predictors = await this._makePredictors(model) - this.model = model - } catch (err) { + } catch (thrown) { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) throw new ModelLoadingError(SvmIntentClassifier._displayName, err) } } private async _makePredictors(model: Model): Promise { const { svmModel, intentNames, entitiesName } = model - - const svm = svmModel ? new this.tools.mlToolkit.SVM.Predictor(svmModel) : undefined - await svm?.initialize() + const svm = svmModel ? await this._makeSvmClf(svmModel) : undefined return { svm, intentNames, @@ -110,13 +115,15 @@ export class SvmIntentClassifier implements IntentClassifier { } } - async predict(utterance: Utterance): Promise { - if (!this.predictors) { - if (!this.model) { - throw new Error(`${SvmIntentClassifier._displayName} must be trained before calling predict.`) - } + private async _makeSvmClf(svmModel: ModelOf): Promise { + const svm = new this.tools.mlToolkit.SVM.Classifier(this._logger) + await svm.load(svmModel) + return svm + } - this.predictors = await this._makePredictors(this.model) + public async predict(utterance: Utterance): Promise { + if (!this.predictors) { + throw new Error(`${SvmIntentClassifier._displayName} must load model before calling predict.`) } const { svm, intentNames, entitiesName } = this.predictors diff --git a/packages/nlu-engine/src/engine/language/lang-client.ts b/packages/nlu-engine/src/engine/language/lang-client.ts new file mode 100644 index 00000000..c6405cda --- /dev/null +++ b/packages/nlu-engine/src/engine/language/lang-client.ts @@ -0,0 +1,87 @@ +import { Client, LanguageInfo, VectorizeResult, TokenizeResult, LanguageState } from '@botpress/lang-client' +import httpsProxyAgent from 'https-proxy-agent' +import { LangServerError } from '../errors' + +export class LanguageClient { + private _client: Client + + constructor(languageURL: string, languageAuthToken?: string) { + const proxyConfig = process.env.PROXY ? 
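+    // PROXY, when set, is taken as a proxy URL (e.g. http://proxy-host:3128,
+    // an illustrative value) and every lang-server request is tunneled
+    // through https-proxy-agent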
{ httpsAgent: new httpsProxyAgent(process.env.PROXY) } : {} + + const headers: _.Dictionary = {} + if (languageAuthToken) { + headers['authorization'] = `bearer ${languageAuthToken}` + } + + this._client = new Client({ + baseURL: languageURL, + headers, + ...proxyConfig + }) + } + + public async getInfo(): Promise { + try { + const infoRes = await this._client.getInfo() + if (!infoRes.success) { + const { error } = infoRes + throw new LangServerError(error) + } + const { success, ...info } = infoRes + return info + } catch (err) { + throw this._mapError(err) + } + } + + public async getLanguages(): Promise { + try { + const langRes = await this._client.getLanguages() + if (!langRes.success) { + const { error } = langRes + throw new LangServerError(error) + } + const { success, ...langState } = langRes + return langState + } catch (err) { + throw this._mapError(err) + } + } + + public async vectorize(tokens: string[], language: string): Promise { + try { + const vectorResponse = await this._client.vectorize(tokens, language) + if (!vectorResponse.success) { + const { error } = vectorResponse + throw new LangServerError(error) + } + const { success, ...vectorResult } = vectorResponse + return vectorResult + } catch (err) { + throw this._mapError(err) + } + } + + public async tokenize(utterances: string[], language: string): Promise { + try { + const tokenResponse = await this._client.tokenize(utterances, language) + if (!tokenResponse.success) { + const { error } = tokenResponse + throw new LangServerError(error) + } + const { success, ...tokenResult } = tokenResponse + return tokenResult + } catch (err) { + throw this._mapError(err) + } + } + + private _mapError = (thrown: any) => { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) + if (err instanceof LangServerError) { + return err + } + const { message, stack } = err + return new LangServerError({ message, stack, code: -1, type: 'internal' }) + } +} diff --git a/packages/nlu-engine/src/engine/language/language-identifier.ts b/packages/nlu-engine/src/engine/language/language-identifier.ts index c64f538a..32817592 100644 --- a/packages/nlu-engine/src/engine/language/language-identifier.ts +++ b/packages/nlu-engine/src/engine/language/language-identifier.ts @@ -1,7 +1,7 @@ import { readFileSync, writeFileSync } from 'fs' import _ from 'lodash' import tmp from 'tmp' -import { MLToolkit } from '../../ml/typings' +import * as MLToolkit from '../../ml/toolkit' import { Predictors } from '../predict-pipeline' const NA_LANG = 'n/a' @@ -23,7 +23,7 @@ export class FastTextLanguageId { FastTextLanguageId.model = ft } - async identify(text: string): Promise { + public async identify(text: string): Promise { if (!FastTextLanguageId.model) { return [] } diff --git a/packages/nlu-engine/src/engine/language/language-provider.ts b/packages/nlu-engine/src/engine/language/language-provider.ts index a030e257..d8aa4f8d 100644 --- a/packages/nlu-engine/src/engine/language/language-provider.ts +++ b/packages/nlu-engine/src/engine/language/language-provider.ts @@ -1,435 +1,114 @@ -import axios, { AxiosInstance } from 'axios' -import Bluebird from 'bluebird' import retry from 'bluebird-retry' import crypto from 'crypto' import fse from 'fs-extra' -import httpsProxyAgent from 'https-proxy-agent' import _, { debounce, sumBy } from 'lodash' import lru from 'lru-cache' -import moment from 'moment' import ms from 'ms' import path from 'path' import semver from 'semver' -import { Health } from 'src/typings' -import { LanguageSource, Logger as 
ILogger } from '../../typings' -import { setSimilarity, vocabNGram } from '../tools/strings' -import { isSpace, processUtteranceTokens, restoreOriginalUtteranceCasing } from '../tools/token-utils' -import { Gateway, LangServerInfo, LangsGateway, LanguageProvider, SeededLodashProvider } from '../typings' +import { Logger as ILogger } from '../../typings' +import { + isSpace, + processUtteranceTokens, + restoreOriginalSpaces, + restoreOriginalUtteranceCasing +} from '../tools/token-utils' +import { LangServerInfo } from '../typings' +import { LanguageClient } from './lang-client' +import { LegacyLanguageClient } from './legacy-lang-client' const MAX_PAYLOAD_SIZE = 150 * 1024 // 150kb -const JUNK_VOCAB_SIZE = 500 -const JUNK_TOKEN_MIN = 1 -const JUNK_TOKEN_MAX = 20 - const VECTOR_FILE_PREFIX = 'lang_vectors' const TOKEN_FILE_PREFIX = 'utterance_tokens' -const JUNK_FILE_PREFIX = 'junk_words' -export class RemoteLanguageProvider implements LanguageProvider { - private _cacheDir!: string - private _vectorsCachePath!: string - private _junkwordsCachePath!: string - private _tokensCachePath!: string +const DISCOVERY_RETRY_POLICY: retry.Options = { + interval: 1000, + max_interval: 5000, + timeout: 2000, + max_tries: 5 +} - private _vectorsCache!: lru - private _tokensCache!: lru - private _junkwordsCache!: lru +export type LangProviderArgs = { + languageURL: string + languageAuthToken?: string + cacheDir: string +} +export class LanguageProvider { + private _vectorsCache: lru + private _tokensCache: lru private _cacheDumpDisabled: boolean = false - private _validProvidersCount!: number - private _languageDims!: number + private _cacheFormatVersion: string = '1.0.0' // increment when changing cache file format to invalidate old cache files - private _nluVersion!: string - private _langServerInfo!: LangServerInfo + public static async create(logger: ILogger, args: LangProviderArgs): Promise { + const { languageURL, languageAuthToken, cacheDir } = args - private _seededLodashProvider!: SeededLodashProvider + const legacyClient = new LegacyLanguageClient(languageURL, languageAuthToken) - private _logger!: ILogger - - private discoveryRetryPolicy = { - interval: 1000, - max_interval: 5000, - timeout: 2000, - max_tries: 5 - } + let installedLanguages: string[] | undefined + let langServerInfo: LangServerInfo | undefined - private langs: LangsGateway = {} - - get languages(): string[] { - return Object.keys(this.langs) - } - - private addProvider(lang: string, source: LanguageSource, client: AxiosInstance) { - this.langs[lang] = [...(this.langs[lang] || []), { source, client, errors: 0, disabledUntil: undefined }] - this._logger.debug(`[${lang.toUpperCase()}] Language Provider added ${source}`) - } - - async initialize( - sources: LanguageSource[], - logger: ILogger, - nluVersion: string, - cacheDir: string, - seededLodashProvider: SeededLodashProvider - ): Promise { - this._nluVersion = nluVersion - this._validProvidersCount = 0 - this._logger = logger - this._cacheDir = cacheDir - - this._seededLodashProvider = seededLodashProvider - - this._vectorsCache = new lru({ - length: (arr: Float32Array) => { - if (arr && arr.BYTES_PER_ELEMENT) { - return arr.length * arr.BYTES_PER_ELEMENT - } else { - return 300 /* dim */ * Float32Array.BYTES_PER_ELEMENT - } - }, - max: 300 /* dim */ * Float32Array.BYTES_PER_ELEMENT /* bytes */ * 500000 /* tokens */ - }) - - this._tokensCache = new lru({ - length: (val: string[], key: string) => key.length * 4 + sumBy(val, (x) => x.length * 4), - max: - 4 * // bytes in strings 
- 5 * // average size of token - 10 * // nb of tokens per utterance - 10 * // nb of utterances per intent - 200 * // nb of intents per model - 10 * // nb of models per bot - 50 // nb of bots - // total is ~ 200 mb - }) - - this._junkwordsCache = new lru({ - length: (val: string[], key: string[]) => sumBy(key, (x) => x.length * 4) + sumBy(val, (x) => x.length * 4), - max: - 4 * // bytes in strings - 10 * // token size - 500 * // vocab size - 1000 * // junk words - 10 // models - // total is ~ 200 mb - }) - - await Bluebird.mapSeries(sources, async (source) => { - const headers: _.Dictionary = {} - - if (source.authToken) { - headers['authorization'] = `bearer ${source.authToken}` - } - - const proxyConfig = process.env.PROXY ? { httpsAgent: new httpsProxyAgent(process.env.PROXY) } : {} - - const client = axios.create({ - baseURL: source.endpoint, - headers, - ...proxyConfig - }) - try { - await retry(async () => { - const { data } = await client.get('/info') - - if (!data.ready) { - throw new Error('Language source is not ready') - } - - if (!this._languageDims) { - this._languageDims = data.dimentions // note typo in language server - } - - // TODO: also check that the domain and version is consistent across all sources - if (this._languageDims !== data.dimentions) { - throw new Error('Language sources have different dimensions') - } - this._validProvidersCount++ - data.languages.forEach((x) => this.addProvider(x.lang, source, client)) - - this.extractLangServerInfo(data) - }, this.discoveryRetryPolicy) - } catch (err) { - this.handleLanguageServerError(err, source.endpoint) + let langClient: LanguageClient | LegacyLanguageClient | undefined + await retry(async () => { + const info = await legacyClient.getInfo() + if (!info.ready) { + throw new Error('Language server is not ready.') } - }) - - this.computeCacheFilesPaths() - await this.clearOldCacheFiles() - - this._logger.debug(`loaded ${Object.keys(this.langs).length} languages from ${sources.length} sources`) - - await this.restoreVectorsCache() - await this.restoreJunkWordsCache() - await this.restoreTokensCache() - - return this as LanguageProvider - } - - public get langServerInfo(): LangServerInfo { - return this._langServerInfo - } - - private extractLangServerInfo(data) { - const version = semver.valid(semver.coerce(data.version)) - - if (!version) { - throw new Error('Lang server has an invalid version') - } - const langServerInfo = { - version: semver.clean(version), - dim: data.dimentions, - domain: data.domain - } - this._langServerInfo = langServerInfo - } - private computeCacheFilesPaths = () => { - const versionHash = this.computeVersionHash() - this._vectorsCachePath = path.join(this._cacheDir, `${VECTOR_FILE_PREFIX}_${versionHash}.json`) - this._junkwordsCachePath = path.join(this._cacheDir, `${JUNK_FILE_PREFIX}_${versionHash}.json`) - this._tokensCachePath = path.join(this._cacheDir, `${TOKEN_FILE_PREFIX}_${versionHash}.json`) - } - - private clearOldCacheFiles = async () => { - const cacheExists = await fse.pathExists(this._cacheDir) - if (!cacheExists) { - return - } - - const allCacheFiles = await fse.readdir(this._cacheDir) - - const currentHash = this.computeVersionHash() - - const fileStartWithPrefix = (fileName: string) => { - return ( - fileName.startsWith(VECTOR_FILE_PREFIX) || - fileName.startsWith(TOKEN_FILE_PREFIX) || - fileName.startsWith(JUNK_FILE_PREFIX) - ) - } - - const fileEndsWithIncorrectHash = (fileName: string) => !fileName.includes(currentHash) - - const filesToDelete = allCacheFiles - 
.filter(fileStartWithPrefix) - .filter(fileEndsWithIncorrectHash) - .map((f) => path.join(this._cacheDir, f)) - - for (const f of filesToDelete) { - await fse.unlink(f) - } - } - - private handleLanguageServerError = (err, endpoint: string) => { - const status = _.get(err, 'failure.response.status') - const details = _.get(err, 'failure.response.message') - - if (status === 429) { - this._logger.error( - `Could not load Language Server: ${details}. You may be over the limit for the number of requests allowed for the endpoint ${endpoint}` - ) - } else if (status === 401) { - this._logger.error(`You must provide a valid authentication token for the endpoint ${endpoint}`) - } else { - this._logger.error(`Could not load Language Provider at ${endpoint}: ${err.code}`, err) - } - } - - private onTokensCacheChanged = debounce(async () => { - if (!this._cacheDumpDisabled) { - await this.dumpTokensCache() - } - }, ms('5s')) - - private onVectorsCacheChanged = debounce(async () => { - if (!this._cacheDumpDisabled) { - await this.dumpVectorsCache() - } - }, ms('5s')) - - private onJunkWordsCacheChanged = debounce(async () => { - if (!this._cacheDumpDisabled) { - await this.dumpJunkWordsCache() - } - }, ms('5s')) - - private async dumpTokensCache() { - try { - await fse.ensureFile(this._tokensCachePath) - await fse.writeJson(this._tokensCachePath, this._tokensCache.dump()) - this._logger.debug(`tokens cache updated at: ${this._tokensCachePath}`) - } catch (err) { - this._logger.debug(`could not persist tokens cache, error: ${err.message}`) - this._cacheDumpDisabled = true - } - } - - private async restoreTokensCache() { - try { - if (await fse.pathExists(this._tokensCachePath)) { - const dump = await fse.readJSON(this._tokensCachePath) - this._tokensCache.load(dump) - } - } catch (err) { - this._logger.debug(`could not restore tokens cache, error: ${err.message}`) - } - } - - private async dumpVectorsCache() { - try { - await fse.ensureFile(this._vectorsCachePath) - await fse.writeJSON(this._vectorsCachePath, this._vectorsCache.dump()) - this._logger.debug(`vectors cache updated at: ${this._vectorsCachePath}`) - } catch (err) { - this._logger.debug(`could not persist vectors cache, error: ${err.message}`) - this._cacheDumpDisabled = true - } - } - - private async restoreVectorsCache() { - try { - if (await fse.pathExists(this._vectorsCachePath)) { - const dump = await fse.readJSON(this._vectorsCachePath) - if (dump) { - const kve = dump.map((x) => ({ e: x.e, k: x.k, v: Float32Array.from(Object.values(x.v)) })) - this._vectorsCache.load(kve) - } + // TODO: remove all these checks ASAP + if (!info.version || !_.isString(info.version)) { + throw new Error('Expected route GET /info to return object with string version') + } else if (!semver.valid(info.version) || semver.lt(info.version, '1.2.0')) { + logger.warning( + 'The language server provided uses a deprecated API. Please update the language server to the latest version.' 
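+          // pre-1.2.0 servers only expose the legacy HTTP routes, so we
+          // fall back to LegacyLanguageClient instead of failing outright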
+ ) + langClient = legacyClient + } else { + langClient = new LanguageClient(languageURL, languageAuthToken) } - } catch (err) { - this._logger.debug(`could not restore vectors cache, error: ${err.message}`) - } - } - private async dumpJunkWordsCache() { - try { - await fse.ensureFile(this._junkwordsCachePath) - await fse.writeJSON(this._junkwordsCachePath, this._junkwordsCache.dump()) - this._logger.debug(`junk words cache updated at: ${this._junkwordsCache}`) - } catch (err) { - this._logger.debug(`could not persist junk cache, error: ${err.message}`) - this._cacheDumpDisabled = true - } - } - - private async restoreJunkWordsCache() { - try { - if (await fse.pathExists(this._junkwordsCachePath)) { - const dump = await fse.readJSON(this._junkwordsCachePath) - this._vectorsCache.load(dump) + const langState = await langClient.getLanguages() + const { installed } = langState + installedLanguages = installed.map((x) => x.code) + langServerInfo = { + version: info.version, + dim: info.dimentions, + domain: info.domain } - } catch (err) { - this._logger.debug(`could not restore junk cache, error: ${err.message}`) - } - } + }, DISCOVERY_RETRY_POLICY) - getHealth(): Partial { - return { validProvidersCount: this._validProvidersCount, validLanguages: Object.keys(this.langs) } - } - - private getAvailableProviders(lang: string): Gateway[] { - if (!this.langs[lang]) { - throw new Error(`Language "${lang}" is not supported by the configured language sources`) + if (!langClient || !installedLanguages || !langServerInfo) { + throw new Error('Language Server initialization failed') } - return this.langs[lang].filter((x) => !x.disabledUntil || x.disabledUntil <= new Date()) + const provider = new LanguageProvider(langClient, logger, langServerInfo, installedLanguages, cacheDir) + await provider._clearOldCacheFiles() + await provider._restoreVectorsCache() + await provider._restoreTokensCache() + return provider } - private async queryProvider(lang: string, path: string, body: any, returnProperty: string): Promise { - const providers = this.getAvailableProviders(lang) - - for (const provider of providers) { - try { - const { data } = await provider.client.post(path, { ...body, lang }) - - if (data && data[returnProperty]) { - return data[returnProperty] as T - } - - return data - } catch (err) { - this._logger.debug( - `error from language server ${JSON.stringify({ - message: err.message, - code: err.code, - status: err.status, - payload: body - })}` - ) - - if (this.getAvailableProviders(lang).length > 1) { - // we don't disable providers when there's no backup - provider.disabledUntil = moment() - .add(provider.errors++, 'seconds') - .toDate() - - this._logger.debug( - `disabled temporarily source ${JSON.stringify({ - source: provider.source, - err: err.message, - errors: provider.errors, - until: provider.disabledUntil - })}` - ) - } - } - } - - throw new Error(`No provider could successfully fullfil request "${path}" for lang "${lang}"`) + private constructor( + private _langClient: LanguageClient | LegacyLanguageClient, + private _logger: ILogger, + private _langServerInfo: LangServerInfo, + private _installedLanguages: string[], + private _cacheDir: string + ) { + this._vectorsCache = this._makeVectorCache() + this._tokensCache = this._makeTokenCache() } - /** - * Generates words that don't exist in the vocabulary, but that are built from ngrams of existing vocabulary - * @param subsetVocab The tokens to which you want similar tokens to - */ - async generateSimilarJunkWords(subsetVocab: string[], 
lang: string): Promise { - // TODO: we can remove await + lang - // from totalVocab compute the cachedKey the closest to what we have - // if 75% of the vocabulary is the same, we keep the cache we have instead of rebuilding one - const gramset = vocabNGram(subsetVocab) - let result: string[] | undefined - - this._junkwordsCache.forEach((junk, vocab) => { - if (!result) { - const sim = setSimilarity(vocab, gramset) - if (sim >= 0.75) { - result = junk - } - } - }) - - if (!result) { - // didn't find any close gramset, let's create a new one - result = this.generateJunkWords(subsetVocab, gramset) // randomly generated words - await this.vectorize(result, lang) // vectorize them all in one request to cache the tokens // TODO: remove this - this._junkwordsCache.set(gramset, result) - await this.onJunkWordsCacheChanged() - } - - return result + public get languages(): string[] { + return [...this._installedLanguages] } - private generateJunkWords(subsetVocab: string[], gramset: string[]) { - const realWords = _.uniq(subsetVocab) - const meanWordSize = _.meanBy(realWords, (w) => w.length) - const minJunkSize = Math.max(JUNK_TOKEN_MIN, meanWordSize / 2) // Twice as short - const maxJunkSize = Math.min(JUNK_TOKEN_MAX, meanWordSize * 1.5) // A bit longer. Those numbers are discretionary and are not expected to make a big impact on the models. - - const lo = this._seededLodashProvider.getSeededLodash() - - const junks = _.range(0, JUNK_VOCAB_SIZE).map(() => { - const finalSize = lo.random(minJunkSize, maxJunkSize, false) - let word = '' - while (word.length < finalSize) { - word += lo.sample(gramset) - } - return word - }) // randomly generated words - - return junks + public get langServerInfo(): LangServerInfo { + return this._langServerInfo } - async vectorize(tokens: string[], lang: string): Promise { + public async vectorize(tokens: string[], lang: string): Promise { if (!tokens.length) { return [] } @@ -440,7 +119,7 @@ export class RemoteLanguageProvider implements LanguageProvider { tokens.forEach((token, i) => { if (isSpace(token)) { - vectors[i] = new Float32Array(this._languageDims) // float 32 Arrays are initialized with 0s + vectors[i] = new Float32Array(this._langServerInfo.dim) // float 32 Arrays are initialized with 0s } else if (this._vectorsCache.has(getCacheKey(token))) { vectors[i] = this._vectorsCache.get(getCacheKey(token))! 
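        // cache hit: reuse a vector fetched by an earlier call; tokens that
        // miss the cache are collected below and fetched in size-capped batches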
} else { @@ -459,7 +138,7 @@ export class RemoteLanguageProvider implements LanguageProvider { break } - const fetched = await this.queryProvider(lang, '/vectorize', { tokens: query }, 'vectors') + const { vectors: fetched } = await this._langClient.vectorize(query, lang) if (fetched.length !== query.length) { throw new Error( @@ -473,17 +152,13 @@ export class RemoteLanguageProvider implements LanguageProvider { this._vectorsCache.set(getCacheKey(tokens[tokenIdx]), vectors[tokenIdx]) }) - await this.onVectorsCacheChanged() + await this._onVectorsCacheChanged() } return vectors } - _hash(str: string): string { - return crypto.createHash('md5').update(str).digest('hex') - } - - async tokenize(utterances: string[], lang: string, vocab: string[] = []): Promise { + public async tokenize(utterances: string[], lang: string, vocab: string[] = []): Promise { if (!utterances.length) { return [] } @@ -507,13 +182,10 @@ export class RemoteLanguageProvider implements LanguageProvider { // While there's utterances we haven't tokenized yet // We're going to batch requests by maximum 150KB worth's of utterances let totalSize = 0 - const sliceUntil = idxToFetch.reduce((topIdx, idx, i) => { - if ((totalSize += utterances[idx].length * 4) < MAX_PAYLOAD_SIZE) { - return i - } else { - return topIdx - } - }, 0) + const sliceUntil = idxToFetch.reduce( + (topIdx, idx, i) => ((totalSize += utterances[idx].length * 4) < MAX_PAYLOAD_SIZE ? i : topIdx), + 0 + ) const batch = idxToFetch.splice(0, sliceUntil + 1) const query = batch.map((idx) => utterances[idx].toLowerCase()) @@ -521,7 +193,7 @@ export class RemoteLanguageProvider implements LanguageProvider { break } - let fetched = await this.queryProvider(lang, '/tokenize', { utterances: query }, 'tokens') + let { tokens: fetched } = await this._langClient.tokenize(query, lang) fetched = fetched.map((toks) => processUtteranceTokens(toks, vocab)) if (fetched.length !== query.length) { @@ -536,21 +208,150 @@ export class RemoteLanguageProvider implements LanguageProvider { this._tokensCache.set(getCacheKey(utterances[utteranceIdx]), tokenUtterances[utteranceIdx]) }) - await this.onTokensCacheChanged() + await this._onTokensCacheChanged() } - // we restore original chars and casing - return tokenUtterances.map((tokens, i) => restoreOriginalUtteranceCasing(tokens, utterances[i])) + const spaceRestored = tokenUtterances.map((tokens, i) => restoreOriginalSpaces(tokens, utterances[i])) + const caseRestored = spaceRestored.map((tokens, i) => restoreOriginalUtteranceCasing(tokens, utterances[i])) + return caseRestored } - private computeVersionHash = () => { - const { _nluVersion, _langServerInfo } = this - const { dim, domain, version: langServerVersion } = _langServerInfo + private _makeVectorCache = (): lru => { + return new lru({ + length: (arr: Float32Array) => { + if (arr && arr.BYTES_PER_ELEMENT) { + return arr.length * arr.BYTES_PER_ELEMENT + } else { + return 300 /* dim */ * Float32Array.BYTES_PER_ELEMENT + } + }, + max: 300 /* dim */ * Float32Array.BYTES_PER_ELEMENT /* bytes */ * 500000 /* tokens */ + }) + } - const omitPatchNumber = (v: string) => `${semver.major(v)}.${semver.minor(v)}.0` - const hashContent = `${omitPatchNumber(_nluVersion)}:${omitPatchNumber(langServerVersion)}:${dim}:${domain}` + private _makeTokenCache = (): lru => { + return new lru({ + length: (val: string[], key: string) => key.length * 4 + sumBy(val, (x) => x.length * 4), + max: + 4 * // bytes in strings + 5 * // average size of token + 10 * // nb of tokens per utterance + 10 * // nb of 
utterances per intent + 200 * // nb of intents per model + 10 * // nb of models per bot + 50 // nb of bots + // total is ~ 200 mb + }) + } + + private _computeCacheFilesPaths = () => { + const versionHash = this._computeVersionHash() + const vectorsCachePath = path.join(this._cacheDir, `${VECTOR_FILE_PREFIX}_${versionHash}.json`) + const tokensCachePath = path.join(this._cacheDir, `${TOKEN_FILE_PREFIX}_${versionHash}.json`) + return { vectorsCachePath, tokensCachePath } + } + + private _clearOldCacheFiles = async () => { + const cacheExists = await fse.pathExists(this._cacheDir) + if (!cacheExists) { + return + } + + const allCacheFiles = await fse.readdir(this._cacheDir) + + const currentHash = this._computeVersionHash() + + const fileStartWithPrefix = (fileName: string) => { + return fileName.startsWith(VECTOR_FILE_PREFIX) || fileName.startsWith(TOKEN_FILE_PREFIX) + } + + const fileEndsWithIncorrectHash = (fileName: string) => !fileName.includes(currentHash) + + const filesToDelete = allCacheFiles + .filter(fileStartWithPrefix) + .filter(fileEndsWithIncorrectHash) + .map((f) => path.join(this._cacheDir, f)) + + for (const f of filesToDelete) { + await fse.unlink(f) + } + } + + private _onTokensCacheChanged = debounce(async () => { + if (!this._cacheDumpDisabled) { + await this._dumpTokensCache() + } + }, ms('5s')) + + private _onVectorsCacheChanged = debounce(async () => { + if (!this._cacheDumpDisabled) { + await this._dumpVectorsCache() + } + }, ms('5s')) + + private async _dumpTokensCache() { + try { + const { tokensCachePath } = this._computeCacheFilesPaths() + await fse.ensureFile(tokensCachePath) + await fse.writeJson(tokensCachePath, this._tokensCache.dump()) + this._logger.debug(`tokens cache updated at: ${tokensCachePath}`) + } catch (thrown) { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) + this._logger.debug(`could not persist tokens cache, error: ${err.message}`) + this._cacheDumpDisabled = true + } + } + + private async _restoreTokensCache() { + try { + const { tokensCachePath } = this._computeCacheFilesPaths() + if (await fse.pathExists(tokensCachePath)) { + const dump = await fse.readJSON(tokensCachePath) + this._tokensCache.load(dump) + } + } catch (thrown) { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) + this._logger.debug(`could not restore tokens cache, error: ${err.message}`) + } + } + + private async _dumpVectorsCache() { + try { + const { vectorsCachePath } = this._computeCacheFilesPaths() + await fse.ensureFile(vectorsCachePath) + await fse.writeJSON(vectorsCachePath, this._vectorsCache.dump()) + this._logger.debug(`vectors cache updated at: ${vectorsCachePath}`) + } catch (thrown) { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) + this._logger.debug(`could not persist vectors cache, error: ${err.message}`) + this._cacheDumpDisabled = true + } + } + + private async _restoreVectorsCache() { + try { + const { vectorsCachePath } = this._computeCacheFilesPaths() + if (await fse.pathExists(vectorsCachePath)) { + const dump = await fse.readJSON(vectorsCachePath) + if (dump) { + const kve = dump.map((x) => ({ e: x.e, k: x.k, v: Float32Array.from(Object.values(x.v)) })) + this._vectorsCache.load(kve) + } + } + } catch (thrown) { + const err = thrown instanceof Error ? 
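+      // thrown values are not guaranteed to be Error instances; normalize
+      // before logging (the same pattern recurs in every catch block here)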
thrown : new Error(`${thrown}`) + this._logger.debug(`could not restore vectors cache, error: ${err.message}`) + } + } + + private _computeVersionHash = () => { + const { _cacheFormatVersion, _langServerInfo } = this + const { dim, domain, version: langServerVersion } = _langServerInfo + const hashContent = `${_cacheFormatVersion}:${langServerVersion}:${dim}:${domain}` return crypto.createHash('md5').update(hashContent).digest('hex') } -} -export default new RemoteLanguageProvider() + private _hash(str: string): string { + return crypto.createHash('md5').update(str).digest('hex') + } +} diff --git a/packages/nlu-engine/src/engine/language/legacy-lang-client.ts b/packages/nlu-engine/src/engine/language/legacy-lang-client.ts new file mode 100644 index 00000000..36c3b75d --- /dev/null +++ b/packages/nlu-engine/src/engine/language/legacy-lang-client.ts @@ -0,0 +1,95 @@ +import { LanguageInfo, LanguageState, VectorizeResult, TokenizeResult } from '@botpress/lang-client' +import axios, { AxiosInstance, AxiosResponse, AxiosRequestConfig } from 'axios' +import httpsProxyAgent from 'https-proxy-agent' +import { LangServerError } from '../errors' + +type HTTPVerb = 'GET' | 'POST' | 'PUT' | 'DELETE' +type HTTPCall = { + verb: V + ressource: string +} + +// TODO: fully remove this ASAP +export class LegacyLanguageClient { + private _client: AxiosInstance + + constructor(languageURL: string, languageAuthToken?: string) { + const proxyConfig = process.env.PROXY ? { httpsAgent: new httpsProxyAgent(process.env.PROXY) } : {} + + const headers: _.Dictionary = {} + if (languageAuthToken) { + headers['authorization'] = `bearer ${languageAuthToken}` + } + + this._client = axios.create({ + baseURL: languageURL, + headers, + ...proxyConfig + }) + } + + public async getInfo(): Promise { + const call: HTTPCall<'GET'> = { ressource: 'info', verb: 'GET' } + const { data } = await this._get(call) + return data + } + + public async getLanguages(): Promise { + const call: HTTPCall<'GET'> = { ressource: 'languages', verb: 'GET' } + const { data } = await this._get(call) + return data + } + + public async vectorize(tokens: string[], lang: string): Promise { + const call: HTTPCall<'POST'> = { ressource: 'vectorize', verb: 'POST' } + const { data } = await this._post(call, { tokens, lang }) + return data + } + + public async tokenize(utterances: string[], lang: string): Promise { + const call: HTTPCall<'POST'> = { ressource: 'tokenize', verb: 'POST' } + const { data } = await this._post(call, { utterances, lang }) + return data + } + + private _post = async ( + call: HTTPCall<'POST'>, + body?: any, + config?: AxiosRequestConfig + ): Promise> => { + try { + const { ressource } = call + const res = await this._client.post(ressource, body, config) + return res + } catch (err) { + // axios validate status does not prevent all exceptions + throw this._mapError(call, err) + } + } + + private _get = async (call: HTTPCall<'GET'>, config?: AxiosRequestConfig): Promise> => { + try { + const { ressource } = call + const res = await this._client.get(ressource, config) + return res + } catch (err) { + // axios validate status does not prevent all exceptions + throw this._mapError(call, err) + } + } + + private _mapError = (call: HTTPCall, thrown: any): Error => { + const err = thrown instanceof Error ? 
thrown : new Error(`${thrown}`) + if (err instanceof LangServerError) { + return err + } + + const { message: originalMsg, stack } = err + const { verb, ressource } = call + const ressourcePath = `lang-server/${ressource}` + const prefix = `${verb} ${ressourcePath}` + const message = `(${prefix}) ${originalMsg}` + + return new LangServerError({ message, stack, code: -1, type: 'internal' }) + } +} diff --git a/packages/nlu-engine/src/engine/language/pos-tagger.test.ts b/packages/nlu-engine/src/engine/language/pos-tagger.test.ts index 2ad8de1d..f3686da5 100644 --- a/packages/nlu-engine/src/engine/language/pos-tagger.test.ts +++ b/packages/nlu-engine/src/engine/language/pos-tagger.test.ts @@ -1,42 +1,41 @@ -import MLToolkit from '../../ml/toolkit' +import { PredictorOf } from 'src/component' +import { Logger } from 'src/typings' +import * as MLToolkit from '../../ml/toolkit' import { tokenizeLatinTextForTests } from '../test-utils/fake-tools' import { isSpace } from '../tools/token-utils' import { fallbackTagger, getPOSTagger, tagSentence } from './pos-tagger' +const dummyLogger: Partial = { debug: () => {} } + describe('POS Tagger', () => { - test('Fallback tagger returns NA tags properly', () => { + test('Fallback tagger returns NA tags properly', async () => { const feats = [['feat1=1', 'feat2'], ['feat1=2'], ['feat1=3', 'feat2']] - const { probability, result: tags } = fallbackTagger.tag(feats) + const { probability, result: tags } = await fallbackTagger.predict(feats) expect(probability).toEqual(1) expect(tags.every((t) => t === 'N/A')).toBeTruthy() - - fallbackTagger.marginal(feats).forEach((res) => { - expect(res).toEqual({ 'N/A': 1 }) - }) }) test('Get tagger returns FB tagger for other languages than english', async () => { - const tagger = await getPOSTagger('', 'de', {} as typeof MLToolkit) + const tagger = await getPOSTagger('', 'de', {} as typeof MLToolkit, dummyLogger as Logger) expect(tagger).toEqual(fallbackTagger) }) describe('tagSentence', () => { const mockedTagger = { ...fallbackTagger, - tag: jest.fn((xseq) => fallbackTagger.tag(xseq)) + predict: jest.fn((xseq) => fallbackTagger.predict(xseq)) } - test('Calls tagger without spaces and adds _ for space tokens', () => { + test('Calls tagger without spaces and adds _ for space tokens', async () => { const xseq = tokenizeLatinTextForTests( 'A Sea Fox is a Fox-alien-fish crossbreed with a strange amalgamation of a bunch of different animals and plants' ) const n_space = xseq.filter((t) => isSpace(t)).length - // @ts-ignore - const tags = tagSentence(mockedTagger as sdk.MLToolkit.CRF.Tagger, xseq) - expect(mockedTagger.tag.mock.calls[0][0].length).toEqual(xseq.length - n_space) + const tags = await tagSentence(mockedTagger as PredictorOf, xseq) + expect(mockedTagger.predict.mock.calls[0][0].length).toEqual(xseq.length - n_space) expect(tags.filter((t) => isSpace(t)).length).toEqual(n_space) tags .filter((t) => !isSpace(t)) diff --git a/packages/nlu-engine/src/engine/language/pos-tagger.ts b/packages/nlu-engine/src/engine/language/pos-tagger.ts index 70ea33bb..1aedcaf5 100644 --- a/packages/nlu-engine/src/engine/language/pos-tagger.ts +++ b/packages/nlu-engine/src/engine/language/pos-tagger.ts @@ -1,8 +1,9 @@ import Bluebird from 'bluebird' import fs from 'fs' import path from 'path' -import tmp from 'tmp' -import { MLToolkit } from '../../ml/typings' +import { PredictorOf } from 'src/component' +import { Logger } from 'src/typings' +import * as MLToolkit from '../../ml/toolkit' import { isSpace, SPACE } from 
'../tools/token-utils' @@ -91,47 +92,37 @@ function wordFeatures(seq: string[], idx: number): string[] { }) } -export const fallbackTagger: MLToolkit.CRF.Tagger = { - tag: (seq) => ({ probability: 1, result: new Array(seq.length).fill('N/A') }), - initialize: async () => {}, - open: (f) => false, - marginal: (seq) => new Array(seq.length).fill({ 'N/A': 1 }) +export const fallbackTagger: PredictorOf = { + predict: async (seq: string[][]) => ({ probability: 1, result: new Array(seq.length).fill('N/A') }) } // eventually this will be moved in language provider // POS tagging will reside language server once we support more than english -const taggersByLang: { [lang: string]: MLToolkit.CRF.Tagger } = {} +const taggersByLang: { [lang: string]: PredictorOf } = {} export async function getPOSTagger( preTrainedDir: string, languageCode: string, - toolkit: typeof MLToolkit -): Promise { + toolkit: typeof MLToolkit, + logger: Logger +): Promise> { if (!isPOSAvailable(languageCode)) { return fallbackTagger } if (!taggersByLang[languageCode]) { - const tagger = new toolkit.CRF.Tagger() - await tagger.initialize() const preTrainedPath = getPretrainedModelFilePath(preTrainedDir, languageCode) - - // copy file to actual disk using only read/write functions because of pkg - const tmpFile = await Bluebird.fromCallback(tmp.file) const model = await Bluebird.fromCallback((cb) => fs.readFile(preTrainedPath, cb)) - await Bluebird.fromCallback((cb) => fs.writeFile(tmpFile, model, cb)) - const openSuccess = tagger.open(tmpFile) - if (!openSuccess) { - throw new Error(`Could not open POS tagger for language "${languageCode}".`) - } + const tagger = new toolkit.CRF.Tagger(logger) + await tagger.load({ content: model }) taggersByLang[languageCode] = tagger } return taggersByLang[languageCode] } -export function tagSentence(tagger: MLToolkit.CRF.Tagger, tokens: string[]): POSClass[] { +export async function tagSentence(tagger: PredictorOf, tokens: string[]): Promise { const [words, spaceIdx] = tokens.reduce( ([words, spaceIdx], token, idx) => { if (isSpace(token)) { @@ -148,7 +139,7 @@ export function tagSentence(tagger: MLToolkit.CRF.Tagger, tokens: string[]): POS feats.push(wordFeatures(words, i)) } - const tags = tagger.tag(feats).result + const { result: tags } = await tagger.predict(feats) for (const idx of spaceIdx) { tags.splice(idx, 0, SPACE) } diff --git a/packages/nlu-engine/src/engine/linting-process-pool/index.ts b/packages/nlu-engine/src/engine/linting-process-pool/index.ts new file mode 100644 index 00000000..3e8b8bc1 --- /dev/null +++ b/packages/nlu-engine/src/engine/linting-process-pool/index.ts @@ -0,0 +1,52 @@ +import { errors, makeProcessPool, ProcessPool } from '@botpress/worker' +import _ from 'lodash' +import { LanguageConfig, LintingProgressCb, Logger } from '../../typings' +import { LintingAlreadyStartedError, LintingCanceledError, LintingExitedUnexpectedlyError } from '../errors' +import { ErrorHandler } from '../training-process-pool/error-handler' +import { ENTRY_POINT } from './process-entry-point' +import { LintingInput, LintingOuput, LintingProgress } from './typings' + +export class LintingProcessPool { + private _processPool: ProcessPool + + constructor(private _logger: Logger, config: LanguageConfig) { + const env = { + ...process.env, + NLU_CONFIG: JSON.stringify(config) + } + + this._processPool = makeProcessPool(this._logger, { + maxWorkers: Number.POSITIVE_INFINITY, + entryPoint: ENTRY_POINT, + env, + errorHandler: new ErrorHandler() + }) + } + + public async 
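+  // cancellation is keyed by lintId: the pool is expected to abort the
+  // matching task, and the pending startLinting() call then rejects
+  // (remapped to LintingCanceledError below)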
+  public async cancelLinting(lintId: string): Promise<void> {
+    return this._processPool.cancel(lintId)
+  }
+
+  public async startLinting(input: LintingInput, progress: LintingProgressCb): Promise<LintingOuput> {
+    try {
+      const output = await this._processPool.run(
+        input.lintId,
+        input,
+        (_p: number, { current, total, issues }: LintingProgress) => progress(current, total, issues)
+      )
+      return output
+    } catch (thrown) {
+      const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
+      if (err instanceof errors.TaskCanceledError) {
+        throw new LintingCanceledError()
+      }
+      if (err instanceof errors.TaskAlreadyStartedError) {
+        throw new LintingAlreadyStartedError()
+      }
+      if (err instanceof errors.TaskExitedUnexpectedlyError) {
+        throw new LintingExitedUnexpectedlyError(err.wid!, err)
+      }
+      throw err
+    }
+  }
+}
diff --git a/packages/nlu-engine/src/engine/linting-process-pool/process-entry-point.ts b/packages/nlu-engine/src/engine/linting-process-pool/process-entry-point.ts
new file mode 100644
index 00000000..7361183c
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting-process-pool/process-entry-point.ts
@@ -0,0 +1,60 @@
+import { makeProcessEntryPoint, TaskDefinition } from '@botpress/worker'
+import { DatasetIssue, IssueCode } from '../../linting'
+import { initializeTools } from '../initialize-tools'
+import { lintingPipeline } from '../linting/linting-pipeline'
+import { ErrorHandler } from '../training-process-pool/error-handler'
+import { LintingInput, LintingOuput, LintingProgress } from './typings'
+
+export const ENTRY_POINT = __filename
+
+const processEntryPoint = makeProcessEntryPoint<LintingInput, LintingOuput, LintingProgress>({
+  errorHandler: new ErrorHandler()
+})
+
+const main = async () => {
+  const config = JSON.parse(process.env.NLU_CONFIG!)
+  const processId = process.pid
+  processEntryPoint.logger.info(`Linting worker successfully started on process with pid ${processId}.`)
+
+  try {
+    const tools = await initializeTools(config, processEntryPoint.logger)
+
+    processEntryPoint.listenForTask(async (taskDef: TaskDefinition<LintingInput, LintingProgress>) => {
+      const { input, progress } = taskDef
+
+      tools.seededLodashProvider.setSeed(input.trainSet.seed)
+      try {
+        const progressCallback = (current: number, total: number, issues: DatasetIssue<IssueCode>[]) => {
+          const p = current / total
+          progress(p, {
+            total,
+            current,
+            issues
+          })
+        }
+
+        const issues = await lintingPipeline(
+          input.trainSet,
+          { ...tools, logger: taskDef.logger },
+          {
+            minSpeed: input.minSpeed,
+            progressCallback
+          }
+        )
+        return { issues }
+      } finally {
+        tools.seededLodashProvider.resetSeed()
+      }
+    })
+
+    await processEntryPoint.initialize()
+  } catch (thrown) {
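+    // a worker may throw values that are not Error instances; normalize them
+    // so the exit path below always logs a proper Error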
+    const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
+    processEntryPoint.logger.error('An unhandled error occurred in the process', err)
+    process.exit(1)
+  }
+}
+
+if (!processEntryPoint.isMainWorker()) {
+  void main()
+}
diff --git a/packages/nlu-engine/src/engine/linting-process-pool/typings.ts b/packages/nlu-engine/src/engine/linting-process-pool/typings.ts
new file mode 100644
index 00000000..b5294e1c
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting-process-pool/typings.ts
@@ -0,0 +1,12 @@
+import { DatasetIssue, IssueCode, IssueComputationSpeed } from '../../linting'
+import { TrainInput } from '../../typings'
+
+export type LintingInput = {
+  lintId: string
+  trainSet: TrainInput
+  minSpeed: IssueComputationSpeed
+}
+
+export type LintingOuput = { issues: DatasetIssue<IssueCode>[] }
+
+export type LintingProgress = { total: number; current: number; issues: DatasetIssue<IssueCode>[] }
diff --git a/packages/nlu-engine/src/engine/linting/c_000.ts b/packages/nlu-engine/src/engine/linting/c_000.ts
new file mode 100644
index 00000000..68d515d3
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/c_000.ts
@@ -0,0 +1,57 @@
+import _ from 'lodash'
+import { DatasetIssue, IssueDefinition } from '../../linting'
+import { IntentDefinition, TrainInput } from '../../typings'
+import { ParsedSlot, parseUtterance } from '../utterance/utterance-parser'
+import { computeId } from './id'
+import { asCode, IssueLinter } from './typings'
+
+const code = asCode('C_000')
+
+export const C_000: IssueDefinition<typeof code> = {
+  code,
+  severity: 'critical',
+  name: 'tokens_tagged_with_unexisting_slot'
+}
+
+const validateIntent = (i: IntentDefinition): DatasetIssue<typeof code>[] => {
+  const { slots, utterances } = i
+
+  const isInvalid = (s: ParsedSlot) => !slots.map((s) => s.name).includes(s.name)
+
+  return _(utterances)
+    .map(parseUtterance)
+    .flatMap(({ parsedSlots, utterance }) => {
+      const invalidSlots = parsedSlots.filter(isInvalid)
+      return invalidSlots.map((invalidSlot) => {
+        const { start, end } = invalidSlot.cleanPosition
+        const faultyTokens = utterance.substring(start, end)
+
+        const data = {
+          intent: i.name,
+          slot: invalidSlot.name,
+          utterance
+        }
+
+        const issue: DatasetIssue<typeof code> = {
+          ...C_000,
+          id: computeId(code, data),
+          message: `Tokens "${faultyTokens}" of intent "${i.name}" are tagged with a slot that does not exist: "${invalidSlot.name}"`,
+          data
+        }
+        return issue
+      })
+    })
+    .value()
+}
+
+export const C_000_Linter: IssueLinter<typeof code> = {
+  ...C_000,
+  speed: 'fastest',
+  lint: async (ts: TrainInput) => {
+    let issues: DatasetIssue<typeof code>[] = []
+    for (const i of ts.intents) {
+      issues = [...issues, ...validateIntent(i)]
+    }
+    return issues
+  }
+}
diff --git a/packages/nlu-engine/src/engine/linting/c_001.ts b/packages/nlu-engine/src/engine/linting/c_001.ts
new file mode 100644
index 00000000..5f315be5
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/c_001.ts
@@ -0,0 +1,82 @@
+import _ from 'lodash'
+import {
+  IntentDefinition,
+  ListEntityDefinition,
+  PatternEntityDefinition,
+  SlotDefinition,
+  TrainInput
+} from 'src/typings'
+import { SLOT_ANY, SYSTEM_ENTITIES } from '../../constants'
+import { isListEntity, isPatternEntity } from '../../guards'
+import { DatasetIssue, IssueDefinition } from '../../linting'
+import { computeId } from './id'
+import { asCode, IssueLinter } from './typings'
+
+const code = asCode('C_001')
+
+export const C_001: IssueDefinition<typeof code> = {
+  code,
+  severity: 'critical',
+  name: 'slot_has_nonexistent_entity'
+}
+
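+// binds the dataset's entity definitions once so each (intent, slot) pair
+// can then be checked with a plain two-argument call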
+const makeSlotChecker = (listEntities: ListEntityDefinition[], patternEntities: PatternEntityDefinition[]) => (
+  intent: IntentDefinition,
+  slot: SlotDefinition
+): DatasetIssue<typeof code>[] => {
+  const { entities } = slot
+
+  const supportedTypes = [
+    ...listEntities.map((e) => e.name),
+    ...patternEntities.map((p) => p.name),
+    ...SYSTEM_ENTITIES,
+    SLOT_ANY
+  ]
+
+  const issues: DatasetIssue<typeof code>[] = []
+
+  for (const entity of entities) {
+    if (!supportedTypes.includes(entity)) {
+      const data = {
+        entity,
+        intent: intent.name,
+        slot: slot.name
+      }
+
+      issues.push({
+        ...C_001,
+        id: computeId(code, data),
+        message: `Slot "${slot.name}" of intent "${intent.name}" refers to a type that does not exist: "${entity}"`,
+        data
+      })
+    }
+  }
+
+  return issues
+}
+
+const validateIntent = (
+  intent: IntentDefinition,
+  lists: ListEntityDefinition[],
+  patterns: PatternEntityDefinition[]
+): DatasetIssue<typeof code>[] => {
+  const checkSlot = makeSlotChecker(lists, patterns)
+  return _.flatMap(intent.slots, (s) => checkSlot(intent, s))
+}
+
+export const C_001_Linter: IssueLinter<typeof code> = {
+  ...C_001,
+  speed: 'fastest',
+  lint: async (ts: TrainInput) => {
+    const { entities } = ts
+    const lists = entities.filter(isListEntity)
+    const patterns = entities.filter(isPatternEntity)
+
+    let issues: DatasetIssue<typeof code>[] = []
+
+    for (const i of ts.intents) {
+      issues = [...issues, ...validateIntent(i, lists, patterns)]
+    }
+    return issues
+  }
+}
diff --git a/packages/nlu-engine/src/engine/linting/c_002.ts b/packages/nlu-engine/src/engine/linting/c_002.ts
new file mode 100644
index 00000000..48022024
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/c_002.ts
@@ -0,0 +1,37 @@
+import _ from 'lodash'
+import { TrainInput } from 'src/typings'
+import { DatasetIssue, IssueDefinition } from '../../linting'
+import { computeId } from './id'
+import { asCode, IssueLinter } from './typings'
+
+const code = asCode('C_002')
+
+export const C_002: IssueDefinition<typeof code> = {
+  code,
+  severity: 'critical',
+  name: 'intent_has_no_utterances'
+}
+
+export const C_002_Linter: IssueLinter<typeof code> = {
+  ...C_002,
+  speed: 'fastest',
+  lint: async (ts: TrainInput) => {
+    const issues: DatasetIssue<typeof code>[] = []
+
+    for (const i of ts.intents) {
+      if (!i.utterances.length) {
+        const data = {
+          intent: i.name
+        }
+
+        issues.push({
+          ...C_002,
+          id: computeId(code, data),
+          message: `Intent "${i.name}" has no utterances.`,
+          data
+        })
+      }
+    }
+    return issues
+  }
+}
diff --git a/packages/nlu-engine/src/engine/linting/c_003.ts b/packages/nlu-engine/src/engine/linting/c_003.ts
new file mode 100644
index 00000000..9aecb8a0
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/c_003.ts
@@ -0,0 +1,37 @@
+import _ from 'lodash'
+import { TrainInput } from 'src/typings'
+import { IssueData, IssueDefinition } from '../../linting'
+import { Tools } from '../typings'
+import { computeId } from './id'
+import { asCode, IssueLinter } from './typings'
+
+const code = asCode('C_003')
+
+export const C_003: IssueDefinition<typeof code> = {
+  code,
+  severity: 'critical',
+  name: 'dataset_has_unsupported_language'
+}
+
+export const C_003_Linter: IssueLinter<typeof code> = {
+  ...C_003,
+  speed: 'fastest',
+  lint: async (ts: TrainInput, tools: Tools) => {
+    if (tools.getLanguages().includes(ts.language)) {
+      return []
+    }
+
+    const data: IssueData<typeof code> = {
+      language: ts.language
+    }
+
+    return [
+      {
+        ...C_003,
+        id: computeId(code, data),
+        message: `Language "${ts.language}" is not supported by the language server.`,
+        data
+      }
+    ]
+  }
+}
diff --git a/packages/nlu-engine/src/engine/linting/criticals.test.ts b/packages/nlu-engine/src/engine/linting/criticals.test.ts
new file mode 100644
index 00000000..359dedfa
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/criticals.test.ts
@@ -0,0 +1,172 @@
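+// exercises the critical linters through the whole pipeline: a TrainInput
+// either lints cleanly or makes validateTrainInput throw the joined messages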
+import {
+  IntentDefinition,
+  ListEntityDefinition,
+  Logger,
+  PatternEntityDefinition,
+  SlotDefinition,
+  TrainInput
+} from 'src/typings'
+import { makeFakeTools } from '../test-utils/fake-tools'
+import { lintingPipeline } from './linting-pipeline'
+
+const dummyLogger: Partial<Logger> = { debug: () => {} }
+
+const validateTrainInput = async (ts: TrainInput) => {
+  const tools = makeFakeTools(100, ['en'])
+  const issues = await lintingPipeline(
+    ts,
+    { ...tools, logger: dummyLogger as Logger },
+    {
+      minSpeed: 'fastest'
+    }
+  )
+
+  if (issues.length) {
+    const formatted = issues.map((i) => i.message).join('\n')
+    throw new Error(formatted)
+  }
+}
+
+const CITY_ENUM: ListEntityDefinition = {
+  name: 'city',
+  type: 'list',
+  fuzzy: 1,
+  values: [
+    { name: 'paris', synonyms: ['city of paris', 'la ville des lumières'] },
+    { name: 'quebec', synonyms: [] }
+  ]
+}
+
+const TICKET_PATTERN: PatternEntityDefinition = {
+  name: 'ticket',
+  type: 'pattern',
+  case_sensitive: true,
+  regex: '[A-Z]{3}-[0-9]{3}', // ABC-123
+  examples: ['ABC-123']
+}
+
+const VARIABLE_CITY_FROM: SlotDefinition = { name: 'city-from', entities: ['city'] }
+
+const VARIABLE_TICKET_PROBLEM: SlotDefinition = { name: 'tick-with-problem', entities: ['ticket'] }
+
+const FLY_INTENT: IntentDefinition = {
+  name: 'fly',
+  contexts: ['fly'],
+  utterances: ['fly from $city-from to anywhere', 'book a flight'],
+  slots: [VARIABLE_CITY_FROM]
+}
+
+const PROBLEM_INTENT: IntentDefinition = {
+  name: 'problem',
+  contexts: ['problem'],
+  utterances: ['problem with ticket $tick-with-problem', 'problem with ticket'],
+  slots: [VARIABLE_TICKET_PROBLEM]
+}
+
+const EMPTY_INTENT: IntentDefinition = {
+  name: 'empty',
+  contexts: ['empty'],
+  utterances: ['hahahahahahaha'],
+  slots: []
+}
+
+const BOUILLON_INTENT: IntentDefinition = {
+  name: 'bouillon',
+  contexts: [''],
+  utterances: ['I vote for [subway](restaurant-to-vote)'],
+  slots: [{ name: 'restaurant-to-vote', entities: ['restaurant'] }]
+}
+
+const LANG = 'en'
+
+test('validate with correct format should pass', async () => {
+  // arrange
+  const trainInput: TrainInput = {
+    intents: [FLY_INTENT],
+    entities: [CITY_ENUM],
+    language: LANG,
+    seed: 42
+  }
+
+  // act & assert
+  await validateTrainInput(trainInput)
+})
+
+test('validate intent without utterances should fail', async () => {
+  // arrange
+  const withoutUtterances: IntentDefinition = { name: 'will break', contexts: ['A'] } as IntentDefinition
+
+  const trainInput: TrainInput = {
+    intents: [withoutUtterances],
+    entities: [CITY_ENUM],
+    language: LANG,
+    seed: 42
+  }
+
+  // act & assert
+  await expect(validateTrainInput(trainInput)).rejects.toThrow()
+})
+
+test('validate enum without values or patterns without regexes should fail', async () => {
+  // arrange
+  const incompleteEnum: ListEntityDefinition = { name: 'city' } as ListEntityDefinition
+
+  const incompletePattern: PatternEntityDefinition = { name: 'password' } as PatternEntityDefinition
+
+  const withoutValues: TrainInput = {
+    intents: [FLY_INTENT],
+    entities: [incompleteEnum],
+    language: LANG,
+    seed: 42
+  }
+
+  const withoutRegexes: TrainInput = {
+    intents: [PROBLEM_INTENT],
+    entities: [incompletePattern],
+    language: LANG,
+    seed: 42
+  }
+
+  // act & assert
+  await expect(validateTrainInput(withoutValues)).rejects.toThrow()
+  await expect(validateTrainInput(withoutRegexes)).rejects.toThrow()
+})
+
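+// the three cases below reference entities ("city", "ticket", "restaurant")
+// that are missing from the accompanying entity list, so each must throw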
+test('validate with a nonexistent referenced enum should throw', async () => {
+  // arrange
+  const trainInput: TrainInput = {
+    intents: [FLY_INTENT],
+    entities: [TICKET_PATTERN],
+    language: LANG,
+    seed: 42
+  }
+
+  // act & assert
+  await expect(validateTrainInput(trainInput)).rejects.toThrow()
+})
+
+test('validate with a nonexistent referenced pattern should throw', async () => {
+  // arrange
+  const trainInput: TrainInput = {
+    intents: [PROBLEM_INTENT],
+    entities: [CITY_ENUM],
+    language: LANG,
+    seed: 42
+  }
+
+  // act & assert
+  await expect(validateTrainInput(trainInput)).rejects.toThrow()
+})
+
+test('validate with a nonexistent referenced complex should throw', async () => {
+  // arrange
+  const trainInput: TrainInput = {
+    intents: [BOUILLON_INTENT],
+    entities: [CITY_ENUM],
+    language: LANG,
+    seed: 42
+  }
+
+  // act & assert
+  await expect(validateTrainInput(trainInput)).rejects.toThrow()
+})
diff --git a/packages/nlu-engine/src/engine/linting/definitions.ts b/packages/nlu-engine/src/engine/linting/definitions.ts
new file mode 100644
index 00000000..73ab2d65
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/definitions.ts
@@ -0,0 +1,28 @@
+import { IssueCode, IssueDefinition } from '../../linting'
+import { C_000 } from './c_000'
+import { C_001 } from './c_001'
+import { C_002 } from './c_002'
+import { C_003 } from './c_003'
+import { E_000 } from './e_000'
+import { E_001 } from './e_001'
+import { E_002 } from './e_002'
+import { E_003 } from './e_003'
+import { I_000 } from './i_000'
+import { W_000 } from './w_000'
+
+type IssueDefinitions = {
+  [C in IssueCode]: IssueDefinition<C>
+}
+
+export const allIssues: IssueDefinitions = {
+  C_000,
+  C_001,
+  C_002,
+  C_003,
+  E_000,
+  E_001,
+  E_002,
+  E_003,
+  W_000,
+  I_000
+}
diff --git a/packages/nlu-engine/src/engine/linting/e_000.test.ts b/packages/nlu-engine/src/engine/linting/e_000.test.ts
new file mode 100644
index 00000000..ff951846
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/e_000.test.ts
@@ -0,0 +1,68 @@
+import { TrainInput } from '../../typings'
+import { makeFakeTools } from '../test-utils/fake-tools'
+import { E_000_Linter } from './e_000'
+
+const utterances = [
+  'I want to buy fruits',
+  'Can I have a [banana](fruit_to_buy) please ?',
+  'Please, get me an [apple](fruit_to_buy)',
+  'Do you have any [melon](fruit_to_buy) left ?',
+  'My personal favorite abstract object is [the concept of cheese](favorite_abstract_object).',
+  'I have a strong feeling for the idea of an [apple](favorite_abstract_object).',
+  'Do you have item [abc-123](fruit_to_buy) left ?',
+  'Do you have item [abc-12c](fruit_to_buy) in stock ?'
+]
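+// utterances 6 and 7 tag ticket-like tokens: "abc-123" matches the store_item
+// regex defined below, while "abc-12c" does not and must be flagged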
+
+const trainSet: TrainInput = {
+  entities: [
+    {
+      name: 'fruit',
+      type: 'list',
+      fuzzy: 1,
+      values: [
+        { name: 'grape', synonyms: [] },
+        { name: 'melon', synonyms: ['water-melon'] }
+      ]
+    },
+    {
+      name: 'store_item',
+      type: 'pattern',
+      case_sensitive: true,
+      examples: [],
+      regex: '[a-z]{3}-[0-9]{3}'
+    }
+  ],
+  intents: [
+    {
+      name: 'buy_fruits',
+      contexts: ['global'],
+      slots: [
+        { name: 'fruit_to_buy', entities: ['fruit', 'store_item'] },
+        { name: 'favorite_abstract_object', entities: ['fruit', 'any'] }
+      ],
+      utterances
+    }
+  ],
+  language: 'en',
+  seed: 42
+}
+
+const fakeTools = makeFakeTools(300, ['en'])
+
+test('linter for E_000 only flags tokens incorrectly tagged as a slot', async () => {
+  const lintResults = await E_000_Linter.lint(trainSet, fakeTools)
+
+  expect(lintResults.length).toBe(3)
+
+  expect(lintResults[0].data.utterance.idx).toBe(1)
+  expect(lintResults[0].data.slot).toBe('fruit_to_buy')
+  expect(lintResults[0].data.source).toBe('banana')
+
+  expect(lintResults[1].data.utterance.idx).toBe(2)
+  expect(lintResults[1].data.slot).toBe('fruit_to_buy')
+  expect(lintResults[1].data.source).toBe('apple')
+
+  expect(lintResults[2].data.utterance.idx).toBe(7)
+  expect(lintResults[2].data.slot).toBe('fruit_to_buy')
+  expect(lintResults[2].data.source).toBe('abc-12c')
+})
diff --git a/packages/nlu-engine/src/engine/linting/e_000.ts b/packages/nlu-engine/src/engine/linting/e_000.ts
new file mode 100644
index 00000000..9c52d80a
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/e_000.ts
@@ -0,0 +1,250 @@
+import Bluebird from 'bluebird'
+import _ from 'lodash'
+import { SLOT_ANY, SYSTEM_ENTITIES } from '../../constants'
+import { isListEntity, isPatternEntity } from '../../guards'
+import { DatasetIssue, IssueData, IssueDefinition } from '../../linting'
+import {
+  EntityDefinition,
+  ListEntityDefinition,
+  PatternEntityDefinition,
+  SlotDefinition,
+  TrainInput
+} from '../../typings'
+import { CustomEntityExtractor } from '../entities/custom-extractor'
+import { makeListEntityModel } from '../entities/list-entity-model'
+import { EntityExtractionResult, ListEntity, ListEntityModel, PatternEntity, Tools } from '../typings'
+import Utterance, { buildUtteranceBatch, UtteranceSlot, UtteranceToStringOptions } from '../utterance/utterance'
+import { computeId } from './id'
+import { asCode, IssueLinter } from './typings'
+
+const code = asCode('E_000')
+
+export const E_000: IssueDefinition<typeof code> = {
+  code,
+  severity: 'error',
+  name: 'tokens_tagged_with_slot_has_incorrect_type'
+}
+
+type ResolvedSlotDef = {
+  name: string
+  isAny: boolean
+  listEntities: ListEntityModel[]
+  patternEntities: PatternEntity[]
+  systemEntities: string[]
+}
+
+type VerificationUnit = {
+  intent: string
+  utteranceIdx: number
+  rawUtterance: string
+  utterance: Utterance
+  slotDef: ResolvedSlotDef
+  slot: UtteranceSlot
+}
+
+const makeIssueFromData = (data: IssueData<typeof code>): DatasetIssue<typeof code> => ({
+  ...E_000,
+  id: computeId(code, data),
+  message: `Tokens "${data.source}" tagged with slot "${data.slot}" do not match the expected entities.`,
+  data
+})
+
+const unitToIssue = ({ intent, utterance, utteranceIdx, slot, slotDef }: VerificationUnit) =>
+  makeIssueFromData({
+    intent,
+    utterance: { idx: utteranceIdx, clean: utterance.toString() },
+    charPos: {
+      clean: { start: slot.startPos, end: slot.endPos }
+    },
+    slot: slotDef.name,
+    entities: mapResolvedToSlotDef(slotDef).entities,
+    source: slot.source
+  })
+
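+// small helpers: split entity definitions by kind and narrow away the
+// undefined padding that lodash's zip() introduces on unequal-length arrays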
+const splitEntities = (entitieDefs: EntityDefinition[]) => {
+  const listEntities = entitieDefs.filter(isListEntity)
+  const patternEntities = entitieDefs.filter(isPatternEntity)
+  return {
+    listEntities,
+    patternEntities
+  }
+}
+
+const truncateZip = <A, B>(pair: [A | undefined, B | undefined]): pair is [A, B] => {
+  return pair[0] !== undefined && pair[1] !== undefined
+}
+
+const isDefined = <T>(x: T | undefined): x is T => {
+  return x !== undefined
+}
+
+type Unpack<P> = P extends Promise<infer X> ? X : P
+
+const mapListEntity = (lang: string, tools: Tools, list: ListEntityDefinition): Promise<ListEntityModel> => {
+  const { name, values, fuzzy, sensitive } = list
+
+  const synonyms = _(values)
+    .map(({ name, synonyms }) => <[string, string[]]>[name, synonyms])
+    .fromPairs()
+    .value()
+
+  const mapped: ListEntity = {
+    name,
+    fuzzyTolerance: fuzzy,
+    sensitive: !!sensitive,
+    synonyms
+  }
+
+  return makeListEntityModel(mapped, lang, tools)
+}
+
+const mapPatternEntity = (pattern: PatternEntityDefinition): PatternEntity => {
+  const { name, regex, case_sensitive, examples, sensitive } = pattern
+  return {
+    name,
+    examples,
+    matchCase: case_sensitive,
+    pattern: regex,
+    sensitive: !!sensitive
+  }
+}
+
+const mapSlotDefToResolved = (
+  listModels: ListEntityModel[],
+  patternModels: PatternEntity[],
+  { name, entities }: SlotDefinition
+): ResolvedSlotDef => {
+  return {
+    name,
+    isAny: entities.includes(SLOT_ANY),
+    listEntities: entities.map((e) => listModels.find((lm) => lm.entityName === e)).filter(isDefined),
+    patternEntities: entities.map((e) => patternModels.find((pm) => pm.name === e)).filter(isDefined),
+    systemEntities: entities.map((e) => SYSTEM_ENTITIES.find((s) => s === e)).filter(isDefined)
+  }
+}
+
+const mapResolvedToSlotDef = ({
+  isAny,
+  listEntities,
+  name,
+  patternEntities,
+  systemEntities
+}: ResolvedSlotDef): SlotDefinition => {
+  const entities: string[] = []
+  entities.push(...listEntities.map((e) => e.entityName))
+  entities.push(...patternEntities.map((e) => e.name))
+  entities.push(...systemEntities)
+  isAny && entities.push('any')
+  return {
+    name,
+    entities
+  }
+}
+
+const resolveEntities = async (ts: TrainInput, tools: Tools) => {
+  const { intents, entities, language, seed } = ts
+  const { listEntities, patternEntities } = splitEntities(entities)
+
+  const listModels = await Bluebird.map(listEntities, (e) => mapListEntity(language, tools, e))
+  const patternModels = patternEntities.map(mapPatternEntity)
+
+  const resolvedIntents = intents.map(({ slots, ...i }) => ({
+    ...i,
+    slots: slots.map((s) => mapSlotDefToResolved(listModels, patternModels, s))
+  }))
+
+  return {
+    entities,
+    language,
+    seed,
+    intents: resolvedIntents
+  }
+}
+
+const flattenDataset = async (
+  ts: Unpack<ReturnType<typeof resolveEntities>>,
+  tools: Tools
+): Promise<VerificationUnit[]> => {
+  const flatIntents = ts.intents
+  const flatRawUtterances = _.flatMap(flatIntents, ({ utterances, ...x }) =>
+    utterances.map((u, i) => ({ rawUtterance: u, intent: x, utteranceIdx: i }))
+  )
+
+  const rawUtterances: string[] = flatRawUtterances.map(({ rawUtterance }) => rawUtterance)
+  const utteranceBatch = await buildUtteranceBatch(rawUtterances, ts.language, tools, [], {
+    vectorize: false, // vectors are not needed, so skip them for speed
+    preprocess: false // all characters must be kept
+  })
+
+  const flatUtterances = _.zip(flatRawUtterances, utteranceBatch)
+    .filter(truncateZip)
+    .map(([x, u]) => ({ ...x, utterance: u }))
+
+  const flatSlotDefinitions = _.flatMap(flatUtterances, ({ intent, ...x }) =>
+    intent.slots.map((s) => ({ intent: intent.name, slotDef: s, ...x }))
+  )
+
+  const flatSlotOccurences = _.flatMap(flatSlotDefinitions, ({ utterance, ...x }) =>
+    utterance.slots.map((s) => ({ slot: s, utterance, ...x }))
+  )
+
+  return flatSlotOccurences.filter((x) => x.slot.name === x.slotDef.name)
+}
+
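+// a tagged span passes the custom-entity check when at least one of the
+// slot's declared list or pattern entities matches those exact tokens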
+const matchesCustom = (customEntityExtractor: CustomEntityExtractor) => (unit: VerificationUnit) => {
+  const { startTokenIdx, endTokenIdx } = unit.slot
+  const slotTokens = unit.utterance.tokens.filter(({ index }) => index >= startTokenIdx && index <= endTokenIdx)
+
+  const toString = (opt?: Partial<UtteranceToStringOptions>) => Utterance.toString(slotTokens, opt)
+  const entityUtterance = { tokens: slotTokens, toString }
+  const listMatches = customEntityExtractor.extractListEntities(entityUtterance, unit.slotDef.listEntities)
+  if (listMatches.length) {
+    return true
+  }
+
+  const patternMatches = customEntityExtractor.extractPatternEntities(entityUtterance, unit.slotDef.patternEntities)
+  if (patternMatches.length) {
+    return true
+  }
+
+  return false
+}
+
+const entityMatchesSlot = (u: VerificationUnit) => (e: EntityExtractionResult) =>
+  e.start === u.slot.startPos && e.end === u.slot.endPos && u.slotDef.systemEntities.includes(e.type)
+
+export const E_000_Linter: IssueLinter<typeof code> = {
+  ...E_000,
+  speed: 'fastest',
+  lint: async (ts: TrainInput, tools: Tools) => {
+    const resolvedSet = await resolveEntities(ts, tools)
+    const flatDataset = await flattenDataset(resolvedSet, tools)
+
+    const { systemEntityExtractor } = tools
+    const customEntityExtractor = new CustomEntityExtractor()
+
+    let potentiallyInvalidSlots = flatDataset
+    potentiallyInvalidSlots = _.reject(potentiallyInvalidSlots, (u) => u.slotDef.isAny)
+    potentiallyInvalidSlots = _.reject(potentiallyInvalidSlots, matchesCustom(customEntityExtractor))
+
+    const [withSystemEntities, withoutSystemEntities] = _.partition(
+      potentiallyInvalidSlots,
+      (s) => s.slotDef.systemEntities.length
+    )
+
+    const extractedSystemEntities = await systemEntityExtractor.extractMultiple(
+      withSystemEntities.map((u) => u.utterance.toString()), // use the whole utterance here: duckling may be influenced by token position within the utterance, and it is fast anyway
+      ts.language,
+      () => {},
+      true
+    )
+
+    let invalidSlots: VerificationUnit[] = _.zip(extractedSystemEntities, withSystemEntities)
+      .filter(truncateZip)
+      .map(([e, u]) => ({ ...u, extractedSystemEntities: e }))
+      .filter((u) => !u.extractedSystemEntities.some(entityMatchesSlot(u)))
+    invalidSlots = [...invalidSlots, ...withoutSystemEntities]
+
+    return invalidSlots.map(unitToIssue)
+  }
+}
diff --git a/packages/nlu-engine/src/engine/linting/e_001.ts b/packages/nlu-engine/src/engine/linting/e_001.ts
new file mode 100644
index 00000000..0226aa3a
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/e_001.ts
@@ -0,0 +1,12 @@
+import { IssueDefinition } from '../../linting'
+import { asCode } from './typings'
+
+const code = asCode('E_001')
+
+export const E_001: IssueDefinition<typeof code> = {
+  code,
+  severity: 'error',
+  name: 'slot_has_nonexistent_entity'
+}
+
+// no linter implemented yet
diff --git a/packages/nlu-engine/src/engine/linting/e_002.ts b/packages/nlu-engine/src/engine/linting/e_002.ts
new file mode 100644
index 00000000..55d90b06
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/e_002.ts
@@ -0,0 +1,12 @@
+import { IssueDefinition } from '../../linting'
+import { asCode } from './typings'
+
+const code = asCode('E_002')
+
+export const E_002: IssueDefinition<typeof code> = {
+  code,
+  severity: 'error',
+  name: 'duplicated_utterances'
+}
+
+// no linter implemented yet
diff --git a/packages/nlu-engine/src/engine/linting/e_003.ts b/packages/nlu-engine/src/engine/linting/e_003.ts
new file mode 100644
index 00000000..8c1d5212
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/e_003.ts
@@ -0,0 +1,12 @@
+import { IssueDefinition } from '../../linting'
+import { asCode } from './typings'
+
+const code = asCode('E_003')
+
+export const E_003: IssueDefinition<typeof code> = {
+  code,
+  severity: 'error',
+  name: 'whole_utterance_is_tagged_as_a_slot'
+}
+
+// no linter implemented yet
diff --git a/packages/nlu-engine/src/engine/linting/i_000.test.ts b/packages/nlu-engine/src/engine/linting/i_000.test.ts
new file mode 100644
index 00000000..f58f2801
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/i_000.test.ts
@@ -0,0 +1,64 @@
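+// each sample lists the character spans where I_000 must report redundant
+// whitespace; an empty spans array marks an already clean utterance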
+import _ from 'lodash'
+import { TrainInput } from '../../typings'
+import { makeFakeTools } from '../test-utils/fake-tools'
+import { I_000_Linter } from './i_000'
+
+type TestSample = {
+  utt: string
+  spans: { start: number; end: number }[]
+}
+
+const samples: TestSample[] = [
+  { utt: 'I want to buy [grapes](fruit_to_buy)', spans: [] },
+  { utt: 'I want  to buy [grapes](fruit_to_buy)', spans: [{ start: 7, end: 8 }] },
+  { utt: 'I want to   buy [grapes](fruit_to_buy)', spans: [{ start: 10, end: 12 }] },
+  { utt: '  I want to buy [grapes](fruit_to_buy)', spans: [{ start: 0, end: 2 }] },
+  { utt: 'I want to buy [grapes](fruit_to_buy)  ', spans: [{ start: 36, end: 38 }] },
+  {
+    utt: '   I want    to buy [grapes](fruit_to_buy)',
+    spans: [
+      { start: 0, end: 3 },
+      { start: 10, end: 13 }
+    ]
+  }
+]
+
+const trainSet: TrainInput = {
+  entities: [
+    {
+      name: 'fruit',
+      type: 'list',
+      fuzzy: 1,
+      values: [
+        { name: 'grape', synonyms: ['grapes'] },
+        { name: 'melon', synonyms: ['water-melon'] }
+      ]
+    }
+  ],
+  intents: [
+    {
+      name: 'buy_fruits',
+      contexts: ['global'],
+      slots: [{ name: 'fruit_to_buy', entities: ['fruit'] }],
+      utterances: samples.map((s) => s.utt)
+    }
+  ],
+  language: 'en',
+  seed: 42
+}
+
+const fakeTools = makeFakeTools(300, ['en'])
+
+test('linter for I_000 flags all redundant spaces', async () => {
+  const lintResults = await I_000_Linter.lint(trainSet, fakeTools)
+
+  for (let i = 0; i < samples.length; i++) {
+    const sample = samples[i]
+    const issues = lintResults.filter((r) => r.data.utterance.idx === i)
+
+    for (const [issue, span] of _.zip(issues, sample.spans)) {
+      expect(issue?.data.charPos.raw.start).toBe(span?.start)
+      expect(issue?.data.charPos.raw.end).toBe(span?.end)
+    }
+  }
+})
diff --git a/packages/nlu-engine/src/engine/linting/i_000.ts b/packages/nlu-engine/src/engine/linting/i_000.ts
new file mode 100644
index 00000000..d8a1e5b7
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/i_000.ts
@@ -0,0 +1,108 @@
+import _ from 'lodash'
+import { TrainInput } from 'src/typings'
+import { DatasetIssue, IssueData, IssueDefinition, Span } from '../../linting'
+import { computeId } from './id'
+import { asCode, IssueLinter } from './typings'
+
+const LEADING_SPACES = /^ +/
+const TRAILING_SPACES = / +$/
+const CONSECUTIVE_SPACES = /\s{2,}/g
+
+const code = asCode('I_000')
+
+type VerificationUnit = {
+  intent: string
+  utteranceIdx: number
+  utterance: string
+}
+
+export const I_000: IssueDefinition<typeof code> = {
+  code,
+  severity: 'info',
+  name: 'dupplicated_or_untrimed_spaces'
+}
+
+const flattenDataset = (ts: TrainInput): VerificationUnit[] => {
+  return ts.intents.flatMap((intent) =>
+    intent.utterances.map((u, i) => ({
+      intent: intent.name,
+      utterance: u,
+      utteranceIdx: i
+    }))
+  )
+}
+
+const makeIssue = (x: VerificationUnit & Span): DatasetIssue<typeof code> => {
+  const { intent, utteranceIdx, utterance, start, end } = x
+  const data: IssueData<typeof code> = {
+    intent,
+    utterance: { idx: utteranceIdx, raw: utterance },
+    charPos: { raw: { start, end } }
+  }
+  return {
+    ...I_000,
+    id: computeId(code, data),
+    data,
+    message: 'All leading, trailing and consecutive spaces are removed by the NLU engine.'
+  }
+}
+
+const getSpan = (regexpMatch: RegExpExecArray) => ({
+  start: regexpMatch.index,
+  end: regexpMatch.index + regexpMatch[0].length
+})
+
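+// leading and trailing runs are reported whole; for inner runs the reported
+// span starts one character later, keeping a single separating space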
+const checkUtterance = (unit: VerificationUnit): DatasetIssue<typeof code>[] => {
+  const issues: DatasetIssue<typeof code>[] = []
+  const leadingSpacesMatch = LEADING_SPACES.exec(unit.utterance)
+  if (leadingSpacesMatch) {
+    const span = getSpan(leadingSpacesMatch)
+    issues.push(
+      makeIssue({
+        ...unit,
+        ...span
+      })
+    )
+  }
+
+  const trailingSpaceMatch = TRAILING_SPACES.exec(unit.utterance)
+  if (trailingSpaceMatch) {
+    const span = getSpan(trailingSpaceMatch)
+    issues.push(
+      makeIssue({
+        ...unit,
+        ...span
+      })
+    )
+  }
+
+  let dupplicatedSpacesMatch = CONSECUTIVE_SPACES.exec(unit.utterance)
+  while (dupplicatedSpacesMatch) {
+    const span = getSpan(dupplicatedSpacesMatch)
+
+    if (span.start !== 0 && span.end !== unit.utterance.length) {
+      issues.push(
+        makeIssue({
+          ...unit,
+          start: span.start + 1,
+          end: span.end
+        })
+      )
+    }
+
+    dupplicatedSpacesMatch = CONSECUTIVE_SPACES.exec(unit.utterance)
+  }
+
+  return issues
+}
+
+export const I_000_Linter: IssueLinter<typeof code> = {
+  ...I_000,
+  speed: 'fastest',
+  lint: async (ts: TrainInput) => {
+    const units = flattenDataset(ts)
+    const issues = units.flatMap(checkUtterance)
+
+    return issues
+  }
+}
diff --git a/packages/nlu-engine/src/engine/linting/id.ts b/packages/nlu-engine/src/engine/linting/id.ts
new file mode 100644
index 00000000..e2bf1d5d
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/id.ts
@@ -0,0 +1,8 @@
+import { IssueCode, IssueData } from '../../linting'
+import { halfmd5 } from '../../utils/half-md5'
+
+export const computeId = <C extends IssueCode>(code: C, data: IssueData<C>): string => {
+  const definitionId = code
+  const instanceId = halfmd5(JSON.stringify(data))
+  return `${definitionId}.${instanceId}`
+}
diff --git a/packages/nlu-engine/src/engine/linting/linting-pipeline.ts b/packages/nlu-engine/src/engine/linting/linting-pipeline.ts
new file mode 100644
index 00000000..6ff7c178
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/linting-pipeline.ts
@@ -0,0 +1,56 @@
+import Bluebird from 'bluebird'
+import _ from 'lodash'
+import { DatasetIssue, IssueCode } from '../../linting'
+import { Logger, TrainInput } from '../../typings'
+import { Tools } from '../typings'
+import { C_000_Linter } from './c_000'
+import { C_001_Linter } from './c_001'
+import { C_002_Linter } from './c_002'
+import { C_003_Linter } from './c_003'
+import { E_000_Linter } from './e_000'
+import { I_000_Linter } from './i_000'
+import * as severity from './severity'
+import * as speed from './speed'
+import { IssueLinter, LintingOptions } from './typings'
+
+const allLinters: IssueLinter<IssueCode>[] = [
+  C_000_Linter,
+  C_001_Linter,
+  C_002_Linter,
+  C_003_Linter,
+  E_000_Linter,
+  I_000_Linter
+]
+
+const DEFAULT_OPTS: LintingOptions = {
+  minSpeed: 'slow',
+  minSeverity: 'info',
+  progressCallback: () => {}
+}
+
+export const lintingPipeline = async (
+  ts: TrainInput,
+  tools: Tools & { logger: Logger },
+  opts: Partial<LintingOptions> = {}
+) => {
+  const options = { ...DEFAULT_OPTS, ...opts }
+
+  let idx = 0
+
+  let targetLinters = allLinters
+  targetLinters = targetLinters.filter((c) => speed.is(c.speed).asFastAs(options.minSpeed))
+  targetLinters = targetLinters.filter((c) => severity.is(c.severity).asSevereAs(options.minSeverity))
+
+  // TODO: replace this with Promise.all
+  const results = await Bluebird.mapSeries(targetLinters, async (linter) => {
+    tools.logger.debug(`Linter "${linter.name}" started.`)
+    const t0 = Date.now()
+    const issues = await linter.lint(ts, tools)
+    tools.logger.debug(`Linter "${linter.name}" done (${Date.now() - t0} ms).`)
+    await options.progressCallback(++idx, targetLinters.length, issues)
+    return issues
+  })
+
+  const allIssues: DatasetIssue<IssueCode>[] = _.flatten(results)
+  return allIssues
+}
diff --git a/packages/nlu-engine/src/engine/linting/severity.ts b/packages/nlu-engine/src/engine/linting/severity.ts
new file mode 100644
index 00000000..7df81aa5
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/severity.ts
@@ -0,0 +1,18 @@
+import { IssueCode, IssueSeverity } from 'src/linting'
+
+const severities: Record<IssueSeverity<IssueCode>, number> = {
+  critical: 0,
+  error: 1,
+  warning: 2,
+  info: 3
+}
+
+export const toInt = (severity: IssueSeverity<IssueCode>): number => {
+  return severities[severity]
+}
+
+export const is = (severity1: IssueSeverity<IssueCode>) => ({
+  asSevereAs: (severity2: IssueSeverity<IssueCode>) => {
+    return toInt(severity1) <= toInt(severity2)
+  }
+})
diff --git a/packages/nlu-engine/src/engine/linting/speed.ts b/packages/nlu-engine/src/engine/linting/speed.ts
new file mode 100644
index 00000000..762c014d
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/speed.ts
@@ -0,0 +1,18 @@
+import { IssueComputationSpeed } from '../../linting'
+
+const speeds: Record<IssueComputationSpeed, number> = {
+  fastest: 0,
+  fast: 1,
+  slow: 2,
+  slowest: 3
+}
+
+export const toInt = (speed: IssueComputationSpeed): number => {
+  return speeds[speed]
+}
+
+export const is = (speed1: IssueComputationSpeed) => ({
+  asFastAs: (speed2: IssueComputationSpeed) => {
+    return toInt(speed1) <= toInt(speed2)
+  }
+})
diff --git a/packages/nlu-engine/src/engine/linting/typings.ts b/packages/nlu-engine/src/engine/linting/typings.ts
new file mode 100644
index 00000000..6fc15916
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/typings.ts
@@ -0,0 +1,17 @@
+import _ from 'lodash'
+import { DatasetIssue, IssueCode, IssueComputationSpeed, IssueDefinition, IssueSeverity } from '../../linting'
+import { LintingProgressCb, TrainInput } from '../../typings'
+import { Tools } from '../typings'
+
+export const asCode = <C extends IssueCode>(c: C): C => c
+
+export type IssueLinter<C extends IssueCode> = IssueDefinition<C> & {
+  speed: IssueComputationSpeed
+  lint: (ts: TrainInput, tools: Tools) => Promise<DatasetIssue<C>[]>
+}
+
+export type LintingOptions = {
+  minSpeed: IssueComputationSpeed
+  minSeverity: IssueSeverity<IssueCode>
+  progressCallback: LintingProgressCb
+}
diff --git a/packages/nlu-engine/src/engine/linting/w_000.ts b/packages/nlu-engine/src/engine/linting/w_000.ts
new file mode 100644
index 00000000..b486e909
--- /dev/null
+++ b/packages/nlu-engine/src/engine/linting/w_000.ts
@@ -0,0 +1,12 @@
+import { IssueDefinition } from '../../linting'
+import { asCode } from './typings'
+
+const code = asCode('W_000')
+
+export const W_000: IssueDefinition<typeof code> = {
+  code,
+  severity: 'warning',
+  name: 'intents_are_overlapping'
+}
+
+// no linter implemented yet
diff --git a/packages/nlu-engine/src/engine/model-serializer.ts b/packages/nlu-engine/src/engine/model-serializer.ts
index 3206fdde..d7a6309e 100644
--- a/packages/nlu-engine/src/engine/model-serializer.ts
+++ b/packages/nlu-engine/src/engine/model-serializer.ts
@@ -1,44 +1,205 @@
+import * as ptb from '@bpinternal/ptb-schema'
 import _ from 'lodash'
-import { Model } from '../typings'
+import { ModelOf } from 'src/component'
+import { Model, SlotDefinition } from '../typings'
+import { OOSIntentClassifier } from './intents/oos-intent-classfier'
+import { SvmIntentClassifier } from './intents/svm-intent-classifier'
+import { SlotTagger } from './slots/slot-tagger'
+import { 
ListEntityModel, Intent, PatternEntity, SerializedKmeansResult, TFIDF } from './typings' -import { TrainInput, TrainOutput } from './training-pipeline' +export type PredictableModelData = { + intents: Intent[] + languageCode: string + pattern_entities: PatternEntity[] + contexts: string[] + list_entities: ListEntityModel[] + tfidf: TFIDF + vocab: string[] + kmeans: SerializedKmeansResult | undefined + ctx_model: ModelOf + intent_model_by_ctx: _.Dictionary> + slots_model_by_intent: _.Dictionary> +} export type PredictableModel = Omit & { - data: { - input: TrainInput - output: TrainOutput + data: PredictableModelData +} + +const PTBSlotDef = new ptb.PTBMessage('SlotDef', { + name: { type: 'string', id: 1, rule: 'required' }, + entities: { type: 'string', id: 2, rule: 'repeated' } +}) + +const PTBIntentDef = new ptb.PTBMessage('IntentDef', { + name: { type: 'string', id: 1, rule: 'required' }, + contexts: { type: 'string', id: 2, rule: 'repeated' }, + slot_definitions: { type: PTBSlotDef, id: 3, rule: 'repeated' }, + utterances: { type: 'string', id: 4, rule: 'repeated' } +}) + +const PTBPatternEntityDef = new ptb.PTBMessage('PatternEntityDef', { + name: { type: 'string', id: 1, rule: 'required' }, + pattern: { type: 'string', id: 2, rule: 'required' }, + examples: { type: 'string', id: 3, rule: 'repeated' }, + matchCase: { type: 'bool', id: 4, rule: 'required' }, + sensitive: { type: 'bool', id: 5, rule: 'required' } +}) + +const PTBSynonymValue = new ptb.PTBMessage('ListEntitySynonymValue', { + tokens: { type: 'string', id: 1, rule: 'repeated' } +}) + +const PTBSynonym = new ptb.PTBMessage('ListEntitySynonym', { + values: { type: PTBSynonymValue, id: 1, rule: 'repeated' } +}) + +const PTBListEntityModel = new ptb.PTBMessage('ListEntityModel', { + type: { type: 'string', id: 1, rule: 'required' }, + id: { type: 'string', id: 2, rule: 'required' }, + entityName: { type: 'string', id: 3, rule: 'required' }, + fuzzyTolerance: { type: 'double', id: 4, rule: 'required' }, + sensitive: { type: 'bool', id: 5, rule: 'required' }, + mappingsTokens: { keyType: 'string', type: PTBSynonym, id: 6 } +}) + +const PTBCentroid = new ptb.PTBMessage('KmeanCentroid', { + centroid: { type: 'double', id: 1, rule: 'repeated' }, + error: { type: 'double', id: 2, rule: 'required' }, + size: { type: 'int32', id: 3, rule: 'required' } +}) + +const PTBKmeansResult = new ptb.PTBMessage('KmeansResult', { + clusters: { type: 'int32', id: 1, rule: 'repeated' }, + centroids: { type: PTBCentroid, id: 2, rule: 'repeated' }, + iterations: { type: 'int32', id: 3, rule: 'required' } +}) + +let model_data_idx = 0 +const PTBPredictableModelData = new ptb.PTBMessage('PredictableModelData', { + intents: { type: PTBIntentDef, id: model_data_idx++, rule: 'repeated' }, + languageCode: { type: 'string', id: model_data_idx++, rule: 'required' }, + pattern_entities: { type: PTBPatternEntityDef, id: model_data_idx++, rule: 'repeated' }, + contexts: { type: 'string', id: model_data_idx++, rule: 'repeated' }, + list_entities: { type: PTBListEntityModel, id: model_data_idx++, rule: 'repeated' }, + tfidf: { keyType: 'string', type: 'double', id: model_data_idx++ }, + vocab: { type: 'string', id: model_data_idx++, rule: 'repeated' }, + kmeans: { type: PTBKmeansResult, id: model_data_idx++, rule: 'optional' }, + ctx_model: { type: SvmIntentClassifier.modelType, id: model_data_idx++, rule: 'required' }, + intent_model_by_ctx: { keyType: 'string', type: OOSIntentClassifier.modelType, id: model_data_idx++ }, + slots_model_by_intent: { keyType: 
'string', type: SlotTagger.modelType, id: model_data_idx++ } +}) + +const encodeListEntity = (list_entity: ListEntityModel): ptb.Infer => { + const { mappingsTokens: encodedMappingTokens, ...others } = list_entity + const decodedMappingTokens = _.mapValues(encodedMappingTokens, (syn) => ({ + values: syn.map((synValue) => ({ tokens: synValue })) + })) + return { + ...others, + mappingsTokens: decodedMappingTokens + } +} + +const decodeListEntity = (list_entity: ptb.Infer): ListEntityModel => { + const { mappingsTokens: decodedMappingTokens, ...others } = list_entity + const encodedMappingTokens = _.mapValues(decodedMappingTokens, ({ values }) => + values ? values.map(({ tokens }) => tokens ?? []) : [] + ) + return { + ...others, + type: 'custom.list', + mappingsTokens: encodedMappingTokens + } +} + +const decodeSlot = (slot: ptb.Infer): SlotDefinition => { + const { name, entities } = slot + return { + name, + entities: entities ?? [] } } -export function serializeModel(model: PredictableModel): Model { - const { id, startedAt, finishedAt, data } = model +const decodeIntent = (intent: ptb.Infer): Intent => { + const { name, slot_definitions, contexts, utterances } = intent + return { + name, + slot_definitions: slot_definitions ? slot_definitions.map(decodeSlot) : [], + contexts: contexts ?? [], + utterances: utterances ?? [] + } +} + +const decodePattern = (pattern: ptb.Infer): PatternEntity => { + const { examples, ...others } = pattern + return { + ...others, + examples: examples ?? [] + } +} + +const decodeKmeans = (kmeans: ptb.Infer): SerializedKmeansResult => { + const { iterations, centroids, clusters } = kmeans + return { + iterations, + clusters: clusters ?? [], + centroids: centroids + ? centroids.map(({ centroid, error, size }) => ({ centroid: centroid ?? [], error, size })) + : [] + } +} + +export const serializeModel = (model: PredictableModel): Model => { + const { id, startedAt, finishedAt, data: predictableData } = model + + const { list_entities, ...others } = predictableData + + const serializedData = Buffer.from( + PTBPredictableModelData.encode({ ...others, list_entities: list_entities.map(encodeListEntity) }) + ) const serialized: Model = { id, startedAt, finishedAt, - data: { - input: '', - output: '' - } + data: serializedData } - - serialized.data.input = JSON.stringify(data.input) - serialized.data.output = JSON.stringify(data.output) - return serialized } -export function deserializeModel(serialized: Model): PredictableModel { - const { id, startedAt, finishedAt, data } = serialized +export const deserializeModel = (serialized: Model): PredictableModel => { + const { id, startedAt, finishedAt, data: serializedData } = serialized + + const { + list_entities, + intents, + languageCode, + pattern_entities, + contexts, + tfidf, + vocab, + kmeans, + ctx_model, + intent_model_by_ctx, + slots_model_by_intent + } = PTBPredictableModelData.decode(serializedData) const model: PredictableModel = { id, startedAt, finishedAt, data: { - input: JSON.parse(data.input), - output: JSON.parse(data.output) + list_entities: list_entities ? list_entities.map(decodeListEntity) : [], + intents: intents ? intents.map(decodeIntent) : [], + languageCode, + pattern_entities: pattern_entities ? pattern_entities.map(decodePattern) : [], + contexts: contexts ?? [], + tfidf, + vocab: vocab ?? 
[], + kmeans: kmeans && decodeKmeans(kmeans), + ctx_model, + intent_model_by_ctx, + slots_model_by_intent } } return model diff --git a/packages/nlu-engine/src/engine/predict-pipeline.ts b/packages/nlu-engine/src/engine/predict-pipeline.ts index afb41127..a4773575 100644 --- a/packages/nlu-engine/src/engine/predict-pipeline.ts +++ b/packages/nlu-engine/src/engine/predict-pipeline.ts @@ -7,14 +7,14 @@ import { IntentPrediction as StanIntentPrediction, PredictOutput } from 'src/typings' -import { MLToolkit } from '../ml/typings' +import * as MLToolkit from '../ml/toolkit' import { CustomEntityExtractor } from './entities/custom-extractor' import { IntentPrediction, IntentPredictions, NoneableIntentPredictions } from './intents/intent-classifier' import { OOSIntentClassifier } from './intents/oos-intent-classfier' import { SvmIntentClassifier } from './intents/svm-intent-classifier' import { spellCheck as spellCheckUtterance } from './language/spell-check' -import SlotTagger from './slots/slot-tagger' +import { SlotTagger } from './slots/slot-tagger' import { EntityExtractionResult, ExtractedEntity, @@ -27,7 +27,7 @@ import { } from './typings' import Utterance, { buildUtteranceBatch, preprocessRawUtterance, UtteranceEntity } from './utterance/utterance' -export interface Predictors { +export type Predictors = { lang: string tfidf: TFIDF vocab: string[] @@ -41,12 +41,12 @@ export interface Predictors { kmeans?: MLToolkit.KMeans.KmeansResult } -export interface PredictInput { +export type PredictInput = { language: string text: string } -interface InitialStep { +type InitialStep = { rawText: string languageCode: string } diff --git a/packages/nlu-engine/src/engine/slots/schemas.test.ts b/packages/nlu-engine/src/engine/slots/schemas.test.ts deleted file mode 100644 index b71f6eef..00000000 --- a/packages/nlu-engine/src/engine/slots/schemas.test.ts +++ /dev/null @@ -1,55 +0,0 @@ -import Bluebird from 'bluebird' -import Joi, { validate } from 'joi' -import { SlotDefinition } from '../typings' -import { SlotDefinitionSchema } from './schemas' - -const expectValidates = async (model: any, schema: Joi.ObjectSchema) => { - await expect(validate(model, schema)).resolves.not.toThrow() -} - -const expectThrows = async (model: any, schema: Joi.ObjectSchema) => { - await expect(validate(model, schema)).rejects.toThrow() -} - -test('slot model schema', async () => { - const shouldPass: SlotDefinition[] = [ - { - entities: ['heyhey', 'entity'], - name: 'someName' - }, - { - entities: [], - name: 'yoyoyo' - }, - { - entities: [], - name: 'yoyoyo', - extraKey: 42 - } - ] - - const shouldFail: any[] = [ - undefined, - null, - {}, - { - entities: undefined, - name: 'someName' - }, - { - entities: ['heyhey', 'entity'], - name: '' - }, - { - entities: ['heyhey', 'entity'], - name: undefined - }, - { - entities: [''], // no empty entity name - name: 'someName' - } - ] - - await Bluebird.map(shouldPass, (m) => expectValidates(m, SlotDefinitionSchema.required())) - await Bluebird.map(shouldFail, (m) => expectThrows(m, SlotDefinitionSchema.required())) -}) diff --git a/packages/nlu-engine/src/engine/slots/schemas.ts b/packages/nlu-engine/src/engine/slots/schemas.ts deleted file mode 100644 index 430cd817..00000000 --- a/packages/nlu-engine/src/engine/slots/schemas.ts +++ /dev/null @@ -1,8 +0,0 @@ -import Joi from 'joi' -import { SlotDefinition } from '../typings' - -const keys: Record = { - name: Joi.string().required(), - entities: Joi.array().items(Joi.string()).required() -} -export const SlotDefinitionSchema = 
Joi.object().keys(keys).unknown(true) // extra keys are accepted diff --git a/packages/nlu-engine/src/engine/slots/slot-featurizer.test.ts b/packages/nlu-engine/src/engine/slots/slot-featurizer.test.ts index c6547363..83f8609a 100644 --- a/packages/nlu-engine/src/engine/slots/slot-featurizer.test.ts +++ b/packages/nlu-engine/src/engine/slots/slot-featurizer.test.ts @@ -1,8 +1,16 @@ import { SPACE } from '../tools/token-utils' -import Utterance, { UtteranceEntity, UtteranceToken } from '../utterance/utterance' +import Utterance, { UtteranceEntity, UtteranceSlot, UtteranceToken } from '../utterance/utterance' import * as featurizer from './slot-featurizer' +const asToken = (x: Partial<{ value: string }>): UtteranceToken => { + return x as UtteranceToken +} + +const asSlot = (x: Partial): UtteranceSlot => { + return x as UtteranceSlot +} + describe('CRF Featurizer 2', () => { test('featToCRFsuiteAttr', () => { const feats = [ @@ -87,7 +95,9 @@ describe('CRF Featurizer 2', () => { { value: SPACE, isWord: false, slots: ['hello'] }, { value: SPACE, isWord: false, entities: ['hello'] }, { value: SPACE, isWord: false } - ].map((tok) => Object.defineProperty(tok, 'toString', { value: jest.fn().mockReturnValue(tok.value) })) + ] + .map((tok) => Object.defineProperty(tok, 'toString', { value: jest.fn().mockReturnValue(tok.value) })) + .map(asToken) const feat = featurizer.getWordFeat(tokens[0], true) const feat1 = featurizer.getWordFeat(tokens[0], false) @@ -131,17 +141,18 @@ describe('CRF Featurizer 2', () => { }) test('getInVocabFeat', () => { - const tokens = [{ value: 'fly' }, { value: SPACE }, { value: 'paul' }].map((tok) => - Object.defineProperty(tok, 'toString', { value: () => tok.value, enumerable: true }) - ) + const tokens = [{ value: 'fly' }, { value: SPACE }, { value: 'paul' }] + .map((tok) => Object.defineProperty(tok, 'toString', { value: () => tok.value, enumerable: true })) + .map(asToken) const vocab = ['fly'] - expect(featurizer.getInVocabFeat({ ...tokens[0], slots: ['lol.A.W'] }, vocab).value).toBeTruthy() + const slots = [asSlot({ name: 'lol' })] + expect(featurizer.getInVocabFeat({ ...tokens[0], slots }, vocab).value).toBeTruthy() expect(featurizer.getInVocabFeat(tokens[0], vocab).value).toBeTruthy() - expect(featurizer.getInVocabFeat({ ...tokens[1], slots: ['lol.A.W'] }, vocab).value).toBeFalsy() + expect(featurizer.getInVocabFeat({ ...tokens[1], slots }, vocab).value).toBeFalsy() expect(featurizer.getInVocabFeat(tokens[1], vocab).value).toBeFalsy() - expect(featurizer.getInVocabFeat({ ...tokens[2], slots: ['lol.A.W'] }, vocab).value).toBeFalsy() + expect(featurizer.getInVocabFeat({ ...tokens[2], slots }, vocab).value).toBeFalsy() expect(featurizer.getInVocabFeat(tokens[2], vocab).value).toBeFalsy() }) diff --git a/packages/nlu-engine/src/engine/slots/slot-featurizer.ts b/packages/nlu-engine/src/engine/slots/slot-featurizer.ts index ab064059..8297837f 100644 --- a/packages/nlu-engine/src/engine/slots/slot-featurizer.ts +++ b/packages/nlu-engine/src/engine/slots/slot-featurizer.ts @@ -8,7 +8,7 @@ import Utterance, { UtteranceToken } from '../utterance/utterance' type FeatureValue = string | number | boolean -export interface CRFFeature { +export type CRFFeature = { name: string value: FeatureValue boost?: number diff --git a/packages/nlu-engine/src/engine/slots/slot-tagger-utils.ts b/packages/nlu-engine/src/engine/slots/slot-tagger-utils.ts new file mode 100644 index 00000000..93870d84 --- /dev/null +++ b/packages/nlu-engine/src/engine/slots/slot-tagger-utils.ts @@ -0,0 +1,122 
@@ +import _ from 'lodash' +import { BIO, SlotExtractionResult, SlotDefinition, Tag } from '../typings' +import Utterance from '../utterance/utterance' + +export type TagResult = { + tag: Tag | string + name: string + probability: number +} + +const MIN_SLOT_CONFIDENCE = 0.15 + +export function labelizeUtterance(utterance: Utterance): string[] { + return utterance.tokens + .filter((x) => !x.isSpace) + .map((token) => { + if (_.isEmpty(token.slots)) { + return BIO.OUT + } + + const slot = token.slots[0] + const tag = slot.startTokenIdx === token.index ? BIO.BEGINNING : BIO.INSIDE + const any = _.isEmpty(token.entities) ? '/any' : '' + + return `${tag}-${slot.name}${any}` + }) +} + +export function predictionLabelToTagResult(prediction: { [label: string]: number }): TagResult { + const pairedPreds = _.chain(prediction) + .mapValues((value, key) => value + (prediction[`${key}/any`] || 0)) + .toPairs() + .value() + + if (!pairedPreds.length) { + throw new Error('there should be at least one prediction when converting predictions to tag result') + } + const [label, probability] = _.maxBy(pairedPreds, (x) => x[1])! + + return { + tag: label[0], + name: label.slice(2).replace('/any', ''), + probability + } as TagResult +} + +export function removeInvalidTagsForIntent(slot_definitions: SlotDefinition[], tag: TagResult): TagResult { + if (tag.tag === BIO.OUT) { + return tag + } + + const foundInSlotDef = !!slot_definitions.find((s) => s.name === tag.name) + + if (tag.probability < MIN_SLOT_CONFIDENCE || !foundInSlotDef) { + tag = { + tag: BIO.OUT, + name: '', + probability: 1 - tag.probability // anything would do here + } + } + + return tag +} + +export function makeExtractedSlots( + slot_entities: string[], + utterance: Utterance, + slotTagResults: TagResult[] +): SlotExtractionResult[] { + return _.zipWith( + utterance.tokens.filter((t) => !t.isSpace), + slotTagResults, + (token, tagRes) => ({ token, tagRes }) + ) + .filter(({ tagRes }) => tagRes.tag !== BIO.OUT) + .reduce((combined, { token, tagRes }) => { + const last = _.last(combined) + const shouldConcatWithPrev = tagRes.tag === BIO.INSIDE && _.get(last, 'slot.name') === tagRes.name + + if (shouldConcatWithPrev && last) { + const newEnd = token.offset + token.value.length + const newSource = utterance.toString({ strategy: 'keep-token' }).slice(last.start, newEnd) // we use slice in case tokens are space split + last.slot.source = newSource + last.slot.value = newSource + last.end = newEnd + + return [...combined.slice(0, -1), last] + } else { + return [ + ...combined, + { + slot: { + name: tagRes.name, + confidence: tagRes.probability, + source: token.toString(), + value: token.toString() + }, + start: token.offset, + end: token.offset + token.value.length + } + ] + } + }, [] as SlotExtractionResult[]) + .map((extracted: SlotExtractionResult) => { + const associatedEntityInRange = utterance.entities.find( + (e) => + ((e.startPos <= extracted.start && e.endPos >= extracted.end) || // slot is fully contained by an entity + (e.startPos >= extracted.start && e.endPos <= extracted.end)) && // entity is fully within the tagged slot + _.includes(slot_entities, e.type) // entity is part of the possible entities + ) + if (associatedEntityInRange) { + const { startPos, endPos, startTokenIdx, endTokenIdx, ...x } = associatedEntityInRange + extracted.slot.entity = { + ...x, + start: startPos, + end: endPos + } + extracted.slot.value = associatedEntityInRange.value + } + return extracted + }) +} diff --git 
a/packages/nlu-engine/src/engine/slots/slot-tagger.test.ts b/packages/nlu-engine/src/engine/slots/slot-tagger.test.ts index 3a33f8cf..b445b4d9 100644 --- a/packages/nlu-engine/src/engine/slots/slot-tagger.test.ts +++ b/packages/nlu-engine/src/engine/slots/slot-tagger.test.ts @@ -1,14 +1,14 @@ import _ from 'lodash' +import { Logger } from 'src/typings' +import { ModelLoadingError } from '../errors' +import { makeFakeTools } from '../test-utils/fake-tools' import { makeTestUtterance } from '../test-utils/fake-utterance' import { BIO, ExtractedEntity, ExtractedSlot } from '../typings' import Utterance from '../utterance/utterance' -import SlotTagger, { labelizeUtterance, makeExtractedSlots } from './slot-tagger' -import { TagResult } from './typings' -import { makeFakeTools } from '../test-utils/fake-tools' -import { ModelLoadingError } from '../../errors' -import { Logger } from 'src/typings' +import { SlotTagger } from './slot-tagger' +import { labelizeUtterance, makeExtractedSlots, TagResult } from './slot-tagger-utils' const fakeTools = makeFakeTools(300, ['en']) const dummyProgress = (p: number) => {} @@ -148,7 +148,7 @@ describe('makeExtractedSlots', () => { describe('Slot tagger component lifecycle', () => { test('Slot tagger with no slots should predict empty array', async () => { let slotTagger = new SlotTagger(fakeTools, dummyLogger as Logger) - await slotTagger.train( + const model = await slotTagger.train( { intent: { name: 'someIntent', @@ -161,7 +161,6 @@ describe('Slot tagger component lifecycle', () => { dummyProgress ) - const model = slotTagger.serialize() slotTagger = new SlotTagger(fakeTools, dummyLogger as Logger) await slotTagger.load(model) @@ -169,33 +168,5 @@ describe('Slot tagger component lifecycle', () => { expect(prediction.length).toBe(0) }) - test('When model is corrupted, loading throws', async () => { - const slotTagger = new SlotTagger(fakeTools, dummyLogger as Logger) - await slotTagger.train( - { - intent: { - name: 'someIntent', - contexts: [], - utterances: [dudeWheresMyCar], - slot_definitions: [] - }, - list_entites: [] - }, - dummyProgress - ) - - const model = slotTagger.serialize() - - // act && asert - await expect(slotTagger.load(`${model} I'm not a rapper`)).rejects.toThrowError(ModelLoadingError) - - const parsed = JSON.parse(model) - parsed['someKey'] = 'someValue' - await expect(slotTagger.load(JSON.stringify(parsed))).rejects.toThrowError(ModelLoadingError) - - const undef: unknown = undefined - await expect(slotTagger.load(undef as string)).rejects.toThrowError(ModelLoadingError) - }) - // TODO: add a fake CRF tagger to the fake tools and assert the slot tagger works well as a whole }) diff --git a/packages/nlu-engine/src/engine/slots/slot-tagger.ts b/packages/nlu-engine/src/engine/slots/slot-tagger.ts index 56969876..d935ed66 100644 --- a/packages/nlu-engine/src/engine/slots/slot-tagger.ts +++ b/packages/nlu-engine/src/engine/slots/slot-tagger.ts @@ -1,18 +1,22 @@ -import fse from 'fs-extra' -import Joi, { validate } from 'joi' +import * as ptb from '@bpinternal/ptb-schema' import _ from 'lodash' +import { ModelOf, PipelineComponent } from 'src/component' import { Logger } from 'src/typings' -import tmp from 'tmp' -import { ModelLoadingError } from '../../errors' -import { MLToolkit } from '../../ml/typings' +import * as MLToolkit from '../../ml/toolkit' +import { ModelLoadingError } from '../errors' import { getEntitiesAndVocabOfIntent } from '../intents/intent-vocab' -import { BIO, Intent, ListEntityModel, SlotExtractionResult, Tools, 
SlotDefinition } from '../typings' +import { Intent, ListEntityModel, SlotExtractionResult, Tools, SlotDefinition } from '../typings' import Utterance, { UtteranceToken } from '../utterance/utterance' -import { SlotDefinitionSchema } from './schemas' import * as featurizer from './slot-featurizer' -import { TagResult, IntentSlotFeatures } from './typings' +import { + labelizeUtterance, + makeExtractedSlots, + predictionLabelToTagResult, + removeInvalidTagsForIntent +} from './slot-tagger-utils' +import { IntentSlotFeatures } from './typings' const CRF_TRAINER_PARAMS = { c1: '0.0001', @@ -22,180 +26,98 @@ const CRF_TRAINER_PARAMS = { 'feature.possible_states': '1' } -const MIN_SLOT_CONFIDENCE = 0.15 - -export function labelizeUtterance(utterance: Utterance): string[] { - return utterance.tokens - .filter((x) => !x.isSpace) - .map((token) => { - if (_.isEmpty(token.slots)) { - return BIO.OUT - } - - const slot = token.slots[0] - const tag = slot.startTokenIdx === token.index ? BIO.BEGINNING : BIO.INSIDE - const any = _.isEmpty(token.entities) ? '/any' : '' - - return `${tag}-${slot.name}${any}` - }) -} - -function predictionLabelToTagResult(prediction: { [label: string]: number }): TagResult { - const pairedPreds = _.chain(prediction) - .mapValues((value, key) => value + (prediction[`${key}/any`] || 0)) - .toPairs() - .value() - - if (!pairedPreds.length) { - throw new Error('there should be at least one prediction when converting predictions to tag result') - } - const [label, probability] = _.maxBy(pairedPreds, (x) => x[1])! - - return { - tag: label[0], - name: label.slice(2).replace('/any', ''), - probability - } as TagResult -} - -function removeInvalidTagsForIntent(slot_definitions: SlotDefinition[], tag: TagResult): TagResult { - if (tag.tag === BIO.OUT) { - return tag - } - - const foundInSlotDef = !!slot_definitions.find((s) => s.name === tag.name) - - if (tag.probability < MIN_SLOT_CONFIDENCE || !foundInSlotDef) { - tag = { - tag: BIO.OUT, - name: '', - probability: 1 - tag.probability // anything would do here - } - } - - return tag -} - -export function makeExtractedSlots( - slot_entities: string[], - utterance: Utterance, - slotTagResults: TagResult[] -): SlotExtractionResult[] { - return _.zipWith( - utterance.tokens.filter((t) => !t.isSpace), - slotTagResults, - (token, tagRes) => ({ token, tagRes }) - ) - .filter(({ tagRes }) => tagRes.tag !== BIO.OUT) - .reduce((combined, { token, tagRes }) => { - const last = _.last(combined) - const shouldConcatWithPrev = tagRes.tag === BIO.INSIDE && _.get(last, 'slot.name') === tagRes.name - - if (shouldConcatWithPrev && last) { - const newEnd = token.offset + token.value.length - const newSource = utterance.toString({ strategy: 'keep-token' }).slice(last.start, newEnd) // we use slice in case tokens are space split - last.slot.source = newSource - last.slot.value = newSource - last.end = newEnd - - return [...combined.slice(0, -1), last] - } else { - return [ - ...combined, - { - slot: { - name: tagRes.name, - confidence: tagRes.probability, - source: token.toString(), - value: token.toString() - }, - start: token.offset, - end: token.offset + token.value.length - } - ] - } - }, [] as SlotExtractionResult[]) - .map((extracted: SlotExtractionResult) => { - const associatedEntityInRange = utterance.entities.find( - (e) => - ((e.startPos <= extracted.start && e.endPos >= extracted.end) || // slot is fully contained by an entity - (e.startPos >= extracted.start && e.endPos <= extracted.end)) && // entity is fully within the tagged slot - 
_.includes(slot_entities, e.type) // entity is part of the possible entities - ) - if (associatedEntityInRange) { - extracted.slot.entity = { - ..._.omit(associatedEntityInRange, 'startPos', 'endPos', 'startTokenIdx', 'endTokenIdx'), - start: associatedEntityInRange.startPos, - end: associatedEntityInRange.endPos - } - extracted.slot.value = associatedEntityInRange.value - } - return extracted - }) +const PTBSlotDefinition = new ptb.PTBMessage('SlotDefinition', { + name: { type: 'string', id: 1, rule: 'required' }, + entities: { type: 'string', id: 2, rule: 'repeated' } +}) + +const PTBIntentSlotFeatures = new ptb.PTBMessage('IntentSlotFeatures', { + name: { type: 'string', id: 1, rule: 'required' }, + vocab: { type: 'string', id: 2, rule: 'repeated' }, + slot_entities: { type: 'string', id: 3, rule: 'repeated' } +}) + +const PTBSlotTaggerModel = new ptb.PTBMessage('SlotTaggerModel', { + crfModel: { type: MLToolkit.CRF.Tagger.modelType, id: 1, rule: 'optional' }, + intentFeatures: { type: PTBIntentSlotFeatures, id: 2, rule: 'required' }, + slot_definitions: { type: PTBSlotDefinition, id: 3, rule: 'repeated' } +}) + +type Model = { + crfModel: ModelOf | undefined + intentFeatures: IntentSlotFeatures + slot_definitions: SlotDefinition[] } -interface TrainInput { +type TrainInput = { intent: Intent list_entites: ListEntityModel[] } -export interface Model { - crfModel: Buffer | undefined - intentFeatures: IntentSlotFeatures - slot_definitions: SlotDefinition[] -} - -interface Predictors { +type Predictors = { crfTagger: MLToolkit.CRF.Tagger | undefined intentFeatures: IntentSlotFeatures slot_definitions: SlotDefinition[] } -const intentSlotFeaturesSchema = Joi.object() - .keys({ - name: Joi.string().required(), - vocab: Joi.array().items(Joi.string().allow('')).required(), - slot_entities: Joi.array().items(Joi.string()).required() - }) - .required() - -export const modelSchema = Joi.object() - .keys({ - crfModel: Joi.binary().optional(), - intentFeatures: intentSlotFeaturesSchema, - slot_definitions: Joi.array().items(SlotDefinitionSchema).required() - }) - .required() - -export default class SlotTagger { - private static _name = 'CRF Slot Tagger' - - private model: Model | undefined +export class SlotTagger + implements PipelineComponent { + private static _displayName = 'CRF Slot Tagger' + private static _name = 'crf-slot-tagger' + private predictors: Predictors | undefined private mlToolkit: typeof MLToolkit + public get name() { + return SlotTagger._name + } + + public static get modelType() { + return PTBSlotTaggerModel + } + + public get modelType() { + return PTBSlotTaggerModel + } + constructor(tools: Tools, private logger: Logger) { this.mlToolkit = tools.mlToolkit } - public load = async (serialized: string) => { + public load = async (serialized: ptb.Infer) => { try { - const raw: Model = JSON.parse(serialized) - raw.crfModel = raw.crfModel && Buffer.from(raw.crfModel) + const model = this.deserializeModel(serialized) + this.predictors = await this._makePredictors(model) + } catch (thrown) { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) + throw new ModelLoadingError(SlotTagger._displayName, err) + } + } - const model: Model = await validate(raw, modelSchema) + private deserializeModel = (serialized: ptb.Infer): Model => { + const { crfModel, intentFeatures, slot_definitions } = serialized + return { + crfModel, + intentFeatures: { + ...intentFeatures, + vocab: intentFeatures.vocab ?? [], + slot_entities: intentFeatures.slot_entities ?? 
[] + }, + slot_definitions: slot_definitions ? slot_definitions.map(this.deserializeSlotDef) : [] + } + } - this.predictors = await this._makePredictors(model) - this.model = model - } catch (err) { - throw new ModelLoadingError(SlotTagger._name, err) + private deserializeSlotDef = (encoded: ptb.Infer): SlotDefinition => { + const { entities, name } = encoded + return { + name, + entities: entities ?? [] } } private async _makePredictors(model: Model): Promise { const { intentFeatures, crfModel, slot_definitions } = model - const crfTagger = crfModel && (await this._makeCrfTagger(crfModel)) - + const crfTagger = crfModel ? await this._makeCrfTagger(crfModel) : undefined return { crfTagger, intentFeatures, @@ -203,35 +125,27 @@ export default class SlotTagger { } } - private async _makeCrfTagger(crfModel: Buffer) { - const crfModelFn = tmp.tmpNameSync() - fse.writeFileSync(crfModelFn, crfModel) - const crfTagger = new this.mlToolkit.CRF.Tagger() - await crfTagger.initialize() - crfTagger.open(crfModelFn) + private async _makeCrfTagger(crfModel: ModelOf) { + const crfTagger = new this.mlToolkit.CRF.Tagger(this.logger) + await crfTagger.load(crfModel) return crfTagger } - serialize(): string { - if (!this.model) { - throw new Error(`${SlotTagger._name} must be trained before calling serialize.`) - } - return JSON.stringify(this.model) - } - - async train(trainSet: TrainInput, progress: (p: number) => void): Promise { + public async train( + trainSet: TrainInput, + progress: (p: number) => void + ): Promise> { const { intent, list_entites } = trainSet const intentFeatures = getEntitiesAndVocabOfIntent(intent, list_entites) const { slot_definitions } = intent if (slot_definitions.length <= 0) { - this.model = { + progress(1) + return { crfModel: undefined, intentFeatures, slot_definitions } - progress(1) - return } const elements: MLToolkit.CRF.DataPoint[] = [] @@ -245,20 +159,16 @@ export default class SlotTagger { elements.push({ features, labels }) } - const trainer = new this.mlToolkit.CRF.Trainer(this.logger) - await trainer.initialize() const dummyProgress = () => {} - const crfModelFn = await trainer.train(elements, CRF_TRAINER_PARAMS, dummyProgress) - - const crfModel = await fse.readFile(crfModelFn) + const crf = new this.mlToolkit.CRF.Tagger(this.logger) + const crfModel = await crf.train({ elements, options: CRF_TRAINER_PARAMS }, dummyProgress) + progress(1) - this.model = { + return { crfModel, intentFeatures, slot_definitions } - - progress(1) } private tokenSliceFeatures( @@ -335,13 +245,9 @@ export default class SlotTagger { .value() } - async predict(utterance: Utterance): Promise { + public async predict(utterance: Utterance): Promise { if (!this.predictors) { - if (!this.model) { - throw new Error(`${SlotTagger._name} must be trained before calling predict.`) - } - - this.predictors = await this._makePredictors(this.model) + throw new Error(`${SlotTagger._displayName} must load model before calling predict.`) } const { intentFeatures, crfTagger, slot_definitions } = this.predictors @@ -352,7 +258,7 @@ export default class SlotTagger { const features = this._getSequenceFeatures(intentFeatures, utterance, true) - const predictions = crfTagger.marginal(features) + const predictions = await crfTagger.marginal(features) return _.chain(predictions) .map(predictionLabelToTagResult) diff --git a/packages/nlu-engine/src/engine/slots/typings.ts b/packages/nlu-engine/src/engine/slots/typings.ts index 12a58938..62e7a7d9 100644 --- a/packages/nlu-engine/src/engine/slots/typings.ts +++ 
b/packages/nlu-engine/src/engine/slots/typings.ts
@@ -1,12 +1,4 @@
-import { Tag } from '../typings'
-
-export interface TagResult {
-  tag: Tag | string
-  name: string
-  probability: number
-}
-
-export interface IntentSlotFeatures {
+export type IntentSlotFeatures = {
   name: string
   vocab: string[]
   slot_entities: string[]
diff --git a/packages/nlu-engine/src/engine/test-utils/fake-kmeans.ts b/packages/nlu-engine/src/engine/test-utils/fake-kmeans.ts
index 5722c0b9..0be949bd 100644
--- a/packages/nlu-engine/src/engine/test-utils/fake-kmeans.ts
+++ b/packages/nlu-engine/src/engine/test-utils/fake-kmeans.ts
@@ -1,4 +1,4 @@
-import { MLToolkit } from '../../ml/typings'
+import * as MLToolkit from '../../ml/toolkit'
 
 export const fakeKmeans: typeof MLToolkit.KMeans = {
   kmeans: (data: MLToolkit.KMeans.DataPoint[], k: number, options: MLToolkit.KMeans.KMeansOptions) => {
diff --git a/packages/nlu-engine/src/engine/test-utils/fake-svm.ts b/packages/nlu-engine/src/engine/test-utils/fake-svm.ts
index 845f7800..4b6ce5f2 100644
--- a/packages/nlu-engine/src/engine/test-utils/fake-svm.ts
+++ b/packages/nlu-engine/src/engine/test-utils/fake-svm.ts
@@ -1,45 +1,39 @@
 import _ from 'lodash'
-import { MLToolkit } from '../../ml/typings'
+import { ModelOf } from 'src/component'
+import * as MLToolkit from '../../ml/toolkit'
 
-export class FakeSvmTrainer implements MLToolkit.SVM.Trainer {
-  private _isTrained = false
-  constructor() {}
-  async train(
-    points: MLToolkit.SVM.DataPoint[],
-    options?: MLToolkit.SVM.SVMOptions | undefined,
-    callback?: MLToolkit.SVM.TrainProgressCallback | undefined
-  ): Promise {
+export class FakeSvm extends MLToolkit.SVM.Classifier {
+  private model: ModelOf | undefined
+
+  public async train(
+    input: MLToolkit.SVM.SVMTrainInput,
+    callback: MLToolkit.SVM.TrainProgressCallback | undefined
+  ): Promise> {
+    const { points } = input
     if (!points.length) {
       throw new Error('fake SVM needs datapoints')
     }
-    this._isTrained = true
     callback?.(1)
-    return _(points)
+
+    const labels_idx = _(points)
       .map((p) => p.label)
       .uniq()
       .value()
-      .join(',')
-  }
-  isTrained(): boolean {
-    return this._isTrained
-  }
-}
 
-export class FakeSvmPredictor implements MLToolkit.SVM.Predictor {
-  constructor(private model: string) {}
-
-  async predict(coordinates: number[]): Promise {
-    const labels = this.model.split(',')
-    return labels.map((label) => ({ label, confidence: 1 / labels.length }))
+    return {
+      labels_idx
+    } as ModelOf
   }
-  async initialize() {}
-
-  isLoaded(): boolean {
-    return true
+  public load = async (model: ModelOf) => {
+    this.model = model
   }
-  getLabels(): string[] {
-    return this.model.split(',')
+  public async predict(coordinates: number[]): Promise {
+    if (!this.model) {
+      throw new Error('Fake SVM must load model before calling predict.')
+    }
+    const labels = this.model.labels_idx ?? 
[] + return labels.map((label) => ({ label, confidence: 1 / labels.length })) } } diff --git a/packages/nlu-engine/src/engine/test-utils/fake-tools.ts b/packages/nlu-engine/src/engine/test-utils/fake-tools.ts index 52eb645b..446c9380 100644 --- a/packages/nlu-engine/src/engine/test-utils/fake-tools.ts +++ b/packages/nlu-engine/src/engine/test-utils/fake-tools.ts @@ -1,12 +1,12 @@ import _ from 'lodash' -import { MLToolkit } from '../../ml/typings' +import * as MLToolkit from '../../ml/toolkit' import { POSClass, POS_CLASSES } from '../language/pos-tagger' +import { nonSpaceSeparatedLanguages } from '../language/space-separated' import { SPACE, splitSpaceToken } from '../tools/token-utils' import { SystemEntityExtractor, Tools } from '../typings' import { fakeKmeans } from './fake-kmeans' -import { FakeSvmPredictor, FakeSvmTrainer } from './fake-svm' -import { nonSpaceSeparatedLanguages } from '../language/space-separated' +import { FakeSvm } from './fake-svm' /** * Basically mimics the language server tokenizer. Use this function for testing purposes @@ -42,36 +42,21 @@ export const makeFakeTools = (dim: number, languages: string[]): Tools => { return tokens.map((t) => randomlyVectorize(t, dim)) } - const partOfSpeechUtterances = async (utterances: string[][], languageCode: string) => { + const pos_utterances = async (utterances: string[][], languageCode: string) => { return utterances.map(randomlyPOSTag) } - const generateSimilarJunkWords = async (vocabulary: string[], languageCode: string) => { - return vocabulary - } - const getStopWordsForLang = async (languageCode: string) => { return ['the', 'this'] } - const getHealth = () => { - return { - isEnabled: true, - validProvidersCount: 1, - validLanguages: [...languages] - } - } - const getLanguages = () => [...languages] - const getSpecifications = () => { + const getLangServerSpecs = () => { return { - nluVersion: '1.0.0', - languageServer: { - dimensions: dim, - domain: 'domain', - version: '1.0.0' - } + dimensions: dim, + domain: 'domain', + version: '1.0.0' } } @@ -91,10 +76,7 @@ export const makeFakeTools = (dim: number, languages: string[]): Tools => { } const fakeMlToolkit: Partial = { - SVM: { - Predictor: FakeSvmPredictor, - Trainer: FakeSvmTrainer - }, + SVM: { Classifier: FakeSvm }, KMeans: fakeKmeans } @@ -102,12 +84,10 @@ export const makeFakeTools = (dim: number, languages: string[]): Tools => { identify_language: async (utt: string) => 'en', tokenize_utterances, vectorize_tokens, - partOfSpeechUtterances, - generateSimilarJunkWords, + pos_utterances, getStopWordsForLang, - getHealth, getLanguages, - getSpecifications, + getLangServerSpecs, isSpaceSeparated, seededLodashProvider: fakeSeededLodash, systemEntityExtractor: fakeSystemEntityExtractor, diff --git a/packages/nlu-engine/src/engine/tools/patterns-utils.ts b/packages/nlu-engine/src/engine/tools/patterns-utils.ts index aedb9efb..b64f4adb 100644 --- a/packages/nlu-engine/src/engine/tools/patterns-utils.ts +++ b/packages/nlu-engine/src/engine/tools/patterns-utils.ts @@ -1,4 +1,4 @@ -interface ExtractedPattern { +type ExtractedPattern = { value: string sourceIndex: number } diff --git a/packages/nlu-engine/src/engine/tools/tag-spaces.ts b/packages/nlu-engine/src/engine/tools/tag-spaces.ts new file mode 100644 index 00000000..b0cf43a5 --- /dev/null +++ b/packages/nlu-engine/src/engine/tools/tag-spaces.ts @@ -0,0 +1,25 @@ +import { SPACE } from './token-utils' + +type Span = { + charStart: number + charEnd: number + length: number +} + +const getSpan = (regexpMatch: 
RegExpExecArray): Span => ({ + charStart: regexpMatch.index, + charEnd: regexpMatch.index + regexpMatch[0].length, + length: regexpMatch[0].length +}) + +export const tagAllSpaces = (utt: string): Span[] => { + const SPACE_GROUP = new RegExp(`[\\s${SPACE}]{1,}`, 'g') + const spans: Span[] = [] + let match = SPACE_GROUP.exec(utt) + while (match) { + const span = getSpan(match) + spans.push(span) + match = SPACE_GROUP.exec(utt) + } + return spans +} diff --git a/packages/nlu-engine/src/engine/tools/token-utils.test.ts b/packages/nlu-engine/src/engine/tools/token-utils.test.ts index 2382c195..82f39b01 100644 --- a/packages/nlu-engine/src/engine/tools/token-utils.test.ts +++ b/packages/nlu-engine/src/engine/tools/token-utils.test.ts @@ -1,8 +1,10 @@ +import _ from 'lodash' import { LATIN_CHARSET } from './chars' import { isWord, mergeSimilarCharsetTokens, processUtteranceTokens, + restoreOriginalSpaces, restoreOriginalUtteranceCasing, SPACE } from './token-utils' @@ -118,7 +120,47 @@ describe('Raw token processing', () => { expect(processUtteranceTokens(moreToks)).toEqual(['jag', SPACE, 'är', SPACE, 'väldigt', SPACE, 'hungrig']) }) - test('restoreUtteranceTokens', () => { + test('restoreUtteranceTokensSpacing', () => { + const original = ' I left NASA to work at Botpress ' + const tokens = [ + 'I', + SPACE, + 'left', + SPACE, + 'NASA', + SPACE, + 'to', + SPACE, + 'work', + SPACE, + 'at', + SPACE, + 'Bot', + 'press', + SPACE + ] + + expect(restoreOriginalSpaces(tokens, original)).toEqual([ + SPACE, + 'I', + SPACE, + 'left', + SPACE, + 'NASA', + _.repeat(SPACE, 3), + 'to', + SPACE, + 'work', + SPACE, + 'at', + _.repeat(SPACE, 2), + 'Bot', + 'press', + _.repeat(SPACE, 2) + ]) + }) + + test('restoreUtteranceTokensCasing', () => { const original = 'I left NASA to work at Botpress' const tokens = ['i', SPACE, 'left', SPACE, 'nasa', SPACE, 'to', SPACE, 'work', SPACE, 'at', SPACE, 'bot', 'press'] diff --git a/packages/nlu-engine/src/engine/tools/token-utils.ts b/packages/nlu-engine/src/engine/tools/token-utils.ts index 678fcd22..243c0ccf 100644 --- a/packages/nlu-engine/src/engine/tools/token-utils.ts +++ b/packages/nlu-engine/src/engine/tools/token-utils.ts @@ -1,6 +1,7 @@ import _ from 'lodash' import { LATIN_CHARSET, SPECIAL_CHARSET } from './chars' +import { tagAllSpaces } from './tag-spaces' import getVocabTokenizer from './vocab-tokenizer' export const SPACE = '\u2581' @@ -72,6 +73,30 @@ export const processUtteranceTokens = (tokens: string[], vocab: string[] = []): .value() } +export const restoreOriginalSpaces = (utteranceTokens: string[], utterance: string): string[] => { + const nonSpaceTokens = utteranceTokens.filter((t) => !isSpace(t)) + const spans = tagAllSpaces(utterance) + + const tokens: string[] = [] + + let idx = 0 + for (const span of spans) { + let next = nonSpaceTokens[0] + while (next && idx + next.length <= span.charStart) { + nonSpaceTokens.shift() + tokens.push(next) + idx += next.length + next = nonSpaceTokens[0] + } + const spaceToken = _.repeat(SPACE, span.length) + tokens.push(spaceToken) + idx += spaceToken.length + } + + tokens.push(...nonSpaceTokens) + return tokens +} + export const restoreOriginalUtteranceCasing = (utteranceTokens: string[], utterance: string): string[] => { let offset = 0 return utteranceTokens.map((t) => { diff --git a/packages/nlu-engine/src/engine/tools/vocab-tokenizer.ts b/packages/nlu-engine/src/engine/tools/vocab-tokenizer.ts index a63b2def..0429efdd 100644 --- a/packages/nlu-engine/src/engine/tools/vocab-tokenizer.ts +++ 
b/packages/nlu-engine/src/engine/tools/vocab-tokenizer.ts @@ -1,6 +1,6 @@ import _ from 'lodash' -interface VocabMatch { +type VocabMatch = { start: number end: number length: number diff --git a/packages/nlu-engine/src/engine/training-pipeline.test.ts b/packages/nlu-engine/src/engine/training-pipeline.test.ts deleted file mode 100644 index a6696f1b..00000000 --- a/packages/nlu-engine/src/engine/training-pipeline.test.ts +++ /dev/null @@ -1,50 +0,0 @@ -import _ from 'lodash' - -import { tokenizeLatinTextForTests } from './test-utils/fake-tools' -import { TfidfTokens, TrainStep } from './training-pipeline' -import { Intent } from './typings' -import Utterance from './utterance/utterance' - -test('tfidf has a value for all tokens of the training set', async () => { - // arrange - const makeUtterance = (utt: string) => { - const tokens = tokenizeLatinTextForTests(utt) - return new Utterance( - tokens, - tokens.map((t) => Array(300).fill(0)), - tokens.map((t) => 'NOUN'), - 'en' - ) - } - - const makeIntent = (name: string, utterances: string[]) => { - return >{ - name, - contexts: ['global'], - slot_definitions: [], - utterances: utterances.map(makeUtterance) - } - } - - const installBpIntent = makeIntent('install-bp', [ - 'How can I install Botpress?', - 'Can you help me with Botpress install?' - ]) - const reportBugIntent = makeIntent('report-bug', ['There seems to be a bug with Botpress...', 'I have a problem']) - - const intents: Intent[] = [installBpIntent, reportBugIntent] - - // act - const { tfIdf } = await TfidfTokens({ intents } as TrainStep) - - // assert - const botpressToken = 'botpress' - - const utterances = _.flatMap(intents, (i) => i.utterances) - const tokens = _.flatMap(utterances, (u) => u.tokens) - const desiredToken = tokens.find((t) => t.toString({ lowerCase: true }) === botpressToken) - - expect(tfIdf).toBeDefined() - expect(_.round(tfIdf![botpressToken], 2)).toBe(0.54) - expect(_.round(desiredToken!.tfidf, 2)).toBe(0.54) -}) diff --git a/packages/nlu-engine/src/engine/training-pipeline.ts b/packages/nlu-engine/src/engine/training-pipeline.ts index 1d38f2fd..0ae1880e 100644 --- a/packages/nlu-engine/src/engine/training-pipeline.ts +++ b/packages/nlu-engine/src/engine/training-pipeline.ts @@ -1,27 +1,28 @@ import Bluebird from 'bluebird' import _ from 'lodash' -import { MLToolkit } from '../ml/typings' +import { ModelOf } from 'src/component' +import { Override } from 'src/utils/override-type' +import * as MLToolkit from '../ml/toolkit' import { Logger } from '../typings' import { watchDog } from '../utils/watch-dog' -import { serializeKmeans } from './clustering' +import { computeKmeans, serializeKmeans } from './clustering' import { CustomEntityExtractor } from './entities/custom-extractor' import { MultiThreadCustomEntityExtractor } from './entities/custom-extractor/multi-thread-extractor' -import { warmEntityCache } from './entities/entity-cache-manager' +import { warmEntityCache } from './entities/entity-cache' +import { makeListEntityModel } from './entities/list-entity-model' import { getCtxFeatures } from './intents/context-featurizer' import { OOSIntentClassifier } from './intents/oos-intent-classfier' import { SvmIntentClassifier } from './intents/svm-intent-classifier' -import SlotTagger from './slots/slot-tagger' -import { replaceConsecutiveSpaces } from './tools/strings' +import { SlotTagger } from './slots/slot-tagger' + import tfidf from './tools/tfidf' -import { convertToRealSpaces } from './tools/token-utils' import { ColdListEntityModel, - 
EntityCacheDump, EntityExtractionResult, Intent, - ListEntity, + ListEntityWithCache, PatternEntity, SerializedKmeansResult, TFIDF, @@ -31,11 +32,7 @@ import { } from './typings' import Utterance, { buildUtteranceBatch, UtteranceToken } from './utterance/utterance' -type ListEntityWithCache = ListEntity & { - cache: EntityCacheDump -} - -export type TrainInput = Readonly<{ +export type TrainInput = { trainId: string nluSeed: number languageCode: string @@ -43,119 +40,63 @@ export type TrainInput = Readonly<{ list_entities: ListEntityWithCache[] contexts: string[] intents: Intent[] - ctxToTrain: string[] minProgressHeartbeat: number -}> +} -export type TrainStep = Readonly<{ - trainId: string - nluSeed: number - languageCode: string - list_entities: WarmedListEntityModel[] - pattern_entities: PatternEntity[] - contexts: string[] - intents: Intent[] - vocabVectors: Token2Vec - tfIdf?: TFIDF - kmeans?: MLToolkit.KMeans.KmeansResult - ctxToTrain: string[] -}> - -export interface TrainOutput { +type PreprocessTrainStep = Override< + TrainInput, + { + list_entities: WarmedListEntityModel[] + intents: Intent[] + vocabVectors: Token2Vec + } +> +type TfIdfTrainStep = PreprocessTrainStep & { tfIdf: TFIDF } +type ClusterTrainStep = TfIdfTrainStep & { kmeans?: MLToolkit.KMeans.KmeansResult } +type SerialTrainOuput = ClusterTrainStep + +export type TrainOutput = { list_entities: ColdListEntityModel[] tfidf: TFIDF vocab: string[] - kmeans: SerializedKmeansResult | undefined contexts: string[] - ctx_model: string - intent_model_by_ctx: _.Dictionary - slots_model_by_intent: _.Dictionary + kmeans: SerializedKmeansResult | undefined + ctx_model: ModelOf + intent_model_by_ctx: _.Dictionary> + slots_model_by_intent: _.Dictionary> } -interface Tools extends LanguageTools { +type Tools = { logger: Logger -} +} & LanguageTools type progressCB = (p?: number) => void -const NUM_CLUSTERS = 8 -const KMEANS_OPTIONS = { - iterations: 250, - initialization: 'random', - seed: 666 // so training is consistent -} as MLToolkit.KMeans.KMeansOptions - -async function PreprocessInput(input: TrainInput, tools: Tools): Promise { - input = _.cloneDeep(input) - const list_entities = await Bluebird.map(input.list_entities, (list) => - makeListEntityModel(list, input.languageCode, tools) - ) - - const intents = await ProcessIntents(input.intents, input.languageCode, tools) - const vocabVectors = buildVectorsVocab(intents) - - const { trainId, nluSeed, languageCode, pattern_entities, contexts, ctxToTrain } = input - return { - trainId, - nluSeed, - languageCode, - pattern_entities, - contexts, - ctxToTrain, - list_entities, - intents, - vocabVectors - } -} - -async function makeListEntityModel(entity: ListEntityWithCache, languageCode: string, tools: Tools) { - const allValues = _.uniq(Object.keys(entity.synonyms).concat(..._.values(entity.synonyms))) - const allTokens = (await tools.tokenize_utterances(allValues, languageCode)).map((toks) => - toks.map(convertToRealSpaces) - ) - +/** + * ############################## + * ### Step 1 : Preprocessing ### + * ############################## + */ +async function makeWarmListEntityModel( + entity: ListEntityWithCache, + languageCode: string, + tools: Tools +): Promise { const cache = warmEntityCache(entity.cache) - - return { - type: 'custom.list', - id: `custom.list.${entity.name}`, - languageCode, - entityName: entity.name, - fuzzyTolerance: entity.fuzzyTolerance, - sensitive: entity.sensitive, - mappingsTokens: _.mapValues(entity.synonyms, (synonyms, name) => - [...synonyms, 
name].map((syn) => { - const idx = allValues.indexOf(syn) - return allTokens[idx] - }) - ), - cache - } + const model = await makeListEntityModel(entity, languageCode, tools) + return { ...model, cache } } -function computeKmeans(intents: Intent[], tools: Tools): MLToolkit.KMeans.KmeansResult | undefined { - const data = _.chain(intents) - .flatMap((i) => i.utterances) - .flatMap((u) => u.tokens) - .uniqBy((t: UtteranceToken) => t.value) - .map((t: UtteranceToken) => t.vector) - .value() as number[][] - - if (data.length < 2) { - return - } - - const k = data.length > NUM_CLUSTERS ? NUM_CLUSTERS : 2 - - return tools.mlToolkit.KMeans.kmeans(data, k, KMEANS_OPTIONS) -} - -async function ClusterTokens(input: TrainStep, tools: Tools): Promise { - const kmeans = computeKmeans(input.intents, tools) - const copy = { ...input, kmeans } - copy.intents.forEach((x) => x.utterances.forEach((u) => u.setKmeans(kmeans))) - - return copy +async function processIntents( + intents: Intent[], + languageCode: string, + tools: Tools +): Promise[]> { + return Bluebird.map(intents, async (intent) => { + const cleaned = intent.utterances.map((u) => u.trim()) + const utterances = await buildUtteranceBatch(cleaned, languageCode, tools, []) + return { ...intent, utterances } + }) } function buildVectorsVocab(intents: Intent[]): _.Dictionary { @@ -169,108 +110,53 @@ function buildVectorsVocab(intents: Intent[]): _.Dictionary .value() } -async function TrainIntentClassifiers( - input: TrainStep, - tools: Tools, - progress: progressCB -): Promise<_.Dictionary> { - const { list_entities, pattern_entities, intents, ctxToTrain, nluSeed, languageCode } = input +async function preprocessInput(input: TrainInput, tools: Tools): Promise { + input = _.cloneDeep(input) + const list_entities = await Bluebird.map(input.list_entities, (list) => + makeWarmListEntityModel(list, input.languageCode, tools) + ) - const progressPerCtx: _.Dictionary = {} + const intents = await processIntents(input.intents, input.languageCode, tools) + const vocabVectors = buildVectorsVocab(intents) - const clampedProgress = (p: number) => progress(Math.min(0.99, p)) - const reportProgress = () => { - const n = ctxToTrain.length - const total = _(progressPerCtx).values().sum() - clampedProgress(total / n) + return { + ...input, + list_entities, + intents, + vocabVectors } - - const models = await Bluebird.map(ctxToTrain, async (ctx) => { - const taskName = `train Clf for Ctx "${ctx}"` - tools.logger.debug(taskStarted(input.trainId, taskName)) - - const allUtterances = _.flatMap(intents, (i) => i.utterances) - const trainableIntents = intents.filter((i) => i.contexts.includes(ctx)) - - const intentClf = new OOSIntentClassifier(tools, tools.logger) - await intentClf.train( - { - languageCode, - intents: trainableIntents, - list_entities, - nluSeed, - pattern_entities, - allUtterances - }, - (p) => { - progressPerCtx[ctx] = p - reportProgress() - } - ) - - tools.logger.debug(taskDone(input.trainId, taskName)) - const model = intentClf.serialize() - return { ctx, model } - }) - - progress(1) - - return _(models) - .map(({ ctx, model }) => [ctx, model]) - .fromPairs() - .value() } -async function TrainContextClassifier(input: TrainStep, tools: Tools, progress: progressCB): Promise { - const { languageCode, intents, contexts, list_entities, pattern_entities, nluSeed } = input - - const clampedProgress = (p: number) => progress(Math.min(0.99, p)) - - const rootIntents = contexts.map((ctx) => { - const utterances = _(intents) - .filter((intent) => 
intent.contexts.includes(ctx)) - .flatMap((intent) => intent.utterances) - .value() - - return >{ - name: ctx, - contexts: [], - slot_definitions: [], - utterances - } - }) - - const rootIntentClassifier = new SvmIntentClassifier(tools, getCtxFeatures, tools.logger) - await rootIntentClassifier.train( - { - intents: rootIntents, - languageCode, - list_entities, - pattern_entities, - nluSeed - }, - (p) => { - clampedProgress(p) - } +async function tfidfTokens(input: PreprocessTrainStep): Promise { + const tfidfInput = input.intents.reduce( + (tfidfInput, intent) => ({ + ...tfidfInput, + [intent.name]: _.flatMapDeep(intent.utterances.map((u) => u.tokens.map((t) => t.toString({ lowerCase: true })))) + }), + {} as _.Dictionary ) - progress(1) - return rootIntentClassifier.serialize() + const { __avg__: avg_tfidf } = tfidf(tfidfInput) + const copy = { ...input, tfIdf: avg_tfidf } + copy.intents.forEach((x) => x.utterances.forEach((u) => u.setGlobalTfidf(avg_tfidf))) + + return copy } -async function ProcessIntents( - intents: Intent[], - languageCode: string, - tools: Tools -): Promise[]> { - return Bluebird.map(intents, async (intent) => { - const cleaned = intent.utterances.map(_.flow([_.trim, replaceConsecutiveSpaces])) - const utterances = await buildUtteranceBatch(cleaned, languageCode, tools) - return { ...intent, utterances } - }) +async function clusterTokens(input: TfIdfTrainStep, tools: Tools): Promise { + const kmeans = computeKmeans(input.intents, tools) + const copy = { ...input, kmeans } + copy.intents.forEach((x) => x.utterances.forEach((u) => u.setKmeans(kmeans))) + return copy } -async function ExtractEntities(input: TrainStep, tools: Tools, progress: progressCB): Promise { +/** + * ######################### + * ### Step 2 : Entities ### + * ######################### + */ + +async function extractEntities(input: ClusterTrainStep, tools: Tools, progress: progressCB): Promise { const utterances: Utterance[] = _.chain(input.intents).flatMap('utterances').value() tools.logger?.debug('Extracting system entities') @@ -290,7 +176,7 @@ async function ExtractEntities(input: TrainStep, tools: Tools, progress: progres step = 1 const customEntityExtractor = process.env.TS_NODE_DEV - ? new CustomEntityExtractor() // worker_threads do not work with ts-node + ? 
new CustomEntityExtractor() // worker_threads does not work with ts-node : new MultiThreadCustomEntityExtractor(tools.logger) const allListEntities = await customEntityExtractor.extractMultipleListEntities( @@ -329,24 +215,109 @@ async function ExtractEntities(input: TrainStep, tools: Tools, progress: progres return input } -export async function TfidfTokens(input: TrainStep): Promise { - const tfidfInput = input.intents.reduce( - (tfidfInput, intent) => ({ - ...tfidfInput, - [intent.name]: _.flatMapDeep(intent.utterances.map((u) => u.tokens.map((t) => t.toString({ lowerCase: true })))) - }), - {} as _.Dictionary +/** + * ############################ + * ### Steps 3-5 : Parallel ### + * ############################ + */ +async function trainContextClassifier( + input: SerialTrainOuput, + tools: Tools, + progress: progressCB +): Promise> { + const { languageCode, intents, contexts, list_entities, pattern_entities, nluSeed } = input + + const clampedProgress = (p: number) => progress(Math.min(0.99, p)) + + const rootIntents = contexts.map((ctx) => { + const utterances = _(intents) + .filter((intent) => intent.contexts.includes(ctx)) + .flatMap((intent) => intent.utterances) + .value() + + return >{ + name: ctx, + contexts: [], + slot_definitions: [], + utterances + } + }) + + const rootIntentClassifier = new SvmIntentClassifier(tools, getCtxFeatures, tools.logger) + const model = await rootIntentClassifier.train( + { + intents: rootIntents, + languageCode, + list_entities, + pattern_entities, + nluSeed + }, + (p) => { + clampedProgress(p) + } ) - const { __avg__: avg_tfidf } = tfidf(tfidfInput) - const copy = { ...input, tfIdf: avg_tfidf } - copy.intents.forEach((x) => x.utterances.forEach((u) => u.setGlobalTfidf(avg_tfidf))) + progress(1) + return model +} - return copy +async function trainIntentClassifiers( + input: SerialTrainOuput, + tools: Tools, + progress: progressCB +): Promise<_.Dictionary>> { + const { list_entities, pattern_entities, intents, contexts, nluSeed, languageCode } = input + + const progressPerCtx: _.Dictionary = {} + + const clampedProgress = (p: number) => progress(Math.min(0.99, p)) + const reportProgress = () => { + const n = contexts.length + const total = _(progressPerCtx).values().sum() + clampedProgress(total / n) + } + + const models = await Bluebird.map(contexts, async (ctx) => { + const taskName = `train Clf for Ctx "${ctx}"` + tools.logger.debug(taskStarted(input.trainId, taskName)) + + const allUtterances = _.flatMap(intents, (i) => i.utterances) + const trainableIntents = intents.filter((i) => i.contexts.includes(ctx)) + + const intentClf = new OOSIntentClassifier(tools, tools.logger) + const model = await intentClf.train( + { + languageCode, + intents: trainableIntents, + list_entities, + nluSeed, + pattern_entities, + allUtterances + }, + (p) => { + progressPerCtx[ctx] = p + reportProgress() + } + ) + + tools.logger.debug(taskDone(input.trainId, taskName)) + return { ctx, model } + }) + + progress(1) + + return _(models) + .map(({ ctx, model }) => [ctx, model]) + .fromPairs() + .value() } -async function TrainSlotTaggers(input: TrainStep, tools: Tools, progress: progressCB): Promise<_.Dictionary> { - const slotModelByIntent: _.Dictionary = {} +async function trainSlotTaggers( + input: SerialTrainOuput, + tools: Tools, + progress: progressCB +): Promise<_.Dictionary>> { + const slotModelByIntent: _.Dictionary> = {} const clampedProgress = (p: number) => progress(Math.min(0.99, p)) @@ -355,7 +326,7 @@ async function TrainSlotTaggers(input: TrainStep, 
tools: Tools, progress: progre const slotTagger = new SlotTagger(tools, tools.logger) - await slotTagger.train( + const model = await slotTagger.train( { intent, list_entites: input.list_entities @@ -366,7 +337,7 @@ async function TrainSlotTaggers(input: TrainStep, tools: Tools, progress: progre } ) - slotModelByIntent[intent.name] = slotTagger.serialize() + slotModelByIntent[intent.name] = model } progress(1) @@ -379,20 +350,22 @@ type AsyncFunction> = (...args: A) => R const taskStarted = (id: string, taskName: string) => `[${id}] Started ${taskName}` const taskDone = (id: string, taskName: string) => `[${id}] Done ${taskName}` -const makeLogger = (trainId: string, logger: Logger) => { +const makeLogDecorator = (trainId: string, logger: Logger) => { return >(fn: AsyncFunction) => (...args: A): R => { logger.debug(taskStarted(trainId, fn.name)) const ret = fn(...args) // awaiting if not responsibility of this logger decorator - // eslint-disable-next-line @typescript-eslint/no-floating-promises - ret.then(() => logger.debug(taskDone(trainId, fn.name))).catch((_err) => {}) - + void ret.then(() => logger.debug(taskDone(trainId, fn.name))).catch((_err) => {}) return ret } } -export const Trainer = async (input: TrainInput, tools: Tools, progress: (x: number) => void): Promise => { +export const trainingPipeline = async ( + input: TrainInput, + tools: Tools, + progress: (x: number) => void +): Promise => { tools.logger.debug(`[${input.trainId}] Started running training pipeline.`) let totalProgress = 0 @@ -413,39 +386,41 @@ export const Trainer = async (input: TrainInput, tools: Tools, progress: (x: num normalizedProgress = scaledProgress progressWatchDog.run() } - const logger = makeLogger(input.trainId, tools.logger) + const log = makeLogDecorator(input.trainId, tools.logger) progress(0) // 0% - let step = await logger(PreprocessInput)(input, tools) - step = await logger(TfidfTokens)(step) - step = await logger(ClusterTokens)(step, tools) + const preprocessStep = await log(preprocessInput)(input, tools) + const tfIdfStep = await log(tfidfTokens)(preprocessStep) + const clusterStep = await log(clusterTokens)(tfIdfStep, tools) + reportProgress(1) // 20% - step = await logger(ExtractEntities)(step, tools, reportProgress) + const serialOutput = await log(extractEntities)(clusterStep, tools, reportProgress) + const models = await Promise.all([ - logger(TrainContextClassifier)(step, tools, reportProgress), - logger(TrainIntentClassifiers)(step, tools, reportProgress), - logger(TrainSlotTaggers)(step, tools, reportProgress) + log(trainContextClassifier)(serialOutput, tools, reportProgress), + log(trainIntentClassifiers)(serialOutput, tools, reportProgress), + log(trainSlotTaggers)(serialOutput, tools, reportProgress) ]) progressWatchDog.stop() const [ctx_model, intent_model_by_ctx, slots_model_by_intent] = models - const coldEntities: ColdListEntityModel[] = step.list_entities.map((e) => ({ + const coldEntities: ColdListEntityModel[] = serialOutput.list_entities.map((e) => ({ ...e, cache: e.cache.dump() })) const output: TrainOutput = { list_entities: coldEntities, - tfidf: step.tfIdf!, + tfidf: serialOutput.tfIdf, ctx_model, intent_model_by_ctx, slots_model_by_intent, contexts: input.contexts, - vocab: Object.keys(step.vocabVectors), - kmeans: step.kmeans && serializeKmeans(step.kmeans) + vocab: Object.keys(serialOutput.vocabVectors), + kmeans: serialOutput.kmeans && serializeKmeans(serialOutput.kmeans) } tools.logger.debug(`[${input.trainId}] Done running training pipeline.`) diff --git 
a/packages/nlu-engine/src/engine/training-process-pool/error-handler.ts b/packages/nlu-engine/src/engine/training-process-pool/error-handler.ts
new file mode 100644
index 00000000..8c36d2ad
--- /dev/null
+++ b/packages/nlu-engine/src/engine/training-process-pool/error-handler.ts
@@ -0,0 +1,36 @@
+import { ErrorSerializer, ErrorDeserializer, SerializedError } from '@botpress/worker'
+import _ from 'lodash'
+import { LangServerError, DucklingServerError } from '../errors'
+
+export class ErrorHandler implements ErrorSerializer, ErrorDeserializer {
+  public deserializeError(err: SerializedError): Error {
+    const { message, stack, data } = err
+    if (data.errClass === LangServerError.name) {
+      const { code, type } = data
+      return new LangServerError({ message, stack, type, code })
+    }
+    if (data.errClass === DucklingServerError.name) {
+      return new DucklingServerError(message, stack)
+    }
+
+    const newErr = new Error(err.message)
+    newErr.stack = err.stack
+    return newErr
+  }
+
+  public serializeError(err: Error): SerializedError {
+    if (err instanceof LangServerError) {
+      const { code, message, type, stack } = err
+      const errClass = LangServerError.name
+      return { message, stack, data: { errClass, code, type } }
+    }
+    if (err instanceof DucklingServerError) {
+      const { message, stack } = err
+      const errClass = DucklingServerError.name
+      return { message, stack, data: { errClass } }
+    }
+
+    const { message, stack } = err
+    return { message, stack, data: {} }
+  }
+}
diff --git a/packages/nlu-engine/src/engine/training-process-pool/index.ts b/packages/nlu-engine/src/engine/training-process-pool/index.ts
index a16790d9..87f653aa 100644
--- a/packages/nlu-engine/src/engine/training-process-pool/index.ts
+++ b/packages/nlu-engine/src/engine/training-process-pool/index.ts
@@ -1,9 +1,10 @@
 import { errors, makeProcessPool, ProcessPool } from '@botpress/worker'
 import _ from 'lodash'
-import { TrainingAlreadyStarted, TrainingCanceled, TrainingExitedUnexpectedly } from '../../errors'
-
 import { LanguageConfig, Logger } from '../../typings'
+import { TrainingAlreadyStartedError, TrainingCanceledError, TrainingExitedUnexpectedlyError } from '../errors'
+
 import { TrainInput, TrainOutput } from '../training-pipeline'
+import { ErrorHandler } from './error-handler'
 import { ENTRY_POINT } from './process-entry-point'
 
 export class TrainingProcessPool {
@@ -18,7 +19,8 @@
     this._processPool = makeProcessPool(this._logger, {
       maxWorkers: Number.POSITIVE_INFINITY,
       entryPoint: ENTRY_POINT,
-      env
+      env,
+      errorHandler: new ErrorHandler()
     })
   }
 
@@ -28,17 +30,18 @@
 
   public async startTraining(input: TrainInput, progress: (x: number) => void): Promise {
     try {
-      const output = await this._processPool.run(input.trainId, input, progress)
-      return output
-    } catch (err) {
-      if (errors.isTaskCanceled(err)) {
-        throw new TrainingCanceled()
+      const output = await this._processPool.run(input.trainId, input, progress)
+      return output
+    } catch (thrown) {
+      const err = thrown instanceof Error ? 
thrown : new Error(`${thrown}`)
+      if (err instanceof errors.TaskCanceledError) {
+        throw new TrainingCanceledError()
       }
-      if (errors.isTaskAlreadyStarted(err)) {
-        throw new TrainingAlreadyStarted()
+      if (err instanceof errors.TaskAlreadyStartedError) {
+        throw new TrainingAlreadyStartedError()
       }
-      if (errors.isTaskExitedUnexpectedly(err)) {
-        throw new TrainingExitedUnexpectedly(err.pid, err.info)
+      if (err instanceof errors.TaskExitedUnexpectedlyError) {
+        throw new TrainingExitedUnexpectedlyError(err.wid!, err)
       }
       throw err
     }
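Because training runs in worker processes, errors reach the parent serialized as plain JSON; the ErrorHandler above rebuilds the original classes so the instanceof checks in startTraining keep working. A minimal round-trip sketch, illustrative only: the LangServerError field values below are invented, and its constructor shape is taken from deserializeError above:

import { LangServerError } from '../errors'
import { ErrorHandler } from './error-handler'

const handler = new ErrorHandler()
// worker side: flatten the typed error into { message, stack, data }
const original = new LangServerError({ message: 'lang server unreachable', stack: '', type: 'unknown', code: 500 })
const wire = handler.serializeError(original)
// parent side: rebuild it, so the error survives IPC with its class information
const restored = handler.deserializeError(wire)
// restored instanceof LangServerError === true

diff --git a/packages/nlu-engine/src/engine/training-process-pool/process-entry-point.ts b/packages/nlu-engine/src/engine/training-process-pool/process-entry-point.ts
index b820d0ab..7c062d18 100644
--- a/packages/nlu-engine/src/engine/training-process-pool/process-entry-point.ts
+++ b/packages/nlu-engine/src/engine/training-process-pool/process-entry-point.ts
@@ -1,10 +1,13 @@
 import { makeProcessEntryPoint, TaskDefinition } from '@botpress/worker'
 import { initializeTools } from '../initialize-tools'
-import { Trainer, TrainInput, TrainOutput } from '../training-pipeline'
+import { trainingPipeline, TrainInput, TrainOutput } from '../training-pipeline'
+import { ErrorHandler } from './error-handler'
 
 export const ENTRY_POINT = __filename
 
-const processEntryPoint = makeProcessEntryPoint()
+const processEntryPoint = makeProcessEntryPoint({
+  errorHandler: new ErrorHandler()
+})
 
 const main = async () => {
   const config = JSON.parse(process.env.NLU_CONFIG!)
@@ -14,12 +17,12 @@ const main = async () => {
   try {
     const tools = await initializeTools(config, processEntryPoint.logger)
 
-    processEntryPoint.listenForTask(async (taskDef: TaskDefinition) => {
+    processEntryPoint.listenForTask(async (taskDef: TaskDefinition) => {
       const { input, logger, progress } = taskDef
 
       tools.seededLodashProvider.setSeed(input.nluSeed)
       try {
-        const output = await Trainer(input, { ...tools, logger }, progress)
+        const output = await trainingPipeline(input, { ...tools, logger }, progress)
         return output
       } finally {
         tools.seededLodashProvider.resetSeed()
       }
     })
 
     await processEntryPoint.initialize()
-  } catch (err) {
+  } catch (thrown) {
+    const err = thrown instanceof Error ? 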
thrown : new Error(`${thrown}`) processEntryPoint.logger.error('An unhandled error occured in the process', err) process.exit(1) } diff --git a/packages/nlu-engine/src/engine/typings.ts b/packages/nlu-engine/src/engine/typings.ts index b60aa182..0c30f3af 100644 --- a/packages/nlu-engine/src/engine/typings.ts +++ b/packages/nlu-engine/src/engine/typings.ts @@ -1,10 +1,8 @@ -import { AxiosInstance } from 'axios' import _ from 'lodash' import LRUCache from 'lru-cache' -import { Health, Specifications } from 'src/typings' -import { MLToolkit } from '../ml/typings' +import { LangServerSpecs } from 'src/typings' +import * as MLToolkit from '../ml/toolkit' -import { LanguageSource } from '../typings' import { Predictors } from './predict-pipeline' export const BIO = { @@ -15,36 +13,16 @@ export const BIO = { export type Tag = 'o' | 'B' | 'I' -export interface Token2Vec { +export type Token2Vec = { [token: string]: number[] } -export interface LangServerInfo { +export type LangServerInfo = { version: string domain: string dim: number } -export interface Gateway { - source: LanguageSource - client: AxiosInstance - errors: number - disabledUntil?: Date -} - -export interface LangsGateway { - [lang: string]: Gateway[] -} - -export interface LanguageProvider { - languages: string[] - langServerInfo: LangServerInfo - vectorize(tokens: string[], lang: string): Promise - tokenize(utterances: string[], lang: string, vocab?: string[]): Promise - generateSimilarJunkWords(subsetVocab: string[], lang: string): Promise - getHealth(): Partial -} - export type TFIDF = _.Dictionary export type PatternEntity = Readonly<{ @@ -65,10 +43,13 @@ export type ListEntity = Readonly<{ export type EntityCache = LRUCache export type EntityCacheDump = LRUCache.Entry[] -export interface ListEntityModel { +export type ListEntityWithCache = ListEntity & { + cache: EntityCacheDump +} + +export type ListEntityModel = { type: 'custom.list' id: string - languageCode: string entityName: string fuzzyTolerance: number sensitive: boolean @@ -84,7 +65,7 @@ export type WarmedListEntityModel = ListEntityModel & { cache: EntityCache } -export interface ExtractedSlot { +export type ExtractedSlot = { confidence: number name: string source: string @@ -92,13 +73,13 @@ export interface ExtractedSlot { entity?: EntityExtractionResult } -export interface SlotExtractionResult { +export type SlotExtractionResult = { slot: ExtractedSlot start: number end: number } export type EntityExtractor = 'system' | 'list' | 'pattern' -export interface ExtractedEntity { +export type ExtractedEntity = { confidence: number type: string metadata: { @@ -113,45 +94,42 @@ export interface ExtractedEntity { } export type EntityExtractionResult = ExtractedEntity & { start: number; end: number } -export interface KeyedItem { +export type KeyedItem = { input: string idx: number entities?: EntityExtractionResult[] } -export interface SeededLodashProvider { +export type SeededLodashProvider = { setSeed(seed: number): void getSeededLodash(): _.LoDashStatic resetSeed(): void } -export interface Tools { +export type Tools = { + getLanguages(): string[] + getLangServerSpecs(): LangServerSpecs + identify_language(utterance: string, predictorsByLang: _.Dictionary): Promise - // pre-trained language focused tools tokenize_utterances(utterances: string[], languageCode: string, vocab?: string[]): Promise vectorize_tokens(tokens: string[], languageCode: string): Promise - partOfSpeechUtterances(utterances: string[][], languageCode: string): Promise - 
generateSimilarJunkWords(vocabulary: string[], languageCode: string): Promise + pos_utterances(utterances: string[][], languageCode: string): Promise + getStopWordsForLang(lang: string): Promise isSpaceSeparated(lang: string): boolean - // system info - getHealth(): Health - getLanguages(): string[] - getSpecifications(): Specifications - seededLodashProvider: SeededLodashProvider mlToolkit: typeof MLToolkit systemEntityExtractor: SystemEntityExtractor } -export interface SystemEntityExtractor { +export type SystemEntityExtractor = { extractMultiple( input: string[], lang: string, progress: (p: number) => void, - useCache?: Boolean + useCache?: boolean ): Promise extract(input: string, lang: string): Promise } diff --git a/packages/nlu-engine/src/engine/utterance/utterance-parser.ts b/packages/nlu-engine/src/engine/utterance/utterance-parser.ts index 1b05887e..f69bd09c 100644 --- a/packages/nlu-engine/src/engine/utterance/utterance-parser.ts +++ b/packages/nlu-engine/src/engine/utterance/utterance-parser.ts @@ -1,6 +1,6 @@ import _ from 'lodash' -export interface ParsedSlot { +export type ParsedSlot = { name: string value: string rawPosition: { @@ -13,12 +13,12 @@ export interface ParsedSlot { } } -export interface UtterancePart { +export type UtterancePart = { text: string slot?: ParsedSlot } -export interface ParsedUtterance { +export type ParsedUtterance = { utterance: string parsedSlots: ParsedSlot[] parts: UtterancePart[] diff --git a/packages/nlu-engine/src/engine/utterance/utterance.test.ts b/packages/nlu-engine/src/engine/utterance/utterance.test.ts index f3df1b9e..7e8ef6be 100644 --- a/packages/nlu-engine/src/engine/utterance/utterance.test.ts +++ b/packages/nlu-engine/src/engine/utterance/utterance.test.ts @@ -1,8 +1,8 @@ -import { MLToolkit } from '../../ml/typings' import _ from 'lodash' +import * as MLToolkit from '../../ml/toolkit' +import { POSClass } from '../language/pos-tagger' import { tokenizeLatinTextForTests } from '../test-utils/fake-tools' -import { POSClass } from '../language/pos-tagger' import { SPACE } from '../tools/token-utils' import { EntityExtractor, ExtractedEntity, ExtractedSlot } from '../typings' diff --git a/packages/nlu-engine/src/engine/utterance/utterance.ts b/packages/nlu-engine/src/engine/utterance/utterance.ts index 3d76b0ec..120038cb 100644 --- a/packages/nlu-engine/src/engine/utterance/utterance.ts +++ b/packages/nlu-engine/src/engine/utterance/utterance.ts @@ -1,5 +1,5 @@ import _ from 'lodash' -import { MLToolkit } from '../../ml/typings' +import * as MLToolkit from '../../ml/toolkit' import { POSClass } from '../language/pos-tagger' import { computeNorm, scalarDivide, scalarMultiply, vectorAdd, zeroes } from '../tools/math' @@ -15,19 +15,19 @@ type UtteranceToStringStrategy = | 'keep-token' // token keeps it's original value | 'ignore' // token won't be in resulting string -export interface UtteranceToStringOptions { +export type UtteranceToStringOptions = { lowerCase: boolean onlyWords: boolean strategy: UtteranceToStringStrategy } -export interface TokenToStringOptions { +export type TokenToStringOptions = { lowerCase?: boolean trim?: boolean realSpaces?: boolean } -export interface UtteranceRange { +export type UtteranceRange = { startTokenIdx: number endTokenIdx: number startPos: number @@ -68,6 +68,52 @@ export default class Utterance { private _kmeans?: MLToolkit.KMeans.KmeansResult private _sentenceEmbedding?: number[] + public static toString(tokens: ReadonlyArray, opt?: Partial): string { + const nonEmptyOptions = _.pickBy(opt, (v) 
=> v !== undefined) + const options: UtteranceToStringOptions = { ...DEFAULT_UTT_TO_STRING_OPTIONS, ...nonEmptyOptions } + + let final = '' + let ret = [...tokens] + if (options.onlyWords) { + ret = ret.filter((tok) => tok.slots.length || tok.isWord) + } + + for (const tok of ret) { + final += this._replaceToken(tok, options) + } + + if (options.lowerCase) { + final = final.toLowerCase() + } + + return final.replace(new RegExp(SPACE, 'g'), ' ') + } + + private static _replaceToken(tok: UtteranceToken, options: UtteranceToStringOptions): string { + if (!tok.slots.length && !tok.entities.length) { + return tok.value + } + + if (options.strategy === 'keep-token') { + return tok.value + } + + if (tok.entities.length && options.strategy === 'replace-entity-name') { + return tok.entities[0].type + } + + if (tok.entities.length && options.strategy === 'replace-entity-value') { + return tok.entities[0].value.toString() + } + + if (tok.slots.length && options.strategy === 'replace-slot-name') { + return tok.slots[0].name + } + + // options.strategy === 'ignore' + return '' + } + constructor(tokens: string[], vectors: number[][], posTags: POSClass[], public languageCode: Readonly) { const allSameLength = [tokens, vectors, posTags].every((arr) => arr.length === tokens.length) if (!allSameLength) { @@ -124,11 +170,11 @@ export default class Utterance { this._tokens = arr } - get tokens(): ReadonlyArray { + public get tokens(): ReadonlyArray { return this._tokens } - sentenceEmbedding( + public sentenceEmbedding( options?: Partial<{ keepToken: (t: UtteranceToken) => boolean }> @@ -172,62 +218,19 @@ export default class Utterance { return this._sentenceEmbedding } - setGlobalTfidf(tfidf: TFIDF) { + public setGlobalTfidf(tfidf: TFIDF) { this._globalTfidf = _.mapKeys(tfidf, (tfidf, token) => token.toLowerCase()) } - setKmeans(kmeans?: MLToolkit.KMeans.KmeansResult) { + public setKmeans(kmeans?: MLToolkit.KMeans.KmeansResult) { this._kmeans = kmeans } - private _replaceToken(tok: UtteranceToken, options: UtteranceToStringOptions): string { - if (!tok.slots.length && !tok.entities.length) { - return tok.value - } - - if (options.strategy === 'keep-token') { - return tok.value - } - - if (tok.entities.length && options.strategy === 'replace-entity-name') { - return tok.entities[0].type - } - - if (tok.entities.length && options.strategy === 'replace-entity-value') { - return tok.entities[0].value.toString() - } - - if (tok.slots.length && options.strategy === 'replace-slot-name') { - return tok.slots[0].name - } - - // options.strategy === 'ignore' - return '' + public toString(opt?: Partial): string { + return Utterance.toString(this.tokens, opt) } - // TODO memoize this for better perf - toString(opt?: Partial): string { - const nonEmptyOptions = _.pickBy(opt, (v) => v !== undefined) - const options: UtteranceToStringOptions = { ...DEFAULT_UTT_TO_STRING_OPTIONS, ...nonEmptyOptions } - - let final = '' - let ret = [...this.tokens] - if (options.onlyWords) { - ret = ret.filter((tok) => tok.slots.length || tok.isWord) - } - - for (const tok of ret) { - final += this._replaceToken(tok, options) - } - - if (options.lowerCase) { - final = final.toLowerCase() - } - - return final.replace(new RegExp(SPACE, 'g'), ' ') - } - - clone(copyEntities: boolean, copySlots: boolean): Utterance { + public clone(copyEntities: boolean, copySlots: boolean): Utterance { const tokens = this.tokens.map((x) => x.value) const vectors = this.tokens.map((x) => x.vector) const POStags = this.tokens.map((x) => x.POS) @@ -247,14 +250,14 
@@ export default class Utterance { private _validateRange(start: number, end: number) { const lastTok = _.last(this._tokens) - const maxEnd = _.get(lastTok, 'offset', 0) + _.get(lastTok, 'value.length', 0) + const maxEnd = (lastTok?.offset ?? 0) + (lastTok?.value.length ?? 0) if (start < 0 || start > end || start > maxEnd || end > maxEnd) { throw new Error('Invalid range') } } - tagEntity(entity: ExtractedEntity, start: number, end: number) { + public tagEntity(entity: ExtractedEntity, start: number, end: number) { this._validateRange(start, end) const range = this.tokens.filter((x) => x.offset >= start && x.offset + x.value.length <= end) if (_.isEmpty(range)) { @@ -271,7 +274,7 @@ export default class Utterance { this.entities = [...this.entities, entityWithPos] } - tagSlot(slot: ExtractedSlot, start: number, end: number) { + public tagSlot(slot: ExtractedSlot, start: number, end: number) { this._validateRange(start, end) const range = this.tokens.filter((x) => x.offset >= start && x.offset + x.value.length <= end) if (_.isEmpty(range)) { @@ -296,18 +299,23 @@ export async function buildUtteranceBatch( raw_utterances: string[], language: string, tools: Tools, - vocab?: string[] + vocab: string[], + opt: { vectorize: boolean; preprocess: boolean } = { vectorize: true, preprocess: true } ): Promise { - const preprocessed = raw_utterances.map(preprocessRawUtterance) + const preprocessed = opt.preprocess ? raw_utterances.map(preprocessRawUtterance) : raw_utterances const parsed = preprocessed.map(parseUtterance) const tokenUtterances = await tools.tokenize_utterances( parsed.map((p) => p.utterance), language, - vocab ?? [] + vocab ) - const POSUtterances = (await tools.partOfSpeechUtterances(tokenUtterances, language)) as POSClass[][] + const POSUtterances = (await tools.pos_utterances(tokenUtterances, language)) as POSClass[][] const uniqTokens = _.uniq(_.flatten(tokenUtterances)) - const vectors = await tools.vectorize_tokens(uniqTokens, language) + + const vectorDim = tools.getLangServerSpecs().dimensions + const vectors = opt.vectorize + ? 
await tools.vectorize_tokens(uniqTokens, language) + : uniqTokens.map(() => zeroes(vectorDim)) const vectorMap = _.zipObject(uniqTokens, vectors) return _.zipWith(tokenUtterances, POSUtterances, parsed, (tokUtt, POSUtt, parsed) => ({ tokUtt, POSUtt, parsed })) diff --git a/packages/nlu-engine/src/engine/warm-training-handler.test.ts b/packages/nlu-engine/src/engine/warm-training-handler.test.ts deleted file mode 100644 index 9588bc3d..00000000 --- a/packages/nlu-engine/src/engine/warm-training-handler.test.ts +++ /dev/null @@ -1,304 +0,0 @@ -import _ from 'lodash' - -import { TrainOutput } from './training-pipeline' -import { Intent } from './typings' -import { getModifiedContexts, mergeModelOutputs } from './warm-training-handler' - -const _makeIntent = (name: string, contexts: string[]): Intent => { - return { - contexts, - name, - slot_definitions: [], - utterances: [name] - } -} - -const _makeTrainOuput = ( - intentModels: { ctx: string; model: string }[], - oosModels: { ctx: string; model: string }[] -): TrainOutput => { - const contexts = _.uniq([...intentModels.map((i) => i.ctx), ...oosModels.map((i) => i.ctx)]) - - return { - contexts, - ctx_model: '', - list_entities: [], - slots_model_by_intent: {}, - tfidf: {}, - vocab: [], - intent_model_by_ctx: _(intentModels) - .map((i) => [i.ctx, i.model]) - .fromPairs() - .value(), - kmeans: { - centroids: [], - clusters: [], - iterations: 0 - } - } -} - -describe('getModifiedContexts', () => { - test('when no change at all, returns empty ds', () => { - // arrange - const previousIntents = [_makeIntent('A', ['global']), _makeIntent('B', ['global'])] - const currentIntents = [_makeIntent('A', ['global']), _makeIntent('B', ['global'])] - - // act - const changeLog = getModifiedContexts(currentIntents, previousIntents) - - // assert - expect(changeLog.createdContexts.length).toBe(0) - expect(changeLog.modifiedContexts.length).toBe(0) - expect(changeLog.deletedContexts.length).toBe(0) - }) - - test('when no change at all, returns empty ds', () => { - // arrange - const previousIntents = [_makeIntent('A', ['global']), _makeIntent('B', ['global'])] - const currentIntents = [_makeIntent('A', ['global']), _makeIntent('C', ['global'])] - - // act - const changeLog = getModifiedContexts(currentIntents, previousIntents) - - // assert - expect(changeLog.createdContexts.length).toBe(0) - expect(changeLog.modifiedContexts.length).toBe(1) - expect(changeLog.modifiedContexts[0]).toBe('global') - expect(changeLog.deletedContexts.length).toBe(0) - }) - - test('when one ctx created, returns one created ctx', () => { - // arrange - const previousIntents = [_makeIntent('A', ['global']), _makeIntent('B', ['global'])] - const currentIntents = [ - _makeIntent('A', ['global']), - _makeIntent('B', ['global']), - _makeIntent('C', ['not-global']) - ] - - // act - const changeLog = getModifiedContexts(currentIntents, previousIntents) - - // assert - expect(changeLog.createdContexts.length).toBe(1) - expect(changeLog.createdContexts[0]).toBe('not-global') - expect(changeLog.modifiedContexts.length).toBe(0) - expect(changeLog.deletedContexts.length).toBe(0) - }) - - test('when one ctx deleted, returns one deleted ctx', () => { - // arrange - const previousIntents = [ - _makeIntent('A', ['global']), - _makeIntent('B', ['global']), - _makeIntent('C', ['not-global']) - ] - const currentIntents = [_makeIntent('A', ['global']), _makeIntent('B', ['global'])] - - // act - const changeLog = getModifiedContexts(currentIntents, previousIntents) - - // assert - 
expect(changeLog.createdContexts.length).toBe(0) - expect(changeLog.modifiedContexts.length).toBe(0) - - expect(changeLog.deletedContexts.length).toBe(1) - expect(changeLog.deletedContexts[0]).toBe('not-global') - }) - - test('when two ctxs deleted, two created and two change, returns two created, two deleted and two change', () => { - // arrange - const previousIntents = [ - _makeIntent('A', ['A1', 'A2']), - _makeIntent('B', ['B1', 'B2']), - _makeIntent('C', ['C1', 'C2']) - ] - - const currentIntents = [ - _makeIntent('A', ['A1', 'A3']), - _makeIntent('B_hat', ['B1', 'B3']), - _makeIntent('C_hat', ['C1', 'C2']) - ] - - // act - const changeLog = getModifiedContexts(currentIntents, previousIntents) - - // assert - expect(changeLog.createdContexts.length).toBe(2) - expect(changeLog.deletedContexts.length).toBe(2) - expect(changeLog.modifiedContexts.length).toBe(3) - - expect(changeLog.createdContexts.includes('A3')).toBe(true) - expect(changeLog.createdContexts.includes('B3')).toBe(true) - - expect(changeLog.deletedContexts.includes('A2')).toBe(true) - expect(changeLog.deletedContexts.includes('B2')).toBe(true) - - expect(changeLog.modifiedContexts.includes('B1')).toBe(true) - expect(changeLog.modifiedContexts.includes('C1')).toBe(true) - expect(changeLog.modifiedContexts.includes('C2')).toBe(true) - }) - - test('when context contains more than one intent', () => { - // arrange - const previousIntents = [ - _makeIntent('A', ['global', 'A']), - _makeIntent('B', ['global', 'B']), - _makeIntent('C', ['global', 'C']), - _makeIntent('D', ['D1']) - ] - - const currentIntents = [_makeIntent('C', ['global', 'C']), _makeIntent('D', ['D1', 'D2'])] - - // act - const changeLog = getModifiedContexts(currentIntents, previousIntents) - - // assert - expect(changeLog.createdContexts.length).toBe(1) - expect(changeLog.deletedContexts.length).toBe(2) - expect(changeLog.modifiedContexts.length).toBe(1) - - expect(changeLog.modifiedContexts[0]).toBe('global') - expect(changeLog.createdContexts[0]).toBe('D2') - - expect(changeLog.deletedContexts.includes('A')).toBe(true) - expect(changeLog.deletedContexts.includes('B')).toBe(true) - }) -}) - -describe('mergeModelsOutputs', () => { - test('when one context deleted should contain all current contexts', () => { - // arrange - const previousTrainOutput = _makeTrainOuput( - [ - { ctx: 'A', model: 'intent model for A' }, - { ctx: 'B', model: 'intent model for B' } - ], - [ - { ctx: 'A', model: 'oos model for A' }, - { ctx: 'B', model: 'oos model for B' } - ] - ) - - const currentTrainOutput = _makeTrainOuput( - [{ ctx: 'A', model: 'intent model for A' }], - [{ ctx: 'A', model: 'oos model for A' }] - ) - - // act - const output = mergeModelOutputs(currentTrainOutput, previousTrainOutput, ['A']) - - // assert - expect(output.contexts.length).toBe(1) - expect(output.contexts[0]).toBe('A') - - expect(output.intent_model_by_ctx['A']).toBe('intent model for A') - }) - - test('when one context modified should contain the modified context', () => { - // arrange - const previousTrainOutput = _makeTrainOuput( - [ - { ctx: 'A', model: 'intent model for A' }, - { ctx: 'B', model: 'intent model for B' } - ], - [ - { ctx: 'A', model: 'oos model for A' }, - { ctx: 'B', model: 'oos model for B' } - ] - ) - - const currentTrainOutput = _makeTrainOuput( - [ - { ctx: 'A', model: 'intent model for A' }, - { ctx: 'B', model: 'modified intent model for B' } - ], - [ - { ctx: 'A', model: 'oos model for A' }, - { ctx: 'B', model: 'modified oos model for B' } - ] - ) - - // act - const 
output = mergeModelOutputs(currentTrainOutput, previousTrainOutput, ['A', 'B']) - - // assert - expect(output.contexts.length).toBe(2) - expect(output.contexts.sort()[0]).toBe('A') - expect(output.contexts.sort()[1]).toBe('B') - - expect(output.intent_model_by_ctx['A']).toBe('intent model for A') - expect(output.intent_model_by_ctx['B']).toBe('modified intent model for B') - }) - - test('when one context created should contain the created context', () => { - // arrange - const previousTrainOutput = _makeTrainOuput( - [{ ctx: 'A', model: 'intent model for A' }], - [{ ctx: 'A', model: 'oos model for A' }] - ) - - const currentTrainOutput = _makeTrainOuput( - [ - { ctx: 'A', model: 'intent model for A' }, - { ctx: 'B', model: 'created intent model for B' } - ], - [ - { ctx: 'A', model: 'oos model for A' }, - { ctx: 'B', model: 'created oos model for B' } - ] - ) - - // act - const output = mergeModelOutputs(currentTrainOutput, previousTrainOutput, ['A', 'B']) - - // assert - expect(output.contexts.length).toBe(2) - expect(output.contexts.sort()[0]).toBe('A') - expect(output.contexts.sort()[1]).toBe('B') - - expect(output.intent_model_by_ctx['A']).toBe('intent model for A') - expect(output.intent_model_by_ctx['B']).toBe('created intent model for B') - }) - - test('when both one context created and one modified should contain both the created context and the modified one', () => { - // arrange - const previousTrainOutput = _makeTrainOuput( - [ - { ctx: 'A', model: 'intent model for A' }, - { ctx: 'D', model: 'intent model for D' } - ], - [ - { ctx: 'A', model: 'oos model for A' }, - { ctx: 'D', model: 'intent model for D' } - ] - ) - - const currentTrainOutput = _makeTrainOuput( - [ - { ctx: 'A', model: 'modified intent model for A' }, - { ctx: 'B', model: 'modified intent model for B' }, - { ctx: 'C', model: 'created intent model for C' } - ], - [ - { ctx: 'A', model: 'modified oos model for A' }, - { ctx: 'B', model: 'modified oos model for B' }, - { ctx: 'C', model: 'created oos model for C' } - ] - ) - - // act - const output = mergeModelOutputs(currentTrainOutput, previousTrainOutput, ['A', 'B', 'C']) - - // assert - expect(output.contexts.length).toBe(3) - expect(output.contexts.sort()[0]).toBe('A') - expect(output.contexts.sort()[1]).toBe('B') - expect(output.contexts.sort()[2]).toBe('C') - - expect(output.intent_model_by_ctx['A']).toBe('modified intent model for A') - expect(output.intent_model_by_ctx['B']).toBe('modified intent model for B') - expect(output.intent_model_by_ctx['C']).toBe('created intent model for C') - }) -}) diff --git a/packages/nlu-engine/src/engine/warm-training-handler.ts b/packages/nlu-engine/src/engine/warm-training-handler.ts deleted file mode 100644 index ec849735..00000000 --- a/packages/nlu-engine/src/engine/warm-training-handler.ts +++ /dev/null @@ -1,68 +0,0 @@ -import crypto from 'crypto' -import _ from 'lodash' - -import { TrainOutput } from './training-pipeline' -import { Intent } from './typings' - -interface ContextChangeLog { - createdContexts: string[] - deletedContexts: string[] - modifiedContexts: string[] -} - -export const getModifiedContexts = ( - currentIntents: Intent[], - previousIntents: Intent[] -): ContextChangeLog => { - const ctx = (i: Intent) => i.contexts - - const currentContexts = _.flatten(currentIntents.map(ctx)) - const previousContexts = _.flatten(previousIntents.map(ctx)) - - const createdContexts = currentContexts.filter((c) => !previousContexts.includes(c)) - const deletedContexts = previousContexts.filter((c) => 
!currentContexts.includes(c)) - - const allContexts = _.uniq([...currentContexts, ...previousContexts]) - const alreadyExistingContexts = allContexts.filter( - (c) => !createdContexts.includes(c) && !deletedContexts.includes(c) - ) - - const changeDetector = _ctxHasChanged(currentIntents, previousIntents) - const modifiedContexts: string[] = alreadyExistingContexts.filter(changeDetector) - - return { - createdContexts, - deletedContexts, - modifiedContexts - } -} - -const _ctxHasChanged = (currentIntents: Intent[], previousIntents: Intent[]) => (ctx: string) => { - const prevHash = _computeCtxHash(previousIntents, ctx) - const currHash = _computeCtxHash(currentIntents, ctx) - return prevHash !== currHash -} - -const _computeCtxHash = (intents: Intent[], ctx: string) => { - const intentsOfCtx = intents.filter((i) => i.contexts.includes(ctx)) - const informationToTrack = intentsOfCtx.map((i) => ({ - name: i.name, - slot_definitions: i.slot_definitions, - utterances: i.utterances - })) - - return crypto.createHash('md5').update(JSON.stringify(informationToTrack)).digest('hex') -} - -export const mergeModelOutputs = ( - currentOutput: TrainOutput, - previousOutput: TrainOutput, - contexts: string[] -): TrainOutput => { - const output = { ...currentOutput } - - const previousIntents = _.pick(previousOutput.intent_model_by_ctx, contexts) - - output.intent_model_by_ctx = { ...previousIntents, ...currentOutput.intent_model_by_ctx } - return output -} diff --git a/packages/nlu-engine/src/errors.ts b/packages/nlu-engine/src/errors.ts deleted file mode 100644 index 5c6176ab..00000000 --- a/packages/nlu-engine/src/errors.ts +++ /dev/null @@ -1,22 +0,0 @@ -export class TrainingCanceled extends Error {} -export function isTrainingCanceled(err: Error): err is TrainingCanceled { - return err instanceof TrainingCanceled -} - -export class TrainingAlreadyStarted extends Error {} -export function isTrainingAlreadyStarted(err: Error): err is TrainingAlreadyStarted { - return err instanceof TrainingAlreadyStarted -} - -export class TrainingExitedUnexpectedly extends Error { - constructor(srcWorkerId: number, info: { exitCode: number; signal: string }) { - const { exitCode, signal } = info - super(`Training worker ${srcWorkerId} exited with exit code ${exitCode} and signal ${signal}.`) - } -} - -export class ModelLoadingError extends Error { - constructor(component: string, innerError: Error | undefined) { - super(`${component} could load model. 
Inner error is: "${innerError?.message}"`) - } -} diff --git a/packages/nlu-engine/src/index.ts b/packages/nlu-engine/src/index.ts index 24a098c9..f155a316 100644 --- a/packages/nlu-engine/src/index.ts +++ b/packages/nlu-engine/src/index.ts @@ -1,28 +1,51 @@ import _ from 'lodash' import path from 'path' import Engine from './engine' -import { DUCKLING_ENTITIES } from './engine/entities/duckling-extractor/enums' -import { isTrainingAlreadyStarted, isTrainingCanceled } from './errors' +import { + TrainingAlreadyStartedError, + TrainingCanceledError, + LangServerError, + DucklingServerError +} from './engine/errors' import LanguageService from './language-service' import _modelIdService from './model-id-service' -import { Config, Logger } from './typings' +import { requireJSON } from './require-json' +import * as types from './typings' -export const SYSTEM_ENTITIES = DUCKLING_ENTITIES +const rootPkgDirectory = path.resolve(__dirname, '..') +const packageJsonPath = path.resolve(rootPkgDirectory, 'package.json') +const assetsPath = path.resolve(rootPkgDirectory, 'assets') +const packageJson = requireJSON<{ version: string }>(packageJsonPath) +if (!packageJson) { + throw new Error('Could not find package.json at the root of nlu-engine.') +} + +const { version: pkgVersion } = packageJson + +export { SLOT_ANY, SYSTEM_ENTITIES } from './constants' -export const errors: _.Dictionary<(err: Error) => boolean> = { - isTrainingAlreadyStarted, - isTrainingCanceled +export const errors: typeof types.errors = { + TrainingAlreadyStartedError, + TrainingCanceledError, + LangServerError, + DucklingServerError } -export const makeEngine = async (config: Config, logger: Logger) => { - const { ducklingEnabled, ducklingURL, languageSources, modelCacheSize, legacyElection, cachePath } = config - const assetsPath = path.join(__dirname, '..', 'assets') - const langConfig = { ducklingEnabled, ducklingURL, languageSources, assetsPath, cachePath } - const engine = new Engine(logger, { cacheSize: modelCacheSize, legacyElection }) +export const makeEngine: typeof types.makeEngine = async (config: types.Config, logger: types.Logger) => { + const { ducklingEnabled, ducklingURL, languageURL, languageAuthToken, modelCacheSize, cachePath } = config + const langConfig: types.LanguageConfig & { assetsPath: string } = { + ducklingEnabled, + ducklingURL, + languageURL, + languageAuthToken, + assetsPath, + cachePath + } + const engine = new Engine(pkgVersion, logger, { cacheSize: modelCacheSize }) await engine.initialize(langConfig) return engine } -export const modelIdService = _modelIdService +export const modelIdService: typeof types.modelIdService = _modelIdService export { LanguageService } diff --git a/packages/nlu-engine/src/language-service/index.ts b/packages/nlu-engine/src/language-service/index.ts index c05b34ca..a08dd4de 100644 --- a/packages/nlu-engine/src/language-service/index.ts +++ b/packages/nlu-engine/src/language-service/index.ts @@ -7,15 +7,14 @@ import ms from 'ms' import os from 'os' import path from 'path' import process from 'process' -import { Logger, LanguageService as ILanguageService } from 'src/typings' +import { Logger, LanguageService as ILanguageService, InstalledModel } from 'src/typings' import { VError } from 'verror' -import toolkit from '../ml/toolkit' -import { MLToolkit } from '../ml/typings' +import * as MLToolkit from '../ml/toolkit' -import { LoadedBPEModel, LoadedFastTextModel, ModelFileInfo, ModelSet } from './typings' +import { LoadedBPEModel, LoadedFastTextModel, ModelFileInfo, 
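
// The predicate helpers are replaced by exported error classes, so callers can
// narrow with instanceof. A minimal sketch (the package name, engine handle
// and train arguments are assumptions):
import { errors } from '@botpress/nlu-engine'
declare const engine: Engine
try {
  await engine.train(/* training id and dataset */)
} catch (thrown) {
  if (thrown instanceof errors.TrainingCanceledError) {
    // a canceled training is expected, not a failure
  } else {
    throw thrown
  }
}
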
ModelSet, AvailableModel } from './typings' -interface RamInfos { +type RamInfos = { free: number total: number prediction: number @@ -76,7 +75,7 @@ export default class LanguageService implements ILanguageService { } } - async initialize() { + public async initialize() { if (Object.keys(this._models).length) { throw new Error('Language Service already initialized') } @@ -98,11 +97,11 @@ export default class LanguageService implements ILanguageService { this._ready = true } - get isReady(): boolean { + public get isReady(): boolean { return this._ready } - async loadModel(lang: string) { + public async loadModel(lang: string) { if (!this._models[lang]) { this._models = { ...this._getModels(), @@ -125,7 +124,8 @@ export default class LanguageService implements ILanguageService { const fastTextModel = await this._loadFastTextModel(lang) const bpeModel = await this._loadBPEModel(lang) this._models[lang] = { fastTextModel, bpeModel } - } catch (err) { + } catch (thrown) { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) this.logger?.error(`[${lang.toUpperCase()}] Error loading language. It will be unavailable.`, err) } } @@ -178,14 +178,14 @@ export default class LanguageService implements ILanguageService { throw new VError(`Could not find model '${lang}' in '${this.langDir}'`, err) } - const fastTextModel = { + const fastTextModel: AvailableModel = { name: lang, path: fastTextModelPath, sizeInMb: 0, loaded: false } - const bpeModel = { + const bpeModel: AvailableModel = { name: lang, path: bpeModelPath, sizeInMb: 0, @@ -197,7 +197,7 @@ export default class LanguageService implements ILanguageService { private async _loadFastTextModel(lang: string): Promise { const loadingAction = async (lang: string) => { - const model = new toolkit.FastText.Model(false, true, true) + const model = new MLToolkit.FastText.Model(false, true, true) const path = this._models[lang].fastTextModel.path await model.loadFromFile(path) return { model, path } @@ -215,7 +215,7 @@ export default class LanguageService implements ILanguageService { private async _loadBPEModel(lang: string): Promise { const loadingAction = async (lang) => { - const tokenizer = await toolkit.SentencePiece.createProcessor() + const tokenizer = await MLToolkit.SentencePiece.createProcessor() const path = this._models[lang].bpeModel.path tokenizer.loadModel(path) return Promise.resolve({ model: tokenizer, path }) @@ -287,7 +287,7 @@ export default class LanguageService implements ILanguageService { return Promise.all(tokens.map(await this._getQueryVectors(fastTextModel as LoadedFastTextModel))) } - public getModels() { + public getModels(): InstalledModel[] { const models = this._getModels() return Object.keys(models).map((lang) => { const loaded = this._models[lang] && this._models[lang].bpeModel.loaded && this._models[lang].fastTextModel.loaded diff --git a/packages/nlu-engine/src/language-service/typings.ts b/packages/nlu-engine/src/language-service/typings.ts index 430b660c..d9f1b1e2 100644 --- a/packages/nlu-engine/src/language-service/typings.ts +++ b/packages/nlu-engine/src/language-service/typings.ts @@ -1,27 +1,26 @@ -import { MLToolkit } from '../ml/typings' +import * as MLToolkit from '../ml/toolkit' -export interface ModelSet { +export type ModelSet = { bpeModel: AvailableModel | LoadedBPEModel fastTextModel: AvailableModel | LoadedFastTextModel } -export interface AvailableModel { +export type AvailableModel = { name: string path: string loaded: boolean + sizeInMb: number } -export interface 
LoadedFastTextModel extends AvailableModel {
+export type LoadedFastTextModel = {
   model: MLToolkit.FastText.Model
-  sizeInMb: number
-}
+} & AvailableModel
 
-export interface LoadedBPEModel extends AvailableModel {
+export type LoadedBPEModel = {
   tokenizer: MLToolkit.SentencePiece.Processor
-  sizeInMb: number
-}
+} & AvailableModel
 
-export interface ModelFileInfo {
+export type ModelFileInfo = {
   domain: string
   langCode: string
   file: string
diff --git a/packages/nlu-engine/src/linting.d.ts b/packages/nlu-engine/src/linting.d.ts
new file mode 100644
index 00000000..365b8b98
--- /dev/null
+++ b/packages/nlu-engine/src/linting.d.ts
@@ -0,0 +1,117 @@
+export type DatasetReport = {
+  issues: DatasetIssue[]
+}
+
+export type IssueCode =
+  | 'C_000' // tokens tagged with a nonexistent slot
+  | 'C_001' // slot has nonexistent entity
+  | 'C_002' // intent has no utterances
+  | 'C_003' // dataset has an unsupported language
+  | 'E_000' // token tagged with slot has incorrect type
+  | 'E_001' // utterance has incorrect language
+  | 'E_002' // duplicated utterances (in one or more intents)
+  | 'E_003' // the whole utterance is tagged as a slot
+  | 'W_000' // intents are overlapping
+  | 'I_000' // utterance contains duplicated or untrimmed spaces
+
+export type Raw<T> = { raw: T }
+export type Clean<T> = { clean: T }
+export type CleanOrRaw<T> = Clean<T> & Raw<T>
+export type Span = { start: number; end: number }
+
+export type IssueData<C extends IssueCode> = C extends 'C_000'
+  ? {
+      intent: string
+      utterance: string
+      slot: string
+    }
+  : C extends 'C_001'
+  ? {
+      intent: string
+      slot: string
+      entity: string
+    }
+  : C extends 'C_002'
+  ? {
+      intent: string
+    }
+  : C extends 'C_003'
+  ? {
+      language: string
+    }
+  : C extends 'E_000'
+  ? {
+      intent: string
+      utterance: Clean<string> & { idx: number }
+      charPos: Clean<Span>
+      slot: string
+      entities: string[]
+      source: string
+    }
+  : C extends 'E_001'
+  ? {
+      intent: string
+      utterance: string
+      detectedLang: string
+      expectedLang: string
+    }
+  : C extends 'E_002'
+  ? {
+      intentA: string
+      intentB: string
+      utterance: string
+    }
+  : C extends 'E_003'
+  ? {
+      intent: string
+      utterance: string
+      slot: string
+    }
+  : C extends 'I_000'
+  ? {
+      intent: string
+      utterance: Raw<string> & { idx: number }
+      charPos: Raw<Span>
+    }
+  : never
+
+export type IssueDefinition<C extends IssueCode> = {
+  code: C
+  severity: IssueSeverity<C>
+  name: string
+}
+
+export type DatasetIssue<C extends IssueCode = IssueCode> = IssueDefinition<C> & {
+  id: string
+  message: string
+  data: IssueData<C>
+}
+
+export type LintingStatus = 'done' | 'linting-pending' | 'linting' | 'canceled' | 'errored'
+export type LintingErrorType = 'lang-server' | 'duckling-server' | 'zombie-linting' | 'internal'
+
+export type LintingError = {
+  type: LintingErrorType
+  message: string
+  stack?: string
+}
+
+export type LintingState = {
+  status: LintingStatus
+  currentCount: number
+  totalCount: number
+  error?: LintingError
+  issues: DatasetIssue[]
+}
+
+export type IssueComputationSpeed = 'fastest' | 'fast' | 'slow' | 'slowest'
+
+export type IssueSeverity<C extends IssueCode> = C extends `C_${infer CodeSuffix}`
+  ? 'critical'
+  : C extends `E_${infer CodeSuffix}`
+  ? 'error'
+  : C extends `W_${infer CodeSuffix}`
+  ? 'warning'
+  : C extends `I_${infer CodeSuffix}`
+  ? 'info'
+  : never
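
// A small sketch of how the conditional types above narrow per issue code
// (the id, name and message values are hypothetical):
const issue: DatasetIssue<'C_002'> = {
  code: 'C_002',
  severity: 'critical', // IssueSeverity<'C_002'> resolves to 'critical'
  name: 'intent_has_no_utterances',
  id: 'some-unique-id',
  message: 'Intent "book-flight" has no utterances.',
  data: { intent: 'book-flight' } // IssueData<'C_002'> allows only { intent }
}
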
diff --git a/packages/nlu-engine/src/ml/crf/base.ts b/packages/nlu-engine/src/ml/crf/base.ts
new file mode 100644
index 00000000..d256540b
--- /dev/null
+++ b/packages/nlu-engine/src/ml/crf/base.ts
@@ -0,0 +1,85 @@
+import { Tagger as AddonTagger, makeTrainer, makeTagger } from '@botpress/node-crfsuite'
+import * as ptb from '@bpinternal/ptb-schema'
+import fse from 'fs-extra'
+import { PipelineComponent } from 'src/component'
+import { Logger } from 'src/typings'
+import tmp from 'tmp'
+import { MarginalPrediction, TagPrediction } from '.'
+import { CRFTrainInput } from './typings'
+
+const PTBCRFTaggerModel = new ptb.PTBMessage('CRFTaggerModel', {
+  content: { type: 'bytes', id: 1, rule: 'required' }
+})
+
+type CRFTaggerModel = ptb.Infer<typeof PTBCRFTaggerModel>
+
+export class CRFTagger
+  implements PipelineComponent {
+  private static _displayName = 'CRF Tagger'
+  private static _name = 'crf-tagger'
+
+  private tagger: AddonTagger | undefined
+
+  public get name() {
+    return CRFTagger._name
+  }
+
+  public static get modelType() {
+    return PTBCRFTaggerModel
+  }
+
+  public get modelType() {
+    return PTBCRFTaggerModel
+  }
+
+  constructor(protected logger: Logger) {}
+
+  public async train(input: CRFTrainInput, progressCallback: (iteration: number) => void): Promise<CRFTaggerModel> {
+    const { options, elements } = input
+    const trainer = await makeTrainer({ debug: false })
+    trainer.set_params(options)
+
+    for (const { features, labels } of elements) {
+      trainer.append(features, labels)
+    }
+
+    const crfModelFilename = tmp.fileSync({ postfix: '.bin' }).name
+
+    trainer.train(crfModelFilename, (iteration) => {
+      progressCallback && progressCallback(iteration)
+      return 0 // return 1 to stop training
+    })
+
+    const content = await fse.readFile(crfModelFilename)
+    return {
+      content
+    }
+  }
+
+  public async load({ content: crfModel }: CRFTaggerModel): Promise<void> {
+    const tagger = await makeTagger()
+    const crfModelFn = tmp.tmpNameSync()
+    fse.writeFileSync(crfModelFn, crfModel)
+    const success = tagger.open(crfModelFn)
+
+    if (!success) {
+      throw new Error('CRF Tagger could not open model.')
+    }
+
+    this.tagger = tagger
+  }
+
+  public async predict(xseq: string[][]): Promise<TagPrediction> {
+    if (!this.tagger) {
+      throw new Error(`${CRFTagger._displayName} must load model before calling predict.`)
+    }
+    return this.tagger.tag(xseq)
+  }
+
+  public async marginal(xseq: string[][]): Promise<MarginalPrediction[]> {
+    if (!this.tagger) {
+      throw new Error(`${CRFTagger._displayName} must load model before calling marginal.`)
+    }
+    return this.tagger.marginal(xseq)
+  }
+}
diff --git a/packages/nlu-engine/src/ml/crf/index.ts b/packages/nlu-engine/src/ml/crf/index.ts
index 5ecc2729..ac1407da 100644
--- a/packages/nlu-engine/src/ml/crf/index.ts
+++ b/packages/nlu-engine/src/ml/crf/index.ts
@@ -1,63 +1,9 @@
-import { Tagger as AddonTagger, Trainer as AddonTrainer, makeTrainer, makeTagger } from '@botpress/node-crfsuite'
-import { Logger } from 'src/typings'
-import tmp from 'tmp'
-import { MLToolkit } from '../../ml/typings'
+import { CRFTagger } from './base'
+import { MultiThreadCRFTagger } from './multi-thread'
 
-export class Trainer implements MLToolkit.CRF.Trainer {
-  private trainer!: AddonTrainer
-  private _cancelTraining = false
+const isTsNode = !!process.env.TS_NODE_DEV // worker_threads do not work with ts-node
 
-  constructor(protected logger: Logger) {}
+export * from './typings'
 
-  public async initialize() {
-    // debugging should be enabled but, this slows down crf training...
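
// A usage sketch of the PipelineComponent flow implemented above: train
// produces a protobuf-serializable model, load restores it, predict tags a
// feature sequence (the logger and the feature values are assumptions):
declare const logger: Logger
declare const elements: { features: string[][]; labels: string[] }[]
const tagger = new CRFTagger(logger)
const model = await tagger.train({ elements, options: {} }, (iteration) => {})
await tagger.load(model)
const { probability, result } = await tagger.predict([['word=fly'], ['word=to'], ['word=paris']])
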
TODO: find a solution - this.trainer = await makeTrainer({ debug: false }) - } - - public async train( - elements: MLToolkit.CRF.DataPoint[], - options: MLToolkit.CRF.TrainerOptions, - progressCallback: (iteration: number) => void - ): Promise { - this.trainer.set_params(options) - - for (const { features, labels } of elements) { - this.trainer.append(features, labels) - } - - const crfModelFilename = tmp.fileSync({ postfix: '.bin' }).name - - this.trainer.train(crfModelFilename, (iteration) => { - progressCallback && progressCallback(iteration) - return this._cancelTraining ? 1 : 0 - }) - - return crfModelFilename - } - - public cancelTraining() { - this._cancelTraining = true - } -} - -export class Tagger implements MLToolkit.CRF.Tagger { - private tagger!: AddonTagger - - constructor() {} - - public async initialize() { - this.tagger = await makeTagger() - } - - tag(xseq: string[][]): { probability: number; result: string[] } { - return this.tagger.tag(xseq) - } - - open(model_filename: string): boolean { - return this.tagger.open(model_filename) - } - - marginal(xseq: string[][]): { [label: string]: number }[] { - return this.tagger.marginal(xseq) - } -} +export type Tagger = CRFTagger +export const Tagger = isTsNode ? CRFTagger : MultiThreadCRFTagger diff --git a/packages/nlu-engine/src/ml/crf/multi-thread-trainer.ts b/packages/nlu-engine/src/ml/crf/multi-thread-trainer.ts deleted file mode 100644 index 2f02f5aa..00000000 --- a/packages/nlu-engine/src/ml/crf/multi-thread-trainer.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { nanoid } from 'nanoid' -import mLThreadPool from '../ml-thread-pool' -import { MLToolkit } from '../typings' -import { Trainer } from '.' - -export class MultiThreadTrainer extends Trainer { - public async train( - elements: MLToolkit.CRF.DataPoint[], - options: MLToolkit.CRF.TrainerOptions, - progressCallback: (iteration: number) => void - ) { - const id = nanoid() - const output = await mLThreadPool(this.logger).startCrfTraining(id, elements, options, progressCallback) - return output - } -} diff --git a/packages/nlu-engine/src/ml/crf/multi-thread.ts b/packages/nlu-engine/src/ml/crf/multi-thread.ts new file mode 100644 index 00000000..75859a5a --- /dev/null +++ b/packages/nlu-engine/src/ml/crf/multi-thread.ts @@ -0,0 +1,14 @@ +import { nanoid } from 'nanoid' +import mLThreadPool from '../ml-thread-pool' +import { CRFTagger } from './base' +import { CRFTrainInput } from './typings' + +export class MultiThreadCRFTagger extends CRFTagger { + public async train(input: CRFTrainInput, progressCallback: (iteration: number) => void) { + const { elements, options } = input + const id = nanoid() + const output = await mLThreadPool(this.logger).startCrfTraining(id, elements, options, progressCallback) + const bin = Buffer.from(output) + return CRFTagger.modelType.decode(bin) + } +} diff --git a/packages/nlu-engine/src/ml/crf/typings.ts b/packages/nlu-engine/src/ml/crf/typings.ts new file mode 100644 index 00000000..92956cfd --- /dev/null +++ b/packages/nlu-engine/src/ml/crf/typings.ts @@ -0,0 +1,20 @@ +export type TrainerOptions = { + [key: string]: string +} + +export type TrainProgressCallback = { + (iteration: number): void +} + +export type DataPoint = { + features: Array + labels: string[] +} + +export type CRFTrainInput = { + elements: DataPoint[] + options: TrainerOptions +} + +export type TagPrediction = { probability: number; result: string[] } +export type MarginalPrediction = { [label: string]: number } diff --git a/packages/nlu-engine/src/ml/fasttext/index.ts 
b/packages/nlu-engine/src/ml/fasttext/index.ts index 77ac83bb..355c602f 100644 --- a/packages/nlu-engine/src/ml/fasttext/index.ts +++ b/packages/nlu-engine/src/ml/fasttext/index.ts @@ -1,12 +1,15 @@ import { makeClassifier, makeQuery, Options, Query } from '@botpress/node-fasttext' import Bluebird from 'bluebird' import { VError } from 'verror' -import { MLToolkit } from '../typings' + +import { PredictResult, TrainArgs, TrainCommand } from './typings' const FAST_TEXT_VERBOSITY = parseInt(process.env.FAST_TEXT_VERBOSITY || '0') const FAST_TEXT_CLEANUP_MS = parseInt(process.env.FAST_TEXT_CLEANUP_MS || '60000') // 60s caching by default -export const DefaultTrainArgs: Partial = { +export * from './typings' + +export const DefaultTrainArgs: Partial = { bucket: 25000, dim: 15, epoch: 5, @@ -23,7 +26,7 @@ export const DefaultTrainArgs: Partial = { * allows to delay the loading of the model only when actually needed for prediction or query. * It also cleans up the model after 'x' ms of inactivity to free up memory. */ -export class FastTextModel implements MLToolkit.FastText.Model { +export class Model { private _modelPromise: Promise | undefined private _queryPromise: Promise | undefined private _modelTimeout: NodeJS.Timeout | undefined @@ -44,16 +47,12 @@ export class FastTextModel implements MLToolkit.FastText.Model { constructor(private lazy: boolean = true, private keepInMemory = false, private queryOnly = false) {} - cleanup() { + public cleanup() { this._modelPromise = undefined this._queryPromise = undefined } - async trainToFile( - method: MLToolkit.FastText.TrainCommand, - modelPath: string, - args: Partial - ): Promise { + public async trainToFile(method: TrainCommand, modelPath: string, args: Partial): Promise { const outPath = this._cleanPath(modelPath) const model = await makeClassifier() await model.train(method, { @@ -72,7 +71,7 @@ export class FastTextModel implements MLToolkit.FastText.Model { } } - async loadFromFile(modelPath: string): Promise { + public async loadFromFile(modelPath: string): Promise { this._modelPath = this._cleanPath(modelPath) if (!this.lazy) { if (!this.queryOnly) { @@ -83,7 +82,7 @@ export class FastTextModel implements MLToolkit.FastText.Model { } } - async predict(str: string, nbLabels: number): Promise { + public async predict(str: string, nbLabels: number): Promise { if (this.queryOnly) { throw new Error("This model is marked as Query Only, which doesn't support Prediction") } @@ -92,12 +91,12 @@ export class FastTextModel implements MLToolkit.FastText.Model { return model.predict(str, nbLabels) } - async queryWordVectors(word: string): Promise { + public async queryWordVectors(word: string): Promise { const query = await this._getQuery() return query.getWordVector(word) } - async queryNearestNeighbors(word: string, nb: number): Promise { + public async queryNearestNeighbors(word: string, nb: number): Promise { const query = await this._getQuery() const ret = await query.nn(word, nb) return ret.map((x) => x.label) @@ -137,7 +136,8 @@ export class FastTextModel implements MLToolkit.FastText.Model { await q.getWordVector('hydrate') // hydration as fastText loads models lazily resolve(q) this._resetQueryBomb() - } catch (err) { + } catch (thrown) { + const err = thrown instanceof Error ? 
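
// Lazy-loading sketch for the fasttext Model above (the model path is
// hypothetical): in query-only mode the file is opened on first query and
// cleaned up after the inactivity delay.
const ft = new Model(true, false, true) // lazy, not kept in memory, query-only
await ft.loadFromFile('/models/bp.25.bin') // only records the path when lazy
const vector = await ft.queryWordVectors('hello')
const neighbors = await ft.queryNearestNeighbors('hello', 5)
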
thrown : new Error(`${thrown}`) reject(new VError(err, `Model = "${this.modelPath}"`)) } }) diff --git a/packages/nlu-engine/src/ml/fasttext/typings.ts b/packages/nlu-engine/src/ml/fasttext/typings.ts new file mode 100644 index 00000000..fb75271a --- /dev/null +++ b/packages/nlu-engine/src/ml/fasttext/typings.ts @@ -0,0 +1,50 @@ +export type TrainCommand = 'supervised' | 'quantize' | 'skipgram' | 'cbow' +export type Loss = 'hs' | 'softmax' + +export type TrainArgs = { + lr: number + dim: number + ws: number + epoch: number + minCount: number + minCountLabel: number + neg: number + wordNgrams: number + loss: Loss + model: string + input: string + bucket: number + minn: number + maxn: number + thread: number + lrUpdateRate: number + t: number + label: string + pretrainedVectors: string + qout: boolean + retrain: boolean + qnorm: boolean + cutoff: number + dsub: number +} + +export type PredictResult = { + label: string + value: number +} + +// export type Model = { +// cleanup: () => void +// trainToFile: (method: TrainCommand, modelPath: string, args: Partial) => Promise +// loadFromFile: (modelPath: string) => Promise +// predict: (str: string, nbLabels: number) => Promise +// queryWordVectors(word: string): Promise +// queryNearestNeighbors(word: string, nb: number): Promise +// } + +// export type ModelConstructor = { +// new (): Model +// new (lazy: boolean, keepInMemory: boolean, queryOnly: boolean): Model +// } + +// export const Model: ModelConstructor diff --git a/packages/nlu-engine/src/ml/kmeans.ts b/packages/nlu-engine/src/ml/kmeans.ts new file mode 100644 index 00000000..e41ce1da --- /dev/null +++ b/packages/nlu-engine/src/ml/kmeans.ts @@ -0,0 +1,39 @@ +import _kmeans from 'ml-kmeans' + +export type KMeansOptions = { + maxIterations?: number + tolerance?: number + withIterations?: boolean + distanceFunction?: DistanceFunction + seed?: number + initialization?: 'random' | 'kmeans++' | 'mostDistant' | number[][] +} + +export type Centroid = { + centroid: number[] + error: number + size: number +} + +// TODO convert this to class we build the source of ml-kmeans +export type KmeansResult = { + // constructor( + // clusters: number[], + // centroids: Centroid[], + // converged: boolean, + // iterations: number, + // distance: DistanceFunction + // ) + clusters: number[] + centroids: Centroid[] + iterations: number + nearest: (data: DataPoint[]) => number[] +} + +export type DataPoint = number[] + +export type DistanceFunction = (point0: DataPoint, point1: DataPoint) => number + +type KmeansFunc = (data: DataPoint[], K: number, options: KMeansOptions) => KmeansResult + +export const kmeans: KmeansFunc = _kmeans diff --git a/packages/nlu-engine/src/ml/ml-thread-pool/index.ts b/packages/nlu-engine/src/ml/ml-thread-pool/index.ts index 618da12c..41db4743 100644 --- a/packages/nlu-engine/src/ml/ml-thread-pool/index.ts +++ b/packages/nlu-engine/src/ml/ml-thread-pool/index.ts @@ -2,7 +2,7 @@ import { makeThreadPool, ThreadPool } from '@botpress/worker' import _ from 'lodash' import os from 'os' import { Logger } from 'src/typings' -import { MLToolkit } from '../typings' +import { CRF, SVM } from '../toolkit' import { ENTRY_POINT } from './thread-entry-point' import { TaskInput, TaskOutput } from './typings' @@ -26,8 +26,8 @@ class MLThreadPool { public startCrfTraining( trainId: string, - points: MLToolkit.CRF.DataPoint[], - options: MLToolkit.CRF.TrainerOptions, + points: CRF.DataPoint[], + options: CRF.TrainerOptions, progress: (p: number) => void ) { const input: TaskInput = { 
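
// A quick sketch of the wrapped ml-kmeans API typed above (the data points are
// made up):
const result = kmeans([[0, 0], [0, 1], [10, 10], [10, 11]], 2, { seed: 42 })
result.clusters // e.g. [0, 0, 1, 1], one cluster index per data point
result.nearest([[9, 9]]) // e.g. [1], the index of the closest centroid
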
trainingType: 'crf', points, options } @@ -36,8 +36,8 @@ class MLThreadPool { public startSvmTraining( trainId: string, - points: MLToolkit.SVM.DataPoint[], - options: MLToolkit.SVM.SVMOptions, + points: SVM.DataPoint[], + options: SVM.SVMOptions, progress: (p: number) => void ) { const input: TaskInput = { trainingType: 'svm', points, options } diff --git a/packages/nlu-engine/src/ml/ml-thread-pool/thread-entry-point.ts b/packages/nlu-engine/src/ml/ml-thread-pool/thread-entry-point.ts index 57495789..6aa86d5b 100644 --- a/packages/nlu-engine/src/ml/ml-thread-pool/thread-entry-point.ts +++ b/packages/nlu-engine/src/ml/ml-thread-pool/thread-entry-point.ts @@ -1,6 +1,6 @@ import { makeThreadEntryPoint, TaskDefinition } from '@botpress/worker' -import { Trainer as CrfTrainer } from '../crf' -import { Trainer as SvmTrainer } from '../svm' +import { CRFTagger } from '../crf/base' +import { SVMClassifier } from '../svm/base' import { TaskInput, TaskOutput } from './typings' export const ENTRY_POINT = __filename @@ -9,22 +9,24 @@ const threadEntryPoint = makeThreadEntryPoint() const main = async () => { try { - threadEntryPoint.listenForTask(async (taskDef: TaskDefinition) => { + threadEntryPoint.listenForTask(async (taskDef: TaskDefinition) => { const { input, progress } = taskDef if (input.trainingType === 'svm') { - const trainer = new SvmTrainer(taskDef.logger) - const result = await trainer.train(input.points, input.options, progress) - return result + const svm = new SVMClassifier(taskDef.logger) + const result = await svm.train(input, progress) + const bin = SVMClassifier.modelType.encode(result) + return Buffer.from(bin) } - const trainer = new CrfTrainer(taskDef.logger) - await trainer.initialize() - const result = await trainer.train(input.points, input.options, progress) - return result + const crf = new CRFTagger(taskDef.logger) + const result = await crf.train({ elements: input.points, options: input.options }, progress) + const bin = CRFTagger.modelType.encode(result) + return Buffer.from(bin) }) await threadEntryPoint.initialize() - } catch (err) { + } catch (thrown) { + const err = thrown instanceof Error ? 
thrown : new Error(`${thrown}`) threadEntryPoint.logger.error('An unhandled error occured in the thread', err) process.exit(1) } diff --git a/packages/nlu-engine/src/ml/ml-thread-pool/typings.ts b/packages/nlu-engine/src/ml/ml-thread-pool/typings.ts index 7a9ea208..039e595e 100644 --- a/packages/nlu-engine/src/ml/ml-thread-pool/typings.ts +++ b/packages/nlu-engine/src/ml/ml-thread-pool/typings.ts @@ -1,15 +1,15 @@ -import { MLToolkit } from '../typings' +import { CRF, SVM } from '../toolkit' export type TaskInput = | { trainingType: 'svm' - points: MLToolkit.SVM.DataPoint[] - options: MLToolkit.SVM.SVMOptions + points: SVM.DataPoint[] + options: SVM.SVMOptions } | { trainingType: 'crf' - points: MLToolkit.CRF.DataPoint[] - options: MLToolkit.CRF.TrainerOptions + points: CRF.DataPoint[] + options: CRF.TrainerOptions } -export type TaskOutput = string +export type TaskOutput = Buffer diff --git a/packages/nlu-engine/src/ml/sentencepiece/index.ts b/packages/nlu-engine/src/ml/sentencepiece/index.ts index 81dfa431..5c29a216 100644 --- a/packages/nlu-engine/src/ml/sentencepiece/index.ts +++ b/packages/nlu-engine/src/ml/sentencepiece/index.ts @@ -1,6 +1 @@ -import { makeProcessor } from '@botpress/node-sentencepiece' -import { MLToolkit } from '../typings' - -export const processor: () => Promise = () => { - return makeProcessor() -} +export { makeProcessor as createProcessor, Processor } from '@botpress/node-sentencepiece' diff --git a/packages/nlu-engine/src/ml/svm/base.ts b/packages/nlu-engine/src/ml/svm/base.ts new file mode 100644 index 00000000..105301fe --- /dev/null +++ b/packages/nlu-engine/src/ml/svm/base.ts @@ -0,0 +1,199 @@ +import * as ptb from '@bpinternal/ptb-schema' +import _ from 'lodash' +import { PipelineComponent } from 'src/component' +import { Logger } from 'src/typings' +import { flattenMatrix, unflattenMatrix } from './flat-matrix' + +import { SVM } from './libsvm' +import { Data, KernelTypes, Parameters, SvmModel, SvmTypes } from './libsvm/typings' +import { PTBSVMClassifierModel, PTBSVMClassifierParams } from './serialization' +import { SVMTrainInput, Prediction, TrainProgressCallback, SVMOptions } from './typings' + +type Predictors = { + clf: SVM + labels: string[] + parameters: Parameters +} + +type Dic = _.Dictionary + +type ComponentModel = ptb.Infer + +export class SVMClassifier + implements PipelineComponent { + private static _displayName = 'SVM Classifier' + private static _name = 'svm-classifier' + + private _predictors: Predictors | undefined + + public get name() { + return SVMClassifier._name + } + + public static get modelType() { + return PTBSVMClassifierModel + } + + public get modelType() { + return PTBSVMClassifierModel + } + + constructor(protected logger: Logger) {} + + public async train(input: SVMTrainInput, callback: TrainProgressCallback | undefined): Promise { + const { points, options } = input + const vectorsLengths = _(points) + .map((p) => p.coordinates.length) + .uniq() + .value() + + if (vectorsLengths.length > 1) { + throw new Error('All vectors must be of the same size') + } + + const labels = _(points) + .map((p) => p.label) + .uniq() + .value() + + const dataset: Data[] = points.map((p) => [p.coordinates, labels.indexOf(p.label)]) + + if (labels.length < 2) { + throw new Error("SVM can't train on a dataset of only one class") + } + + const arr = (n?: number | number[]) => (_.isNumber(n) ? 
[n] : n) + const svm = new SVM( + { + svm_type: options && SvmTypes[options.classifier], + kernel_type: options && KernelTypes[options.kernel], + C: options && arr(options.c), + gamma: options && arr(options.gamma), + probability: options?.probability, + reduce: options?.reduce, + kFold: 4 + }, + this.logger + ) + + const seed = this._extractSeed(options) + const trainResult = await svm.train(dataset, seed, (progress) => { + if (callback && typeof callback === 'function') { + callback(progress) + } + }) + svm.free() + + const { model } = trainResult + const ser = this._serializeModel({ ...model, labels_idx: labels }) + return ser + } + + private _serializeModel = (model: SvmModel & { labels_idx: string[] }): ptb.Infer => { + const { SV, sv_coef, u, mu, sigma, ...others } = model + return { + ...others, + SV: flattenMatrix(SV), + sv_coef: flattenMatrix(sv_coef), + u: u && flattenMatrix(u), + mu, + sigma + } + } + + public load = async (serialized: ComponentModel) => { + const { labels_idx: labels, ...model } = this._deserializeModel(serialized) + const { param: parameters } = model + const clf = new SVM({ kFold: 1 }) + await clf.initialize(model) + this._predictors = { + clf, + labels, + parameters + } + } + + private _deserializeModel = (model: ComponentModel): SvmModel & { labels_idx: string[] } => { + const { SV, sv_coef, u, param, rho, probA, probB, sv_indices, label, nSV, labels_idx, ...others } = model + return { + param: this._deserializeParams(param), + SV: unflattenMatrix(SV), + sv_coef: unflattenMatrix(sv_coef), + u: u && unflattenMatrix(u), + rho: rho ?? [], + probA: probA ?? [], + probB: probB ?? [], + sv_indices: sv_indices ?? [], + label: label ?? [], + nSV: nSV ?? [], + labels_idx: labels_idx ?? [], + ...others + } + } + + private _deserializeParams = (params: ptb.Infer): Parameters => { + const { weight_label, weight, ...others } = params + return { + weight_label: weight_label ?? [], + weight: weight ?? [], + ...others + } + } + + public async predict(coordinates: number[]): Promise { + if (!this._predictors) { + throw new Error(`${SVMClassifier._displayName} must load model before calling predict.`) + } + + if (this._predictors.parameters.probability) { + return this._predictProb(this._predictors, coordinates) + } else { + return this._predictOne(this._predictors, coordinates) + } + } + + private async _predictProb(preds: Predictors, coordinates: number[]): Promise { + const results = await preds.clf.predictProbabilities(coordinates) + + const idexes = _.range(results.length) + const reducedResults = _.reduce( + idexes, + (acc: Dic, curr: number) => { + const label = this.getLabelByIdx(preds, curr).replace(/__k__\d+$/, '') + acc[label] = (acc[label] || 0) + results[curr] + return acc + }, + {} as Dic + ) + + return _.orderBy( + Object.keys(reducedResults).map((idx) => ({ label: idx, confidence: reducedResults[idx] })), + 'confidence', + 'desc' + ) + } + + private async _predictOne(preds: Predictors, coordinates: number[]): Promise { + // might simply use oneclass instead + const results = await preds.clf.predict(coordinates) + return [ + { + label: this.getLabelByIdx(preds, results), + confidence: 0 + } + ] + } + + private getLabelByIdx(preds: Predictors, idx: number): string { + idx = Math.round(idx) + if (idx < 0) { + throw new Error(`Invalid prediction, prediction must be between 0 and ${preds.labels.length}`) + } + return preds.labels[idx] + } + + private _extractSeed(options?: SVMOptions): number { + const seed = options?.seed + return seed ?? 
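
// A usage sketch mirroring the CRF component: the classifier trains to a
// protobuf-typed model, reloads it, then predicts labels with confidences
// sorted in descending order (logger, points and options are assumptions):
declare const logger: Logger
declare const points: { label: string; coordinates: number[] }[]
declare const options: SVMOptions
const clf = new SVMClassifier(logger)
const svmModel = await clf.train({ points, options }, (progress) => {})
await clf.load(svmModel)
const predictions = await clf.predict([0.1, 0.3, 0.9]) // [{ label, confidence }, ...]
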
Math.round(Math.random() * 10000) + } +} diff --git a/packages/nlu-engine/src/ml/svm/flat-matrix.ts b/packages/nlu-engine/src/ml/svm/flat-matrix.ts new file mode 100644 index 00000000..fb41a1ca --- /dev/null +++ b/packages/nlu-engine/src/ml/svm/flat-matrix.ts @@ -0,0 +1,29 @@ +import * as ptb from '@bpinternal/ptb-schema' +import _ from 'lodash' + +let matrix_idx = 0 +export const PTBFlatMatrixMsg = new ptb.PTBMessage('Matrix', { + nCol: { type: 'int32', id: matrix_idx++, rule: 'required' }, + data: { type: 'double', id: matrix_idx++, rule: 'repeated' } +}) +export type PTBFlatMatrix = ptb.Infer + +export const flattenMatrix = (matrix: number[][]): PTBFlatMatrix => { + if (!matrix.length) { + return { + nCol: 0, + data: [] + } + } + + const nCol = matrix[0].length + const data = _.flatten(matrix) + return { + nCol, + data + } +} + +export const unflattenMatrix = (flatMatrix: PTBFlatMatrix): number[][] => { + return _.chunk(flatMatrix.data, flatMatrix.nCol) +} diff --git a/packages/nlu-engine/src/ml/svm/index.ts b/packages/nlu-engine/src/ml/svm/index.ts index af101f1d..8826620b 100644 --- a/packages/nlu-engine/src/ml/svm/index.ts +++ b/packages/nlu-engine/src/ml/svm/index.ts @@ -1,173 +1,9 @@ -import _ from 'lodash' -import { Logger } from 'src/typings' -import { MLToolkit } from '../typings' +import { SVMClassifier } from './base' +import { MultiThreadSVMClassifier } from './multi-thread' -import { SVM } from './libsvm' -import { Data, KernelTypes, SvmModel, SvmParameters as Parameters, SvmTypes } from './libsvm/typings' +const isTsNode = !!process.env.TS_NODE_DEV // worker_threads do not work with ts-node -type Serialized = SvmModel & { - labels_idx: string[] -} +export * from './typings' -export class Trainer implements MLToolkit.SVM.Trainer { - private model?: SvmModel - private svm?: SVM - - constructor(protected logger: Logger) {} - - cancelTraining() { - this.svm?.cancelTraining() - } - - async train( - points: MLToolkit.SVM.DataPoint[], - options?: MLToolkit.SVM.SVMOptions, - callback?: MLToolkit.SVM.TrainProgressCallback | undefined - ): Promise { - const vectorsLengths = _(points) - .map((p) => p.coordinates.length) - .uniq() - .value() - if (vectorsLengths.length > 1) { - throw new Error('All vectors must be of the same size') - } - - const labels = _(points) - .map((p) => p.label) - .uniq() - .value() - const dataset: Data[] = points.map((p) => [p.coordinates, labels.indexOf(p.label)]) - - if (labels.length < 2) { - throw new Error("SVM can't train on a dataset of only one class") - } - - const arr = (n?: number | number[]) => (_.isNumber(n) ? [n] : n) - this.svm = new SVM( - { - svm_type: options && SvmTypes[options.classifier], - kernel_type: options && KernelTypes[options.kernel], - C: options && arr(options.c), - gamma: options && arr(options.gamma), - probability: options?.probability, - reduce: options?.reduce, - kFold: 4 - }, - this.logger - ) - - const seed = this._extractSeed(options) - const trainResult = await this.svm.train(dataset, seed, (progress) => { - if (callback && typeof callback === 'function') { - callback(progress) - } - }) - this.svm.free() - - if (!trainResult) { - return '' - } - - const { model } = trainResult - this.model = model - const serialized: Serialized = { ...model, labels_idx: labels } - return JSON.stringify(serialized) - } - - isTrained(): boolean { - return !!this.model - } - - private _extractSeed(options?: MLToolkit.SVM.SVMOptions): number { - const seed = options?.seed - return seed ?? 
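
// The helpers above store a number[][] as a flat buffer plus its column count
// so it fits the protobuf schema; unflattenMatrix is the exact inverse:
const flat = flattenMatrix([[1, 2], [3, 4]]) // { nCol: 2, data: [1, 2, 3, 4] }
const restored = unflattenMatrix(flat) // [[1, 2], [3, 4]]
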
Math.round(Math.random() * 10000) - } -} - -export class Predictor implements MLToolkit.SVM.Predictor { - private clf: SVM | undefined - private labels: string[] - private parameters: Parameters | undefined - private serialized: Serialized - - constructor(json_model: string) { - const serialized: Serialized = JSON.parse(json_model) - this.labels = serialized.labels_idx - this.serialized = serialized - } - - public async initialize() { - try { - // TODO: actually check the model format - const model = _.omit(this.serialized, 'labels_idx') - this.parameters = model.param - this.clf = new SVM({ kFold: 1 }) - await this.clf.initialize(model) - } catch (err) { - this.throwModelHasChanged(err) - } - } - - private throwModelHasChanged(err?: Error) { - let errorMsg = 'SVM model format has changed. NLU needs to be retrained.' - if (err) { - errorMsg += ` Inner error is '${err}'.` - } - throw new Error(errorMsg) - } - - private getLabelByIdx(idx): string { - idx = Math.round(idx) - if (idx < 0) { - throw new Error(`Invalid prediction, prediction must be between 0 and ${this.labels.length}`) - } - - return this.labels[idx] - } - - async predict(coordinates: number[]): Promise { - if (this.parameters?.probability) { - return this._predictProb(coordinates) - } else { - return this._predictOne(coordinates) - } - } - - private async _predictProb(coordinates: number[]): Promise { - const results = await (this.clf as SVM).predictProbabilities(coordinates) - const reducedResults = _.reduce( - Object.keys(results), - (acc, curr) => { - const label = this.getLabelByIdx(curr).replace(/__k__\d+$/, '') - acc[label] = (acc[label] || 0) + results[curr] - return acc - }, - {} - ) - - return _.orderBy( - Object.keys(reducedResults).map((idx) => ({ label: idx, confidence: reducedResults[idx] })), - 'confidence', - 'desc' - ) - } - - private async _predictOne(coordinates: number[]): Promise { - // might simply use oneclass instead - const results = await (this.clf as SVM).predict(coordinates) - return [ - { - label: this.getLabelByIdx(results), - confidence: 0 - } - ] - } - - isLoaded(): boolean { - return !!this.clf - } - - getLabels(): string[] { - return _.values(this.labels) - } -} +export type Classifier = SVMClassifier +export const Classifier = isTsNode ? SVMClassifier : MultiThreadSVMClassifier diff --git a/packages/nlu-engine/src/ml/svm/libsvm/base-svm.ts b/packages/nlu-engine/src/ml/svm/libsvm/base-svm.ts index 16106890..e071e85b 100644 --- a/packages/nlu-engine/src/ml/svm/libsvm/base-svm.ts +++ b/packages/nlu-engine/src/ml/svm/libsvm/base-svm.ts @@ -8,14 +8,12 @@ import { Data } from './typings' export default class BaseSVM { private _clf: NSVM | undefined - constructor(clf?: NSVM) { - this._clf = clf - } - static restore = async (model: Model) => { const clf = await makeSvm() clf.set_model(model) // might throw - return new BaseSVM(clf) + const instance = new BaseSVM() + instance._clf = clf + return instance } train = async (dataset: Data[], random_seed: number, params: Parameters): Promise => { @@ -86,11 +84,11 @@ export default class BaseSVM { }) } - isTrained = () => { + public isTrained = () => { return !!this._clf ? 
this._clf.is_trained() : false } - free() { + public free() { return this._clf?.free_model() } } diff --git a/packages/nlu-engine/src/ml/svm/libsvm/grid-search/evaluators.ts b/packages/nlu-engine/src/ml/svm/libsvm/grid-search/evaluators.ts index 1a847c8d..479c58a9 100644 --- a/packages/nlu-engine/src/ml/svm/libsvm/grid-search/evaluators.ts +++ b/packages/nlu-engine/src/ml/svm/libsvm/grid-search/evaluators.ts @@ -97,7 +97,7 @@ class RegressionEvaluator implements Evaluator { } } -interface Evaluator { +type Evaluator = { compute(predictions: number[][]): Report electBest(results: GridSearchResult[]): GridSearchResult } diff --git a/packages/nlu-engine/src/ml/svm/libsvm/grid-search/typings.ts b/packages/nlu-engine/src/ml/svm/libsvm/grid-search/typings.ts index 694f8a6c..491de988 100644 --- a/packages/nlu-engine/src/ml/svm/libsvm/grid-search/typings.ts +++ b/packages/nlu-engine/src/ml/svm/libsvm/grid-search/typings.ts @@ -1,11 +1,11 @@ import { Report, SvmParameters } from '../typings' -export interface GridSearchResult { +export type GridSearchResult = { params: SvmParameters report?: Report } -export interface GridSearchProgress { +export type GridSearchProgress = { done: number total: number } diff --git a/packages/nlu-engine/src/ml/svm/libsvm/index.ts b/packages/nlu-engine/src/ml/svm/libsvm/index.ts index cd48d796..36c5e61f 100644 --- a/packages/nlu-engine/src/ml/svm/libsvm/index.ts +++ b/packages/nlu-engine/src/ml/svm/libsvm/index.ts @@ -1,7 +1,7 @@ import assert from 'assert' import _ from 'lodash' import numeric from 'numeric' -import { Logger } from 'src/typings' +import { Logger } from '../../../typings' import BaseSVM from './base-svm' import { checkConfig, defaultConfig } from './config' @@ -11,20 +11,25 @@ import { normalizeDataset, normalizeInput } from './normalize' import reduce from './reduce-dataset' import { Data, Report, SvmConfig, SvmModel } from './typings' -class TrainingCanceledError extends Error { - constructor(msg: string) { - super(msg) +class NoTrainedModelError extends Error { + constructor() { + super('Cannot predict because there is no trained model.') } } -interface TrainOutput { +type TrainOutput = { model: SvmModel report?: Report } +type Trained = { + svm: BaseSVM + model: SvmModel +} + export class SVM { private _config: SvmConfig - private _baseSvm: BaseSVM | undefined + private _trained: Trained | undefined private _retainedVariance: number = 0 private _retainedDimension: number = 0 private _initialDimension: number = 0 @@ -34,119 +39,135 @@ export class SVM { this._config = { ...checkConfig(defaultConfig(config)) } } - async initialize(model: SvmModel) { + public async initialize(model: SvmModel) { const self = this - this._baseSvm = await BaseSVM.restore(model) + const svm = await BaseSVM.restore(model) + this._trained = { + svm, + model + } + Object.entries(model.param).forEach(([key, val]) => { self._config[key] = val }) } - cancelTraining = () => { + public cancelTraining = () => { this._isCanceled = true } - train = async ( + public train = async ( dataset: Data[], seed: number, progressCb: (progress: number) => void - ): Promise => { - const self = this + ): Promise => { const dims = numeric.dim(dataset) assert(dims[0] > 0 && dims[1] === 2 && dims[2] > 0, 'dataset must be an list of [X,y] tuples') - if (!this._config.normalize) { - this._config.mu = Array(dims[2]).fill(0) - this._config.sigma = Array(dims[2]).fill(0) - } else { + let mu: number[] | undefined + let sigma: number[] | undefined + let u: number[][] | undefined + + if 
(this._config.normalize) { const norm = normalizeDataset(dataset) - this._config.mu = norm.mu - this._config.sigma = norm.sigma + mu = norm.mu + sigma = norm.sigma dataset = norm.dataset } if (!this._config.reduce) { - this._config.u = numeric.identity(dims[2]) this._retainedVariance = 1 this._retainedDimension = dims[2] this._initialDimension = dims[2] } else { const red = reduce(dataset, this._config.retainedVariance) - this._config.u = red.U + u = red.U this._retainedVariance = red.retainedVariance this._retainedDimension = red.newDimension this._initialDimension = red.oldDimension dataset = red.dataset } - let gridSearchResult: GridSearchResult - try { - gridSearchResult = await gridSearch(this._logger)(dataset, this._config, seed, (progress) => { - if (this._isCanceled) { - throw new TrainingCanceledError('Training was canceled') - } - progressCb(progress.done / (progress.total + 1)) - }) - } catch (err) { - if (err instanceof TrainingCanceledError) { - return - } - throw err - } + const gridSearchResult = await gridSearch(this._logger)(dataset, this._config, seed, (progress) => + progressCb(progress.done / (progress.total + 1)) + ) const { params, report } = gridSearchResult - self._baseSvm = new BaseSVM() - const model = await self._baseSvm.train(dataset, seed, params) + const svm = new BaseSVM() + const trainOutput = await svm.train(dataset, seed, params) + const model: SvmModel = { ...trainOutput, mu, sigma, u } + this._trained = { + svm, + model + } progressCb(1) - const fullModel: SvmModel = { ...model, param: { ...self._config, ...model.param } } if (report) { const fullReport: Report = { ...report, - reduce: self._config.reduce, - retainedVariance: self._retainedVariance, - retainedDimension: self._retainedDimension, - initialDimension: self._initialDimension + reduce: this._config.reduce, + retainedVariance: this._retainedVariance, + retainedDimension: this._retainedDimension, + initialDimension: this._initialDimension } - return { model: fullModel, report: fullReport } + return { model, report: fullReport } } - return { model: fullModel } + return { model } } - free = () => { - this._baseSvm?.free() + public free = () => { + this._trained?.svm.free() } - isTrained = () => { - return !!this._baseSvm ? this._baseSvm.isTrained() : false + public isTrained = () => { + return !!this._trained ? 
this._trained.svm.isTrained() : false } - predict = (x: number[]) => { - assert(this.isTrained()) - return (this._baseSvm as BaseSVM).predict(this._format(x)) + public predict = (x: number[]) => { + if (!this._trained) { + throw new NoTrainedModelError() + } + const { svm, model } = this._trained + const formattedInput = this._format(model, x) + return svm.predict(formattedInput) } - predictSync = (x: number[]) => { - assert(this.isTrained()) - return (this._baseSvm as BaseSVM).predictSync(this._format(x)) + public predictSync = (x: number[]) => { + if (!this._trained) { + throw new NoTrainedModelError() + } + const { svm, model } = this._trained + const formattedInput = this._format(model, x) + return svm.predictSync(formattedInput) } - predictProbabilities = (x: number[]) => { - assert(this.isTrained()) - return (this._baseSvm as BaseSVM).predictProbabilities(this._format(x)) + public predictProbabilities = (x: number[]) => { + if (!this._trained) { + throw new NoTrainedModelError() + } + const { svm, model } = this._trained + const formattedInput = this._format(model, x) + return svm.predictProbabilities(formattedInput) } - predictProbabilitiesSync = (x: number[]) => { - assert(this.isTrained()) - return (this._baseSvm as BaseSVM).predictProbabilitiesSync(this._format(x)) + public predictProbabilitiesSync = (x: number[]) => { + if (!this._trained) { + throw new NoTrainedModelError() + } + const { svm, model } = this._trained + const formattedInput = this._format(model, x) + return svm.predictProbabilitiesSync(formattedInput) } - private _format = (x: number[]) => { - const mu = this._config.mu as number[] - const sigma = this._config.sigma as number[] - const u = this._config.u as number[][] - const xNorm = normalizeInput(x, mu, sigma) - return numeric.dot(xNorm, u) as number[] + private _format = (model: SvmModel, x: number[]) => { + const { u, mu, sigma } = model + if (mu && sigma) { + x = normalizeInput(x, mu, sigma) + } + if (u) { + x = numeric.dot(x, u) as number[] + } + return x } } diff --git a/packages/nlu-engine/src/ml/svm/libsvm/kfold/domain/jest-helpers.ts b/packages/nlu-engine/src/ml/svm/libsvm/kfold/domain/jest-helpers.ts index eea593d8..4be21cd2 100644 --- a/packages/nlu-engine/src/ml/svm/libsvm/kfold/domain/jest-helpers.ts +++ b/packages/nlu-engine/src/ml/svm/libsvm/kfold/domain/jest-helpers.ts @@ -2,6 +2,7 @@ import { Domain } from '.' 
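// Taken together, the SVM changes above move mu/sigma/u out of the shared config
// and into the serialized model, so prediction reads only trained state. A minimal
// standalone sketch of the transform that `_format` applies, assuming `mu`/`sigma`
// hold per-feature means and standard deviations and `u` is the projection matrix
// produced by reduce-dataset (helper names here are illustrative, not from the diff):
const zscore = (x: number[], mu: number[], sigma: number[]): number[] =>
  x.map((xi, i) => (sigma[i] === 0 ? xi - mu[i] : (xi - mu[i]) / sigma[i]))

const project = (x: number[], u: number[][]): number[] =>
  u[0].map((_, j) => x.reduce((acc, xi, i) => acc + xi * u[i][j], 0))

// Both steps are optional: a model trained with normalize or reduce disabled simply
// omits mu/sigma or u, and the raw coordinates pass through unchanged.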
declare global {
   namespace jest {
+    // eslint-disable-next-line @typescript-eslint/consistent-type-definitions
     interface Matchers<R> {
       toEqualDomain(d: Domain): CustomMatcherResult
       toIntersect(d: Domain): CustomMatcherResult
diff --git a/packages/nlu-engine/src/ml/svm/libsvm/kfold/domain/typings.ts b/packages/nlu-engine/src/ml/svm/libsvm/kfold/domain/typings.ts
index 4b33d49d..81415869 100644
--- a/packages/nlu-engine/src/ml/svm/libsvm/kfold/domain/typings.ts
+++ b/packages/nlu-engine/src/ml/svm/libsvm/kfold/domain/typings.ts
@@ -1,7 +1,7 @@
 export type Pair<T> = [T, T]
 export type SerializedDomain = number | Pair<number>

-export interface Domain {
+export type Domain = {
   format(): string
   includes(k: number): boolean
   isEqual(dom: Domain): boolean
@@ -13,13 +13,13 @@
   difference(dom: ContinuousDomain): SparsedDomain
 }

-export interface ContinuousDomain extends Domain {
+export type ContinuousDomain = {
   readonly min: number
   readonly max: number
   clone(): ContinuousDomain
-}
+} & Domain

-export interface SparsedDomain extends Domain {
+export type SparsedDomain = {
   readonly parts: ContinuousDomain[]
   clone(): SparsedDomain
-}
+} & Domain
diff --git a/packages/nlu-engine/src/ml/svm/libsvm/kfold/krange.test.ts b/packages/nlu-engine/src/ml/svm/libsvm/kfold/krange.test.ts
index 532a7726..5f51b4b9 100644
--- a/packages/nlu-engine/src/ml/svm/libsvm/kfold/krange.test.ts
+++ b/packages/nlu-engine/src/ml/svm/libsvm/kfold/krange.test.ts
@@ -1,9 +1,9 @@
-import { Domain } from './domain'
 import './domain/jest-helpers'

 import _ from 'lodash'

 import { Data } from '../typings'
 import { BaseKFold } from './base'
+import { Domain } from './domain'
 import { StratifiedKFold } from './stratified'

 const x: Data = [[0, 0], 1]
diff --git a/packages/nlu-engine/src/ml/svm/libsvm/kfold/typings.ts b/packages/nlu-engine/src/ml/svm/libsvm/kfold/typings.ts
index 9371a33e..e1e379ca 100644
--- a/packages/nlu-engine/src/ml/svm/libsvm/kfold/typings.ts
+++ b/packages/nlu-engine/src/ml/svm/libsvm/kfold/typings.ts
@@ -3,12 +3,12 @@
 import { Domain } from './domain'

 export type Fold = Data[]

-export interface TrainTestSplit {
+export type TrainTestSplit = {
   train: Data[]
   test: Data[]
 }

-export interface KFold {
+export type KFold = {
   kfold(dataset: Data[], k: number): Fold[]
   krange(dataset: Data[]): Domain
 }
diff --git a/packages/nlu-engine/src/ml/svm/libsvm/kfold/utils.ts b/packages/nlu-engine/src/ml/svm/libsvm/kfold/utils.ts
index 4041f354..36ac4514 100644
--- a/packages/nlu-engine/src/ml/svm/libsvm/kfold/utils.ts
+++ b/packages/nlu-engine/src/ml/svm/libsvm/kfold/utils.ts
@@ -1,7 +1,7 @@
 import _ from 'lodash'

 import { Data } from '../typings'

-export interface ClassCount {
+export type ClassCount = {
   label: number
   count: number
 }
diff --git a/packages/nlu-engine/src/ml/svm/libsvm/normalize.ts b/packages/nlu-engine/src/ml/svm/libsvm/normalize.ts
index 337d17d0..de4cca63 100644
--- a/packages/nlu-engine/src/ml/svm/libsvm/normalize.ts
+++ b/packages/nlu-engine/src/ml/svm/libsvm/normalize.ts
@@ -4,20 +4,18 @@

 import { Data } from './typings'

-export function normalizeDataset(dataset: Data[], mu?, sigma?) {
+export function normalizeDataset(dataset: Data[], p_mu?: number[], p_sigma?: number[]) {
   assert(dataset instanceof Array, 'dataset must be an list of [X,y] tuples')
   assert(dataset.length > 0, 'dataset cannot be empty')

-  const X = dataset.map((ex) => {
-      return ex[0]
-    }),
-    n = numeric.dim(X)[0] || 0,
-    m = numeric.dim(X)[1] || 0
+  const X = dataset.map((s) => s[0])
+  const n = numeric.dim(X)[0] || 0
+  const m = numeric.dim(X)[1] || 0

   assert(m > 0, 'number of features must be gt 0')

-  mu = mu || _.range(m).map((i) => _.mean(X.map((x) => x[i] || 0)))
-  sigma = sigma || _.range(m).map((i) => std(X.map((x) => x[i] || 0)))
+  const mu = p_mu || _.range(m).map((i) => _.mean(X.map((x) => x[i] || 0)))
+  const sigma = p_sigma || _.range(m).map((i) => std(X.map((x) => x[i] || 0)))

   return {
     dataset: dataset.map((l) => [normalizeInput(l[0], mu, sigma), l[1]] as Data),
diff --git a/packages/nlu-engine/src/ml/svm/libsvm/typings.ts b/packages/nlu-engine/src/ml/svm/libsvm/typings.ts
index 61b6ac36..e42db735 100644
--- a/packages/nlu-engine/src/ml/svm/libsvm/typings.ts
+++ b/packages/nlu-engine/src/ml/svm/libsvm/typings.ts
@@ -1,4 +1,4 @@
-export interface Model {
+export type Model = {
   param: Parameters
   nr_class: number
   l: number
@@ -15,7 +15,7 @@

 export type GridSearchParameters = 'C' | 'gamma' | 'degree' | 'nu' | 'p' | 'coef0'

-export interface OtherParameters {
+export type OtherParameters = {
   svm_type: number
   kernel_type: number
   cache_size: number
@@ -29,20 +29,25 @@

 export type Parameters = Record<GridSearchParameters, number> & OtherParameters

-interface LibConfig {
+type LibConfig = {
   kFold: number
   normalize: boolean
   reduce: boolean
   retainedVariance: number
+}
+
+type LibModel = {
   mu?: number[]
   sigma?: number[]
   u?: number[][]
 }
+
 export type SvmConfig = Record<GridSearchParameters, number | number[]> & OtherParameters & LibConfig

-export type SvmModel = Model & {
-  param: SvmParameters
-}
+export type SvmModel = Model &
+  LibModel & {
+    param: Parameters
+  }

 export type SvmParameters = Parameters & LibConfig

@@ -50,14 +55,14 @@
 export type Data = [number[], number]

 export type Report = (ClassificationReport | RegressionReport) & Partial<ReductionReport>

-export interface ReductionReport {
+export type ReductionReport = {
   reduce: boolean
   retainedVariance: number
   retainedDimension: number
   initialDimension: number
 }

-export interface ClassificationReport {
+export type ClassificationReport = {
   accuracy: number
   fscore: any
   recall: any
@@ -66,7 +71,7 @@
   size: any
 }

-export interface RegressionReport {
+export type RegressionReport = {
   mse: any
   std: number
   mean: any
diff --git a/packages/nlu-engine/src/ml/svm/multi-thread-trainer.ts b/packages/nlu-engine/src/ml/svm/multi-thread-trainer.ts
deleted file mode 100644
index 129c47a2..00000000
--- a/packages/nlu-engine/src/ml/svm/multi-thread-trainer.ts
+++ /dev/null
@@ -1,16 +0,0 @@
-import { nanoid } from 'nanoid'
-import mLThreadPool from '../ml-thread-pool'
-import { MLToolkit } from '../typings'
-import { Trainer } from '.'
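// A note on the normalizeDataset defaults above: when p_mu / p_sigma are not
// supplied, the function falls back to the per-feature mean and standard deviation
// of the dataset itself. A rough equivalent of those fallbacks without lodash
// (population variance is an assumption; the actual `std` helper may differ):
const columnMean = (X: number[][], i: number): number =>
  X.reduce((acc, row) => acc + (row[i] || 0), 0) / X.length

const columnStd = (X: number[][], i: number): number => {
  const mean = columnMean(X, i)
  return Math.sqrt(X.reduce((acc, row) => acc + ((row[i] || 0) - mean) ** 2, 0) / X.length)
}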
- -export class MultiThreadTrainer extends Trainer { - public async train( - elements: MLToolkit.SVM.DataPoint[], - options: MLToolkit.SVM.SVMOptions, - progressCallback: (iteration: number) => void - ) { - const id = nanoid() - const output = await mLThreadPool(this.logger).startSvmTraining(id, elements, options, progressCallback) - return output - } -} diff --git a/packages/nlu-engine/src/ml/svm/multi-thread.ts b/packages/nlu-engine/src/ml/svm/multi-thread.ts new file mode 100644 index 00000000..7f83861a --- /dev/null +++ b/packages/nlu-engine/src/ml/svm/multi-thread.ts @@ -0,0 +1,14 @@ +import { nanoid } from 'nanoid' +import mLThreadPool from '../ml-thread-pool' +import { SVMClassifier } from './base' +import { SVMTrainInput } from './typings' + +export class MultiThreadSVMClassifier extends SVMClassifier { + public async train(input: SVMTrainInput, progressCallback: (iteration: number) => void) { + const { points, options } = input + const id = nanoid() + const output = await mLThreadPool(this.logger).startSvmTraining(id, points, options, progressCallback) + const bin = Buffer.from(output) + return SVMClassifier.modelType.decode(bin) + } +} diff --git a/packages/nlu-engine/src/ml/svm/serialization.ts b/packages/nlu-engine/src/ml/svm/serialization.ts new file mode 100644 index 00000000..ac373a68 --- /dev/null +++ b/packages/nlu-engine/src/ml/svm/serialization.ts @@ -0,0 +1,43 @@ +import * as ptb from '@bpinternal/ptb-schema' +import { PTBFlatMatrixMsg } from './flat-matrix' + +let param_idx = 0 +export const PTBSVMClassifierParams = new ptb.PTBMessage('SVMClassifierParameters', { + svm_type: { type: 'int32', id: param_idx++, rule: 'required' }, + kernel_type: { type: 'int32', id: param_idx++, rule: 'required' }, + cache_size: { type: 'double', id: param_idx++, rule: 'required' }, + eps: { type: 'double', id: param_idx++, rule: 'required' }, + nr_weight: { type: 'int32', id: param_idx++, rule: 'required' }, + weight_label: { type: 'int32', id: param_idx++, rule: 'repeated' }, + weight: { type: 'double', id: param_idx++, rule: 'repeated' }, + shrinking: { type: 'bool', id: param_idx++, rule: 'required' }, + probability: { type: 'bool', id: param_idx++, rule: 'required' }, + C: { type: 'double', id: param_idx++, rule: 'required' }, + gamma: { type: 'double', id: param_idx++, rule: 'required' }, + degree: { type: 'int32', id: param_idx++, rule: 'required' }, + nu: { type: 'double', id: param_idx++, rule: 'required' }, + p: { type: 'double', id: param_idx++, rule: 'required' }, + coef0: { type: 'double', id: param_idx++, rule: 'required' } +}) + +let model_idx = 0 +export const PTBSVMClassifierModel = new ptb.PTBMessage('SVMClassifierModel', { + param: { type: PTBSVMClassifierParams, id: model_idx++, rule: 'required' }, + nr_class: { type: 'int32', id: model_idx++, rule: 'required' }, + l: { type: 'int32', id: model_idx++, rule: 'required' }, + SV: { type: PTBFlatMatrixMsg, id: model_idx++, rule: 'required' }, + sv_coef: { type: PTBFlatMatrixMsg, id: model_idx++, rule: 'required' }, + rho: { type: 'double', id: model_idx++, rule: 'repeated' }, + probA: { type: 'double', id: model_idx++, rule: 'repeated' }, + probB: { type: 'double', id: model_idx++, rule: 'repeated' }, + sv_indices: { type: 'int32', id: model_idx++, rule: 'repeated' }, + label: { type: 'int32', id: model_idx++, rule: 'repeated' }, + nSV: { type: 'int32', id: model_idx++, rule: 'repeated' }, + free_sv: { type: 'int32', id: model_idx++, rule: 'required' }, + + mu: { type: 'double', id: param_idx++, rule: 'repeated' }, + 
sigma: { type: 'double', id: param_idx++, rule: 'repeated' },
+  u: { type: PTBFlatMatrixMsg, id: param_idx++, rule: 'optional' },
+
+  labels_idx: { type: 'string', id: model_idx++, rule: 'repeated' }
+})
diff --git a/packages/nlu-engine/src/ml/svm/svm.test.ts b/packages/nlu-engine/src/ml/svm/svm.test.ts
index 82b8fe7b..70140daa 100644
--- a/packages/nlu-engine/src/ml/svm/svm.test.ts
+++ b/packages/nlu-engine/src/ml/svm/svm.test.ts
@@ -1,35 +1,33 @@
 import { Logger } from 'src/typings'
-import { Predictor, Trainer } from '.'
-import { MLToolkit } from '../typings'
+import { SVMOptions } from '.'
+import { SVMClassifier } from './base'
+import { DataPoint } from './typings'

 const SEED = 42

-/**
- * WARNING:
- * If test fails it may be because of your Linux distribution.
- * Try editing 'jest-before.ts' file your distribution.
- */
+const dummyLogger: Partial<Logger> = { debug: () => {} }
+const dummyCallback = () => {}
+
 describe('SVM', () => {
   test('Trainer should work with basic problems', async () => {
-    // prettier-ignore
-    const line: MLToolkit.SVM.DataPoint[] = [
+    const line: DataPoint[] = [
       { coordinates: [0, 0], label: 'A' },
       { coordinates: [0, 1], label: 'A' },
       { coordinates: [1, 0], label: 'B' },
       { coordinates: [1, 1], label: 'B' }
     ]

-    const dummyLogger: Partial<Logger> = { debug: () => {} }
-    const trainer = new Trainer(dummyLogger as Logger)
-    const mod = await trainer.train(line, { classifier: 'C_SVC', kernel: 'LINEAR', c: 1, seed: SEED })
+    const svm = new SVMClassifier(dummyLogger as Logger)
+
+    const options: SVMOptions = { classifier: 'C_SVC', kernel: 'LINEAR', c: 1, seed: SEED }
+    const mod = await svm.train({ points: line, options }, dummyCallback)

-    const predictor = new Predictor(mod)
-    await predictor.initialize()
+    await svm.load(mod)

-    const r1 = await predictor.predict([0, 0])
-    const r2 = await predictor.predict([1, 1])
-    const r3 = await predictor.predict([0, 1])
-    const r4 = await predictor.predict([1, 0])
+    const r1 = await svm.predict([0, 0])
+    const r2 = await svm.predict([1, 1])
+    const r3 = await svm.predict([0, 1])
+    const r4 = await svm.predict([1, 0])

     expect(r1[0].label).toBe('A')
     expect(r2[0].label).toBe('B')
@@ -38,20 +36,18 @@
   })

   test('Trainer should throw when vectors have different lengths', async () => {
-    // prettier-ignore
-    const line: MLToolkit.SVM.DataPoint[] = [
+    const line: DataPoint[] = [
       { coordinates: [0, 0, 0], label: 'A' },
       { coordinates: [0, 1], label: 'A' },
       { coordinates: [1, 0], label: 'B' },
       { coordinates: [1, 1], label: 'B' }
     ]

-    const dummyLogger: Partial<Logger> = { debug: () => {} }
-    const trainer = new Trainer(dummyLogger as Logger)
-
     let errorThrown = false
     try {
-      await trainer.train(line, { classifier: 'C_SVC', kernel: 'LINEAR', c: [1], seed: SEED })
+      const svm = new SVMClassifier(dummyLogger as Logger)
+      const options: SVMOptions = { classifier: 'C_SVC', kernel: 'LINEAR', c: [1], seed: SEED }
+      await svm.train({ points: line, options }, dummyCallback)
     } catch (err) {
       errorThrown = true
     }
diff --git a/packages/nlu-engine/src/ml/svm/typings.ts b/packages/nlu-engine/src/ml/svm/typings.ts
new file mode 100644
index 00000000..d0b46db8
--- /dev/null
+++ b/packages/nlu-engine/src/ml/svm/typings.ts
@@ -0,0 +1,28 @@
+export type SVMOptions = {
+  classifier: 'C_SVC' | 'NU_SVC' | 'ONE_CLASS' | 'EPSILON_SVR' | 'NU_SVR'
+  kernel: 'LINEAR' | 'POLY' | 'RBF' | 'SIGMOID'
+  seed: number
+  c?: number | number[]
+  gamma?: number | number[]
+  probability?: boolean
+  reduce?: boolean
+}
+
+export type DataPoint = {
+  label: string
+
coordinates: number[] +} + +export type Prediction = { + label: string + confidence: number +} + +export type TrainProgressCallback = { + (progress: number): void +} + +export type SVMTrainInput = { + points: DataPoint[] + options: SVMOptions +} diff --git a/packages/nlu-engine/src/ml/toolkit.ts b/packages/nlu-engine/src/ml/toolkit.ts index 1d2eed4a..c580b3d3 100644 --- a/packages/nlu-engine/src/ml/toolkit.ts +++ b/packages/nlu-engine/src/ml/toolkit.ts @@ -1,30 +1,5 @@ -import _ from 'lodash' -import kmeans from 'ml-kmeans' - -import { Tagger, Trainer as CRFTrainer } from './crf' -import { MultiThreadTrainer as CRFMultiThreadTrainer } from './crf/multi-thread-trainer' -import { FastTextModel } from './fasttext' -import { processor } from './sentencepiece' -import { Predictor, Trainer as SVMTrainer } from './svm' -import { MultiThreadTrainer as SVMMultiThreadTrainer } from './svm/multi-thread-trainer' -import { MLToolkit as IMLToolkit } from './typings' - -const isTsNode = !!process.env.TS_NODE_DEV // worker_threads do not work with ts-node - -const MLToolkit: typeof IMLToolkit = { - KMeans: { - kmeans - }, - CRF: { - Tagger, - Trainer: isTsNode ? CRFTrainer : CRFMultiThreadTrainer - }, - SVM: { - Predictor, - Trainer: isTsNode ? SVMTrainer : SVMMultiThreadTrainer - }, - FastText: { Model: FastTextModel }, - SentencePiece: { createProcessor: processor } -} - -export default MLToolkit +export * as CRF from './crf' +export * as FastText from './fasttext' +export * as KMeans from './kmeans' +export * as SentencePiece from './sentencepiece' +export * as SVM from './svm' diff --git a/packages/nlu-engine/src/ml/typings.d.ts b/packages/nlu-engine/src/ml/typings.d.ts deleted file mode 100644 index 8c527589..00000000 --- a/packages/nlu-engine/src/ml/typings.d.ts +++ /dev/null @@ -1,170 +0,0 @@ -export namespace MLToolkit { - export namespace FastText { - export type TrainCommand = 'supervised' | 'quantize' | 'skipgram' | 'cbow' - export type Loss = 'hs' | 'softmax' - - export interface TrainArgs { - lr: number - dim: number - ws: number - epoch: number - minCount: number - minCountLabel: number - neg: number - wordNgrams: number - loss: Loss - model: string - input: string - bucket: number - minn: number - maxn: number - thread: number - lrUpdateRate: number - t: number - label: string - pretrainedVectors: string - qout: boolean - retrain: boolean - qnorm: boolean - cutoff: number - dsub: number - } - - export interface PredictResult { - label: string - value: number - } - - export interface Model { - cleanup: () => void - trainToFile: (method: TrainCommand, modelPath: string, args: Partial) => Promise - loadFromFile: (modelPath: string) => Promise - predict: (str: string, nbLabels: number) => Promise - queryWordVectors(word: string): Promise - queryNearestNeighbors(word: string, nb: number): Promise - } - - export interface ModelConstructor { - new (): Model - new (lazy: boolean, keepInMemory: boolean, queryOnly: boolean): Model - } - - export const Model: ModelConstructor - } - - export namespace KMeans { - export interface KMeansOptions { - maxIterations?: number - tolerance?: number - withIterations?: boolean - distanceFunction?: DistanceFunction - seed?: number - initialization?: 'random' | 'kmeans++' | 'mostDistant' | number[][] - } - - export interface Centroid { - centroid: number[] - error: number - size: number - } - - // TODO convert this to class we build the source of ml-kmeans - export interface KmeansResult { - // constructor( - // clusters: number[], - // centroids: Centroid[], - // 
converged: boolean, - // iterations: number, - // distance: DistanceFunction - // ) - clusters: number[] - centroids: Centroid[] - iterations: number - nearest: (data: DataPoint[]) => number[] - } - - export type DataPoint = number[] - - export type DistanceFunction = (point0: DataPoint, point1: DataPoint) => number - - export const kmeans: (data: DataPoint[], K: number, options: KMeansOptions) => KmeansResult - } - - export namespace SVM { - export interface SVMOptions { - classifier: 'C_SVC' | 'NU_SVC' | 'ONE_CLASS' | 'EPSILON_SVR' | 'NU_SVR' - kernel: 'LINEAR' | 'POLY' | 'RBF' | 'SIGMOID' - seed: number - c?: number | number[] - gamma?: number | number[] - probability?: boolean - reduce?: boolean - } - - export interface DataPoint { - label: string - coordinates: number[] - } - - export interface Prediction { - label: string - confidence: number - } - - export interface TrainProgressCallback { - (progress: number): void - } - - export class Trainer { - constructor(logger: Logger) - train(points: DataPoint[], options?: SVMOptions, callback: TrainProgressCallback): Promise - isTrained(): boolean - } - - export class Predictor { - constructor(model: string) - initialize(): Promise - predict(coordinates: number[]): Promise - isLoaded(): boolean - getLabels(): string[] - } - } - - export namespace CRF { - export class Tagger { - initialize(): Promise - tag(xseq: Array): { probability: number; result: string[] } - open(model_filename: string): boolean - marginal(xseq: Array): { [label: string]: number }[] - } - - export interface TrainerOptions { - [key: string]: string - } - - export interface TrainProgressCallback { - (iteration: number): void - } - - interface DataPoint { - features: Array - labels: string[] - } - - export class Trainer { - constructor(logger: Logger) - initialize(): Promise - train(elements: DataPoint[], options: TrainerOptions, progressCallback: TrainProgressCallback): Promise - } - } - - export namespace SentencePiece { - export interface Processor { - loadModel: (modelPath: string) => void - encode: (inputText: string) => string[] - decode: (pieces: string[]) => string - } - - export const createProcessor: () => Promise - } -} diff --git a/packages/nlu-engine/src/model-id-service.test.ts b/packages/nlu-engine/src/model-id-service.test.ts index 3f276b8e..61eb3956 100644 --- a/packages/nlu-engine/src/model-id-service.test.ts +++ b/packages/nlu-engine/src/model-id-service.test.ts @@ -1,5 +1,5 @@ -import modelIdService, { HALF_MD5_REG } from './model-id-service' import { IntentDefinition, EntityDefinition, Specifications } from 'src/typings' +import modelIdService, { HALF_MD5_REG } from './model-id-service' import { ModelIdArgs } from './typings' const intents: IntentDefinition[] = [ @@ -21,7 +21,7 @@ const entities: EntityDefinition[] = [ ] const specifications: Specifications = { - nluVersion: '2.0.0', + engineVersion: '2.0.0', languageServer: { dimensions: 300, domain: 'bp', diff --git a/packages/nlu-engine/src/model-id-service.ts b/packages/nlu-engine/src/model-id-service.ts index 25c3ff4a..07694c3a 100644 --- a/packages/nlu-engine/src/model-id-service.ts +++ b/packages/nlu-engine/src/model-id-service.ts @@ -1,19 +1,10 @@ -import crypto from 'crypto' import _ from 'lodash' import { EntityDefinition, IntentDefinition, Specifications } from 'src/typings' import { ModelId, ModelIdArgs, ModelIdService } from './typings' +import { halfmd5 } from './utils/half-md5' export const HALF_MD5_REG = /^[a-fA-F0-9]{16}$/ -const MD5_NIBBLES_SIZE = 32 // (128 bits/hash / 8 bits/byte) * 
2 nibbles/byte === 32 nibbles/hash -export const halfmd5 = (text: string) => { - return crypto - .createHash('md5') - .update(text) - .digest('hex') - .slice(MD5_NIBBLES_SIZE / 2) -} - const toString = (modelId: ModelId) => { const { contentHash, specificationHash, languageCode: lang, seed } = modelId return `${contentHash}.${specificationHash}.${seed}.${lang}` diff --git a/packages/nlu-engine/src/require-json.ts b/packages/nlu-engine/src/require-json.ts new file mode 100644 index 00000000..acb33420 --- /dev/null +++ b/packages/nlu-engine/src/require-json.ts @@ -0,0 +1,6 @@ +export const requireJSON = (filePath: string): T | undefined => { + try { + const fileContent = require(filePath) + return fileContent + } catch (err) {} +} diff --git a/packages/nlu-engine/src/typings.d.ts b/packages/nlu-engine/src/typings.d.ts index 12bcc77e..c9fcedab 100644 --- a/packages/nlu-engine/src/typings.d.ts +++ b/packages/nlu-engine/src/typings.d.ts @@ -1,45 +1,56 @@ +import { ErrorType as LangServerErrorType, LangError as SerializedLangError } from '@botpress/lang-client' +import * as linting from './linting' + export const SYSTEM_ENTITIES: string[] -export const errors: { - isTrainingAlreadyStarted: (err: Error) => boolean - isTrainingCanceled: (err: Error) => boolean +export namespace errors { + export class TrainingAlreadyStartedError extends Error {} + export class TrainingCanceledError extends Error {} + export class LangServerError extends Error { + public code: number + public type: LangServerErrorType + constructor(serializedError: SerializedLangError) + } + export class DucklingServerError extends Error { + constructor(message: string, stack?: string) + } } export const makeEngine: (config: Config, logger: Logger) => Promise export const modelIdService: ModelIdService +export type InstalledModel = { + lang: string + loaded: boolean +} + export class LanguageService { constructor(dim: number, domain: string, langDir: string, logger?: Logger) isReady: boolean dim: number domain: string - initialize(): Promise - loadModel(lang: string): Promise - tokenize(utterances: string[], lang: string): Promise - vectorize(tokens: string[], lang: string): Promise - getModels() - remove(lang: string) + public initialize(): Promise + public loadModel(lang: string): Promise + public tokenize(utterances: string[], lang: string): Promise + public vectorize(tokens: string[], lang: string): Promise + public getModels(): InstalledModel[] + public remove(lang: string): void } -export interface Config extends LanguageConfig { +export type Config = { modelCacheSize: string - legacyElection: boolean -} +} & LanguageConfig -export interface LanguageConfig { +export type LanguageConfig = { ducklingURL: string ducklingEnabled: boolean - languageSources: LanguageSource[] + languageURL: string + languageAuthToken?: string cachePath: string } -export interface LanguageSource { - endpoint: string - authToken?: string -} - -export interface Logger { +export type Logger = { debug: (msg: string) => void info: (msg: string) => void warning: (msg: string, err?: Error) => void @@ -47,72 +58,82 @@ export interface Logger { sub: (namespace: string) => Logger } -export interface ModelIdArgs extends TrainInput { +export type ModelIdArgs = { specifications: Specifications -} +} & TrainInput -export interface TrainingOptions { +export type TrainingProgressCb = (p: number) => void +export type TrainingOptions = { progressCallback: (x: number) => void - previousModel: ModelId | undefined minProgressHeartbeat: number } -export interface Engine { 
- getHealth: () => Health +export type LintingProgressCb = ( + current: number, + total: number, + issues: linting.DatasetIssue[] +) => void | Promise + +export type LintingOptions = { + progressCallback: LintingProgressCb + minSpeed: linting.IssueComputationSpeed + minSeverity: linting.IssueSeverity + runInMainProcess: boolean +} + +export type Engine = { getLanguages: () => string[] getSpecifications: () => Specifications + validateModel(serialized: Model): void loadModel: (model: Model) => Promise unloadModel: (modelId: ModelId) => void hasModel: (modelId: ModelId) => boolean - train: (trainSessionId: string, trainSet: TrainInput, options?: Partial) => Promise - cancelTraining: (trainSessionId: string) => Promise + train: (trainingId: string, trainSet: TrainInput, options?: Partial) => Promise + cancelTraining: (trainingId: string) => Promise + + lint: (lintingId: string, trainSet: TrainInput, options?: Partial) => Promise + cancelLinting: (lintingId: string) => Promise + getIssueDetails: (code: C) => linting.IssueDefinition | undefined detectLanguage: (text: string, modelByLang: { [key: string]: ModelId }) => Promise predict: (text: string, modelId: ModelId) => Promise } -export interface ModelIdService { +export type ModelIdService = { toString: (modelId: ModelId) => string // to use ModelId as a key - areSame: (id1: ModelId, id2: ModelId) => boolean fromString: (stringId: string) => ModelId // to parse information from a key + areSame: (id1: ModelId, id2: ModelId) => boolean isId: (m: string) => boolean makeId: (factors: ModelIdArgs) => ModelId briefId: (factors: Partial) => Partial // makes incomplete Id from incomplete information halfmd5: (str: string) => string } -export interface ModelId { +export type ModelId = { specificationHash: string // represents the nlu engine that was used to train the model contentHash: string // represents the intent and entity definitions the model was trained with seed: number // number to seed the random number generators used during nlu training languageCode: string // language of the model } -export interface Model { +export type Model = { id: ModelId startedAt: Date finishedAt: Date - data: { - input: string - output: string - } + data: Buffer } -export interface Specifications { - nluVersion: string // semver string - languageServer: { - dimensions: number - domain: string - version: string // semver string - } +export type LangServerSpecs = { + dimensions: number + domain: string + version: string } -export interface Health { - isEnabled: boolean - validProvidersCount: number - validLanguages: string[] +export type Specifications = { + engineVersion: string + languageServer: LangServerSpecs } /** @@ -121,26 +142,26 @@ export interface Health { * ################################## */ -export interface TrainInput { +export type TrainInput = { language: string intents: IntentDefinition[] entities: EntityDefinition[] seed: number } -export interface IntentDefinition { +export type IntentDefinition = { name: string contexts: string[] utterances: string[] slots: SlotDefinition[] } -export interface SlotDefinition { +export type SlotDefinition = { name: string entities: string[] } -export interface ListEntityDefinition { +export type ListEntityDefinition = { name: string type: 'list' values: { name: string; synonyms: string[] }[] @@ -149,7 +170,7 @@ export interface ListEntityDefinition { sensitive?: boolean } -export interface PatternEntityDefinition { +export type PatternEntityDefinition = { name: string type: 'pattern' regex: string @@ -170,15 
+191,15 @@ export type EntityDefinition = ListEntityDefinition | PatternEntityDefinition */ export type TrainingStatus = 'done' | 'training-pending' | 'training' | 'canceled' | 'errored' -export type TrainingErrorType = 'already-started' | 'unknown' +export type TrainingErrorType = 'already-started' | 'internal' -export interface TrainingError { +export type TrainingError = { type: TrainingErrorType message: string stackTrace?: string } -export interface TrainingProgress { +export type TrainingProgress = { status: TrainingStatus progress: number error?: TrainingError @@ -189,7 +210,7 @@ export interface TrainingProgress { * ############ PREDICTION ############ * #################################### */ -export interface PredictOutput { +export type PredictOutput = { entities: EntityPrediction[] contexts: ContextPrediction[] spellChecked: string @@ -197,7 +218,7 @@ export interface PredictOutput { export type EntityType = 'pattern' | 'list' | 'system' -export interface EntityPrediction { +export type EntityPrediction = { name: string type: string // ex: ['custom.list.fruits', 'system.time'] value: string @@ -210,21 +231,21 @@ export interface EntityPrediction { sensitive?: boolean } -export interface ContextPrediction { +export type ContextPrediction = { name: string oos: number confidence: number intents: IntentPrediction[] } -export interface IntentPrediction { +export type IntentPrediction = { name: string confidence: number slots: SlotPrediction[] extractor: string } -export interface SlotPrediction { +export type SlotPrediction = { name: string value: string confidence: number diff --git a/packages/nlu-engine/src/utils/half-md5.ts b/packages/nlu-engine/src/utils/half-md5.ts new file mode 100644 index 00000000..ffb0dd58 --- /dev/null +++ b/packages/nlu-engine/src/utils/half-md5.ts @@ -0,0 +1,10 @@ +import crypto from 'crypto' + +const MD5_NIBBLES_SIZE = 32 // (128 bits/hash / 8 bits/byte) * 2 nibbles/byte === 32 nibbles/hash +export const halfmd5 = (text: string) => { + return crypto + .createHash('md5') + .update(text) + .digest('hex') + .slice(MD5_NIBBLES_SIZE / 2) +} diff --git a/packages/nlu-engine/src/utils/mock-extra.ts b/packages/nlu-engine/src/utils/mock-extra.ts deleted file mode 100644 index c5eaa323..00000000 --- a/packages/nlu-engine/src/utils/mock-extra.ts +++ /dev/null @@ -1,15 +0,0 @@ -export type MockObject = { T: T } & { readonly [key in keyof T]: jest.Mock } -export function createSpyObject(): MockObject { - const obj = {} - const handler: ProxyHandler = { - get(obj, prop) { - if (prop === 'T') { - return proxy - } - - return prop in obj ? 
obj[prop] : (obj[prop] = jest.fn())
-    }
-  }
-  const proxy = new Proxy(obj, handler)
-  return proxy as MockObject
-}
diff --git a/packages/nlu-engine/src/utils/override-type.ts b/packages/nlu-engine/src/utils/override-type.ts
new file mode 100644
index 00000000..df0d5630
--- /dev/null
+++ b/packages/nlu-engine/src/utils/override-type.ts
@@ -0,0 +1 @@
+export type Override<T, K extends Partial<Record<keyof T, any>>> = Omit<T, keyof K> & K
diff --git a/packages/nlu-engine/src/utils/watch-dog.ts b/packages/nlu-engine/src/utils/watch-dog.ts
index 2f85e150..a2c74499 100644
--- a/packages/nlu-engine/src/utils/watch-dog.ts
+++ b/packages/nlu-engine/src/utils/watch-dog.ts
@@ -33,7 +33,7 @@ class _WatchDog

 type Func<X extends any[], Y> = (...x: X) => Y

-export interface WatchDog<X extends any[]> {
+export type WatchDog<X extends any[]> = {
   run(...x: X): void
   stop: () => void
 }
diff --git a/packages/nlu-engine/tsconfig.json b/packages/nlu-engine/tsconfig.json
index 5118ce9b..4af8a903 100644
--- a/packages/nlu-engine/tsconfig.json
+++ b/packages/nlu-engine/tsconfig.json
@@ -6,7 +6,7 @@
     { "path": "../node-fasttext" },
     { "path": "../node-sentencepiece" },
     { "path": "../worker" },
-    { "path": "../nlu-client" }
+    { "path": "../lang-client" }
   ],
   "compilerOptions": {
     "outDir": "./dist" /* Redirect output structure to the directory. */,
diff --git a/packages/nlu-server/examples/api.rest b/packages/nlu-server/examples/api.rest
index ad369293..22978988 100644
--- a/packages/nlu-server/examples/api.rest
+++ b/packages/nlu-server/examples/api.rest
@@ -21,10 +21,10 @@ X-App-Id: {{appId}}
       "utterances": [
         "fruit is moldy",
         "this fruit is moldy",
-        "this [banana](fruit) is not good to eat",
-        "theses [oranges](fruit) have passed",
-        "theses [grapes](fruit) look bad",
-        "theses [apples](fruit) look soo moldy"
+        "this [banana](moldy_fruit) is not good to eat",
+        "theses [oranges](moldy_fruit) have passed",
+        "theses [grapes](moldy_fruit) look bad",
+        "theses [apples](moldy_fruit) look soo moldy"
       ],
       "slots": [
        {
diff --git a/packages/nlu-server/jest.config.js b/packages/nlu-server/jest.config.js
deleted file mode 100644
index 61ea9c96..00000000
--- a/packages/nlu-server/jest.config.js
+++ /dev/null
@@ -1,11 +0,0 @@
-module.exports = {
-  preset: 'ts-jest',
-  testEnvironment: 'node',
-  testPathIgnorePatterns: ['dist', 'node_modules'],
-  rootDir: '.',
-  resetModules: true,
-  verbose: true,
-  modulePaths: ['<rootDir>/src/'],
-  moduleFileExtensions: ['js', 'json', 'jsx', 'ts', 'tsx', 'd.ts'],
-  modulePathIgnorePatterns: ['out']
-}
diff --git a/packages/nlu-server/package.json b/packages/nlu-server/package.json
index 679cacc2..206e551a 100644
--- a/packages/nlu-server/package.json
+++ b/packages/nlu-server/package.json
@@ -5,13 +5,24 @@
   "main": "./dist/index.js",
   "author": "Botpress, Inc.",
   "license": "AGPL-3.0",
-  "types": "./src/typings.d.ts",
+  "types": "./dist/index.d.ts",
   "bin": "./dist/index.js",
   "dependencies": {
-    "@botpress/logger": "*",
+    "@botpress/distributed": "*",
     "@botpress/nlu-client": "*",
     "@botpress/nlu-engine": "*",
-    "@botpress/locks": "*",
+    "@botpress/telemetry": "*",
+    "@bpinternal/log4bot": "^0.0.4",
+    "@bpinternal/ptb-schema": "^0.0.2",
+    "@bpinternal/trail": "^0.1.0",
+    "@opentelemetry/api": "1.1.0",
+    "@opentelemetry/instrumentation-express": "0.27.0",
+    "@opentelemetry/instrumentation-http": "0.27.0",
+    "@opentelemetry/instrumentation-knex": "0.27.0",
+    "@opentelemetry/instrumentation-pg": "0.27.0",
+    "@promster/express": "5.0.3",
+    "@promster/metrics": "6.0.2",
+    "@promster/server": "6.0.2",
     "@sentry/node": "^6.9.0",
     "@sentry/tracing": "^6.9.0",
     "@types/nanoid": "^3.0.0",
@@ -20,28 +31,27 @@
     "body-parser": "^1.18.3",
"^1.18.3", "bytes": "^3.1.0", "chalk": "^2.4.2", - "chokidar": "^2.1.5", "cors": "^2.8.5", "diff": "^4.0.1", "eventemitter2": "^5.0.1", "express": "^4.16.4", - "express-rate-limit": "^3.5.1", + "express-rate-limit": "^5.5.0", "fs-extra": "^9.1.0", "glob": "^7.1.6", "globrex": "^0.1.2", "joi": "^13.6.0", "jsonlint-mod": "^1.7.5", "jsonpack": "^1.1.5", - "knex": "^0.20.1", + "knex": "^0.95.15", "lodash": "^4.17.19", "lru-cache": "^5.1.1", "mkdirp": "^1.0.4", "moment": "^2.29.2", "ms": "^2.1.1", "nanoid": "^3.1.23", - "on-headers": "^1.0.2", - "pg": "^7.8.0", + "pg": "^8.0.3", "pg-pubsub": "^0.6.1", + "prom-client": "^14.0.1", "replace-in-file": "^4.1.1", "tar": "^4.4.6", "tmp": "^0.0.33", @@ -51,8 +61,10 @@ }, "devDependencies": { "@types/bytes": "^3.1.0", + "@types/cors": "^2.8.12", "@types/diff": "^5.0.0", "@types/express": "^4.17.13", + "@types/express-rate-limit": "^5.1.3", "@types/fs-extra": "^5.0.4", "@types/glob": "^7.1.3", "@types/jest": "^24.9.0", @@ -63,7 +75,7 @@ "@types/mkdirp": "^1.0.1", "@types/ms": "^0.7.30", "@types/nanoid": "^3.0.0", - "@types/node": "^12.13.0", + "@types/node": "^16.11.10", "@types/supertest": "^2.0.11", "@types/tar": "^4.0.4", "@types/tmp": "^0.0.33", @@ -75,16 +87,16 @@ "eslint-config-prettier": "^8.3.0", "eslint-plugin-import": "^2.22.1", "eslint-plugin-jsdoc": "^32.3.2", + "jest": "^24.9.0", "prettier": "^2.2.1", "supertest": "^6.1.3", - "jest": "^24.9.0", - "ts-jest": "^26.5.5", - "typescript": "^3.9.10" + "typescript": "^5.0.4" }, "scripts": { "build": "tsc --build", "postbuild": "cross-env node ./scripts/buildinfo.js ./.buildinfo.json", - "start": "cross-env node ./dist/index.js", - "test": "cross-env jest -i --detectOpenHandles -c jest.config.js" + "start": "node -r @bpinternal/trail/init ./dist/index.js", + "test": "jest --roots ./dist", + "clean": "rimraf ./dist && rimraf ./node_modules" } } diff --git a/packages/nlu-server/readme.md b/packages/nlu-server/readme.md index acd27eac..ca370b1c 100644 --- a/packages/nlu-server/readme.md +++ b/packages/nlu-server/readme.md @@ -86,6 +86,8 @@ This package contains the Botpress Standalone NLU server. +**To display this documentation, launch the nlu server with argument `--doc`.** + ## ⚠️⚠️ Disclaimer ⚠️⚠️ The NLU Server does **not** enforce authentication in any way. This means it is completely exposed to many attacks. If you plan on using the nlu-server in your local Botpress setup, makes sure it is not publicly exposed. If you plan on exposing the NLU server, make sure it his hidden behind a reverse proxy which ensures a proper authentication. 
This reverse proxy should: diff --git a/packages/nlu-server/src/api/app-id.ts b/packages/nlu-server/src/api/app-id.ts new file mode 100644 index 00000000..8614fa98 --- /dev/null +++ b/packages/nlu-server/src/api/app-id.ts @@ -0,0 +1,12 @@ +import { Request } from 'express' +import _ from 'lodash' +import { InvalidRequestFormatError } from './errors' + +const X_APP_ID = 'X-App-Id'.toLowerCase() +export const getAppId = (req: Request): string => { + const appId = req.headers[X_APP_ID] + if (!_.isString(appId) || !appId.length) { + throw new InvalidRequestFormatError('X-App-Id Header must be a non-empty string.') + } + return appId +} diff --git a/packages/nlu-server/src/api/errors.ts b/packages/nlu-server/src/api/errors.ts index 74c35744..ae9e0bad 100644 --- a/packages/nlu-server/src/api/errors.ts +++ b/packages/nlu-server/src/api/errors.ts @@ -22,14 +22,20 @@ export class ResponseError extends Error { } } -export class UnauthorizedError extends ResponseError { +export class InvalidRequestFormatError extends ResponseError { constructor(message: string) { - super(`Unauthorized: ${message}`, 401) + super(`Invalid Request Format: ${message}`, 400) } } -export class InvalidRequestFormatError extends ResponseError { +export class InvalidTrainSetError extends ResponseError { constructor(message: string) { - super(`Invalid Request Format: ${message}`, 400) + super(`Invalid Training Set: ${message}`, 400) + } +} + +export class ModelTransferDisabled extends ResponseError { + constructor() { + super('Model Transfer is disabled.', 403) } } diff --git a/packages/nlu-server/src/api/http.ts b/packages/nlu-server/src/api/http.ts deleted file mode 100644 index d1e2955d..00000000 --- a/packages/nlu-server/src/api/http.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { http } from '@botpress/nlu-client' -import { Request, Response, NextFunction } from 'express' -import _ from 'lodash' -import { InvalidRequestFormatError, ResponseError } from './errors' - -export const handleError = (err: Error, _req: Request, res: Response, _next: NextFunction) => { - const httpStatusCode = err instanceof ResponseError ? err.statusCode : 500 - const resp: http.ErrorResponse = { success: false, error: err.message } - return res.status(httpStatusCode).send(resp) -} - -const X_APP_ID = 'X-App-Id'.toLowerCase() -export const getAppId = (req: Request): string => { - const appId = req.headers[X_APP_ID] - if (!_.isString(appId) || !appId.length) { - throw new InvalidRequestFormatError('X-App-Id Header must be a non-empty string.') - } - return appId -} diff --git a/packages/nlu-server/src/api/index.ts b/packages/nlu-server/src/api/index.ts index d55c8377..7b330d9e 100644 --- a/packages/nlu-server/src/api/index.ts +++ b/packages/nlu-server/src/api/index.ts @@ -1,261 +1,147 @@ -import { Logger } from '@botpress/logger' -import { http, TrainInput } from '@botpress/nlu-client' import * as NLUEngine from '@botpress/nlu-engine' +import { prometheus } from '@botpress/telemetry' +import { Logger } from '@bpinternal/log4bot' +import { isEnabled } from '@bpinternal/trail' +import { context, trace } from '@opentelemetry/api' import * as Sentry from '@sentry/node' -import * as Tracing from '@sentry/tracing' -import bodyParser from 'body-parser' import cors from 'cors' import express, { Application as ExpressApp } from 'express' import rateLimit from 'express-rate-limit' import _ from 'lodash' import ms from 'ms' +import { NLUServerOptions } from '..' 
import { Application } from '../application' -import { orderKeys } from '../utils/order-keys' -import { InvalidRequestFormatError } from './errors' - -import { handleError, getAppId } from './http' -import { validatePredictInput, validateTrainInput, validateDetectLangInput } from './validation/validate' -interface APIOptions { - host: string - port: number - limitWindow: string - limit: number - bodySize: string - batchSize: number - apmEnabled?: boolean - apmSampleRate?: number -} +import { ModelLoadedData } from '../application/app-observer' +import { Training } from '../infrastructure/training-repo/typings' +import { modelMemoryLoadDuration, modelStorageReadDuration, trainingCount, trainingDuration } from '../telemetry/metric' +import { UsageClient } from '../telemetry/usage-client' +import { createModelTransferRouter } from './routers/model-transfer' +import { createRootRouter } from './routers/root' const { modelIdService } = NLUEngine -export const createAPI = async (options: APIOptions, app: Application, baseLogger: Logger): Promise => { - const requestLogger = baseLogger.sub('api').sub('request') +const isTrainingRunning = ({ status }: Training) => status === 'training-pending' || status === 'training' + +export const createAPI = async ( + options: NLUServerOptions, + app: Application, + baseLogger: Logger +): Promise => { + const apiLogger = baseLogger.sub('api') + const requestLogger = apiLogger.sub('request') const expressApp = express() - // This must be first, otherwise the /info endpoint can't be called when token is used expressApp.use(cors()) - if (options.apmEnabled) { - Sentry.init({ - integrations: [ - new Sentry.Integrations.Http({ tracing: true }), - new Tracing.Integrations.Express({ app: expressApp }) - ], - sampleRate: options.apmSampleRate ?? 
1.0 - }) - - expressApp.use(Sentry.Handlers.requestHandler()) - expressApp.use(Sentry.Handlers.tracingHandler()) - } - - expressApp.use(bodyParser.json({ limit: options.bodySize })) + if (options.prometheusEnabled) { + const prometheusLogger = apiLogger.sub('prometheus') + prometheusLogger.debug('prometheus metrics enabled') - expressApp.use((req, res, next) => { - res.header('X-Powered-By', 'Botpress NLU') - requestLogger.debug(`incoming ${req.method} ${req.path}`, { ip: req.ip }) - next() - }) + app.on('training_update', async (training: Training) => { + if (isTrainingRunning(training) || !training.trainingTime) { + return + } - if (options.apmEnabled) { - expressApp.use(Sentry.Handlers.errorHandler()) - } + const trainingTime = training.trainingTime / 1000 + prometheusLogger.debug(`adding metric "training_duration_seconds" with value: ${trainingTime}`) + trainingDuration.observe({ status: training.status }, trainingTime) + }) - expressApp.use(handleError) + app.on('model_loaded', async (data: ModelLoadedData) => { + prometheusLogger.debug(`adding metric "model_storage_read_duration" with value: ${data.readTime}`) + modelStorageReadDuration.observe(data.readTime) - if (process.env.REVERSE_PROXY) { - expressApp.set('trust proxy', process.env.REVERSE_PROXY) - } + prometheusLogger.debug(`adding metric "model_memory_load_duration" with value: ${data.loadTime}`) + modelMemoryLoadDuration.observe(data.loadTime) + }) - if (options.limit > 0) { - expressApp.use( - rateLimit({ - windowMs: ms(options.limitWindow), - max: options.limit, - message: 'Too many requests, please slow down' - }) - ) + await prometheus.init(expressApp, async () => { + const count = await app.getLocalTrainingCount() + trainingCount.set(count) + }) } - const router = express.Router({ mergeParams: true }) - - expressApp.use(['/v1', '/'], router) - - router.get('/', async (req, res, next) => { - try { - return res.redirect('/info') - } catch (err) { - return handleError(err as Error, req, res, next) - } - }) - - router.get('/info', async (req, res, next) => { - try { - const info = app.getInfo() - const resp: http.InfoResponseBody = { success: true, info } - res.send(resp) - } catch (err) { - return handleError(err as Error, req, res, next) - } - }) - - router.get('/models', async (req, res, next) => { - try { - const appId = getAppId(req) - const modelIds = await app.getModels(appId) - const stringIds = modelIds.map(modelIdService.toString) - const resp: http.ListModelsResponseBody = { success: true, models: stringIds } - res.send(resp) - } catch (err) { - return handleError(err as Error, req, res, next) - } - }) - - router.post('/models/prune', async (req, res, next) => { - try { - const appId = getAppId(req) - const modelIds = await app.pruneModels(appId) - const stringIds = modelIds.map(modelIdService.toString) - const resp: http.PruneModelsResponseBody = { success: true, models: stringIds } - return res.send(resp) - } catch (err) { - return handleError(err as Error, req, res, next) - } - }) - - router.post('/train', async (req, res, next) => { - try { - const appId = getAppId(req) - const input = await validateTrainInput(req.body) - const { intents, entities, seed, language } = input - - const pickedSeed = seed ?? 
Math.round(Math.random() * 10000) - - const content = orderKeys({ - entities: _.orderBy(entities, (e) => e.name), - intents: _.orderBy(intents, (i) => i.name) - }) + if (options.usageURL) { + const usageLogger = apiLogger.sub('usage') + usageLogger.debug('usage endpoint enabled') - const trainInput: TrainInput = { - ...content, - language, - seed: pickedSeed + const usageClient = new UsageClient(options.usageURL) + app.on('training_update', async (training: Training) => { + if (isTrainingRunning(training) || !training.trainingTime) { + return } - const modelId = await app.startTraining(appId, trainInput) - - const resp: http.TrainResponseBody = { success: true, modelId: NLUEngine.modelIdService.toString(modelId) } - return res.send(resp) - } catch (err) { - return handleError(err as Error, req, res, next) - } - }) - - router.get('/train', async (req, res, next) => { - try { - const appId = getAppId(req) - const { lang } = req.query - if (lang && !_.isString(lang)) { - throw new InvalidRequestFormatError(`query parameter lang: "${lang}" has invalid format`) + const { appId, modelId, trainingTime } = training + const app_id = appId + const model_id = modelIdService.toString(modelId) + const training_time = trainingTime / 1000 + const timestamp = new Date().toISOString() + + const type = 'training_time' + const value = { + app_id, + model_id, + training_time, + timestamp } - const trainings = await app.getAllTrainings(appId, lang) - const serialized = trainings.map(({ modelId, ...state }) => ({ - modelId: modelIdService.toString(modelId), - ...state - })) - - const resp: http.ListTrainingsResponseBody = { success: true, trainings: serialized } - res.send(resp) - } catch (err) { - return handleError(err as Error, req, res, next) - } - }) + usageLogger.debug(`sending usage ${type} with value: ${JSON.stringify(value)}`) - router.get('/train/:modelId', async (req, res, next) => { - try { - const appId = getAppId(req) - const { modelId: stringId } = req.params - if (!_.isString(stringId) || !NLUEngine.modelIdService.isId(stringId)) { - throw new InvalidRequestFormatError(`model id "${stringId}" has invalid format`) + try { + await usageClient.sendUsage('nlu', type, [value]) + } catch (thrown) { + const err = thrown instanceof Error ? 
thrown : new Error(`${thrown}`)
+        usageLogger.attachError(err).error(`an error occurred when sending "${type}" usage.`)
+      }
+    })
+  }

-      const modelId = NLUEngine.modelIdService.fromString(stringId)
-      const session = await app.getTrainingState(appId, modelId)
-
-      const resp: http.TrainProgressResponseBody = { success: true, session }
-      res.send(resp)
-    } catch (err) {
-      return handleError(err as Error, req, res, next)
-    }
-  })
-
-  router.post('/train/:modelId/cancel', async (req, res, next) => {
-    try {
-      const appId = getAppId(req)
-
-      const { modelId: stringId } = req.params
-
-      const modelId = NLUEngine.modelIdService.fromString(stringId)
-
-      await app.cancelTraining(appId, modelId)
+  if (options.apmEnabled) {
+    Sentry.init()
+    expressApp.use(Sentry.Handlers.requestHandler())
+  }

-      const resp: http.SuccessReponse = { success: true }
-      return res.send(resp)
-    } catch (err) {
-      return handleError(err as Error, req, res, next)
-    }
-  })
+  expressApp.use((req, res, next) => {
+    res.header('X-Powered-By', 'Botpress NLU')

-  router.post('/predict/:modelId', async (req, res, next) => {
-    try {
-      const appId = getAppId(req)
+    const metadata: { ip: string; traceId?: string } = { ip: req.ip }

-      const { modelId: stringId } = req.params
-      const { utterances } = await validatePredictInput(req.body)
+    if (isEnabled()) {
+      const spanContext = trace.getSpanContext(context.active())

-      if (!_.isArray(utterances) || (options.batchSize > 0 && utterances.length > options.batchSize)) {
-        throw new InvalidRequestFormatError(
-          `Batch size of ${utterances.length} is larger than the allowed maximum batch size (${options.batchSize}).`
-        )
+      if (spanContext?.traceId) {
+        metadata.traceId = spanContext?.traceId
       }
-
-      const modelId = NLUEngine.modelIdService.fromString(stringId)
-      const predictions = await app.predict(appId, modelId, utterances)
-
-      const resp: http.PredictResponseBody = { success: true, predictions }
-      res.send(resp)
-    } catch (err) {
-      return handleError(err as Error, req, res, next)
     }
-  })
+    requestLogger.debug(`incoming ${req.method} ${req.path}`, metadata)
+    next()
+  })

-  router.post('/detect-lang', async (req, res, next) => {
-    try {
-      const appId = getAppId(req)
-
-      const { utterances, models } = await validateDetectLangInput(req.body)
+  if (options.reverseProxy) {
+    expressApp.set('trust proxy', options.reverseProxy)
+  }

-      const invalidIds = models.filter(_.negate(modelIdService.isId))
-      if (invalidIds.length) {
-        throw new InvalidRequestFormatError(`The following model ids are invalid: [${invalidIds.join(', ')}]`)
-      }
+  if (options.limit > 0) {
+    expressApp.use(
+      rateLimit({
+        windowMs: ms(options.limitWindow),
+        max: options.limit,
+        message: 'Too many requests, please slow down'
+      })
+    )
+  }

-      const modelIds = models.map(modelIdService.fromString)
+  const rootRouter = createRootRouter(options, app, baseLogger)
+  const modelRouter = createModelTransferRouter(options, app, baseLogger)

-      if (!_.isArray(utterances) || (options.batchSize > 0 && utterances.length > options.batchSize)) {
-        const error = `Batch size of ${utterances.length} is larger than the allowed maximum batch size (${options.batchSize}).`
-        return res.status(400).send({ success: false, error })
-      }
+  expressApp.use('/', rootRouter)
+  expressApp.use('/modelweights', modelRouter)

-      const detectedLanguages = await app.detectLanguage(appId, modelIds, utterances)
-
-      const resp: http.DetectLangResponseBody = { success: true, detectedLanguages }
-      res.send(resp)
-    } catch (err) {
-      return handleError(err as Error, req, res, next)
-    }
-  })
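// The registration order being set up above is load-bearing in Express: middleware
// runs in the order it is use()'d, and error handlers are recognized by their
// four-parameter signature, so they must be registered after the routes they cover.
// A self-contained sketch of the pattern (route and variable names are placeholders,
// not part of this diff):
import express, { NextFunction, Request, Response } from 'express'

const sketchApp = express()

sketchApp.get('/boom', (_req: Request, _res: Response, next: NextFunction) => {
  next(new Error('something failed')) // routes forward failures instead of responding
})

// registered last, so it catches whatever the routes above pass to next()
sketchApp.use((err: Error, _req: Request, res: Response, _next: NextFunction) => {
  res.status(500).send({ success: false, error: err.message })
})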
+ if (options.apmEnabled) { + expressApp.use(Sentry.Handlers.errorHandler()) + } return expressApp } diff --git a/packages/nlu-server/src/utils/order-keys.ts b/packages/nlu-server/src/api/order-keys.ts similarity index 100% rename from packages/nlu-server/src/utils/order-keys.ts rename to packages/nlu-server/src/api/order-keys.ts diff --git a/packages/nlu-server/src/api/routers/model-transfer.ts b/packages/nlu-server/src/api/routers/model-transfer.ts new file mode 100644 index 00000000..97b912e6 --- /dev/null +++ b/packages/nlu-server/src/api/routers/model-transfer.ts @@ -0,0 +1,64 @@ +import * as NLUEngine from '@botpress/nlu-engine' +import { Logger } from '@bpinternal/log4bot' +import bodyParser from 'body-parser' +import express, { Router, Request, Response, NextFunction } from 'express' + +import _ from 'lodash' +import { Application } from '../../application' +import { NLUServerOptions } from '../../typings' +import { getAppId } from '../app-id' +import { InvalidRequestFormatError, ResponseError } from '../errors' + +const handleError = (logger: Logger) => (thrownObject: any, _req: Request, res: Response, _next: NextFunction) => { + const error: Error = thrownObject instanceof Error ? thrownObject : new Error(`${thrownObject}`) + const code = error instanceof ResponseError ? error.statusCode : 500 + if (code >= 500) { + logger.attachError(error).error('Internal Error') + } + return res.sendStatus(code) +} + +export const createModelTransferRouter = (options: NLUServerOptions, app: Application, baseLogger: Logger): Router => { + const apiLogger = baseLogger.sub('api') + + const router = express.Router({ mergeParams: true }) + router.use(bodyParser.raw({ limit: options.modelSize })) + + router.get('/:modelId', async (req, res, next) => { + try { + const appId = getAppId(req) + const { modelId: stringId } = req.params + if (!_.isString(stringId) || !NLUEngine.modelIdService.isId(stringId)) { + throw new InvalidRequestFormatError(`model id "${stringId}" has invalid format`) + } + + const modelId = NLUEngine.modelIdService.fromString(stringId) + const modelWeights = await app.getModelWeights(appId, modelId) + + res.send(modelWeights) // express takes care of chunking + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.post('/', async (req, res, next) => { + try { + const appId = getAppId(req) + if (!req.body || !(req.body instanceof Buffer)) { + throw new InvalidRequestFormatError('request body has invalid format') + } + const modelWeights = req.body + await app.setModelWeights(appId, modelWeights) + + res.sendStatus(200) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.use(handleError(apiLogger)) + + return router +} diff --git a/packages/nlu-server/src/api/routers/root.ts b/packages/nlu-server/src/api/routers/root.ts new file mode 100644 index 00000000..e647e97b --- /dev/null +++ b/packages/nlu-server/src/api/routers/root.ts @@ -0,0 +1,349 @@ +import { http, TrainInput } from '@botpress/nlu-client' +import * as NLUEngine from '@botpress/nlu-engine' +import { Logger } from '@bpinternal/log4bot' +import bodyParser from 'body-parser' +import express, { Router, Request, Response, NextFunction } from 'express' + +import _ from 'lodash' +import { Application } from '../../application' +import { + ModelDoesNotExistError, + TrainingNotFoundError, + TrainingAlreadyStartedError, + LangServerCommError, + DucklingCommError, + DatasetValidationError, + LintingNotFoundError +} from '../../application/errors' +import { NLUServerOptions } from 
'../../typings' + +import { getAppId } from '../app-id' +import { InvalidRequestFormatError, InvalidTrainSetError } from '../errors' +import { orderKeys } from '../order-keys' + +import { + validatePredictInput, + validateTrainInput, + validateDetectLangInput, + validateLintInput, + isLintingSpeed +} from '../validation/validate' + +const { modelIdService } = NLUEngine + +const serializeError = (err: Error): http.NLUError => { + const { message, stack } = err + if (err instanceof ModelDoesNotExistError) { + const { statusCode } = err + return { message, stack, type: 'model_not_found', code: statusCode } + } + if (err instanceof TrainingNotFoundError) { + const { statusCode } = err + return { message, stack, type: 'training_not_found', code: statusCode } + } + if (err instanceof LintingNotFoundError) { + const { statusCode } = err + return { message, stack, type: 'linting_not_found', code: statusCode } + } + if (err instanceof TrainingAlreadyStartedError) { + const { statusCode } = err + return { message, stack, type: 'training_already_started', code: statusCode } + } + if (err instanceof InvalidRequestFormatError) { + const { statusCode } = err + return { message, stack, type: 'request_format', code: statusCode } + } + if (err instanceof LangServerCommError) { + const { statusCode } = err + return { message, stack, type: 'lang-server', code: statusCode } + } + if (err instanceof DucklingCommError) { + const { statusCode } = err + return { message, stack, type: 'duckling-server', code: statusCode } + } + if (err instanceof DatasetValidationError || err instanceof InvalidTrainSetError) { + const { statusCode } = err + return { message, stack, type: 'dataset_format', code: statusCode } + } + return { message, stack, type: 'internal', code: 500 } +} + +const handleError = (logger: Logger) => (thrownObject: any, _req: Request, res: Response, _next: NextFunction) => { + const error: Error = thrownObject instanceof Error ? 
thrownObject : new Error(`${thrownObject}`) + const nluError = serializeError(error) + const { code } = nluError + if (code >= 500) { + logger.attachError(error).error('Internal Error') + } + const resp: http.ErrorResponse = { success: false, error: nluError } + return res.status(code).send(resp) +} + +export const createRootRouter = (options: NLUServerOptions, app: Application, baseLogger: Logger): Router => { + const apiLogger = baseLogger.sub('api') + + const router = express.Router({ mergeParams: true }) + router.use(bodyParser.json({ limit: options.bodySize })) + + router.get('/', async (req, res, next) => { + try { + res.redirect('/info') + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.get('/info', async (req, res, next) => { + try { + const info = app.getInfo() + const resp: http.InfoResponseBody = { success: true, info } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.get('/models', async (req, res, next) => { + try { + const appId = getAppId(req) + const modelIds = await app.getModels(appId) + const stringIds = modelIds.map(modelIdService.toString) + const resp: http.ListModelsResponseBody = { success: true, models: stringIds } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.post('/models/prune', async (req, res, next) => { + try { + const appId = getAppId(req) + const modelIds = await app.pruneModels(appId) + const stringIds = modelIds.map(modelIdService.toString) + const resp: http.PruneModelsResponseBody = { success: true, models: stringIds } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.post('/train', async (req, res, next) => { + try { + const appId = getAppId(req) + const input = await validateTrainInput(req.body) + const { intents, entities, seed, language } = input + + const pickedSeed = seed ?? 
Math.round(Math.random() * 10000) + + const content = orderKeys({ + entities: _.orderBy(entities, (e) => e.name), + intents: _.orderBy(intents, (i) => i.name) + }) + + const trainInput: TrainInput = { + ...content, + language, + seed: pickedSeed + } + + const modelId = await app.startTraining(appId, trainInput) + + const resp: http.TrainResponseBody = { success: true, modelId: NLUEngine.modelIdService.toString(modelId) } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.get('/train', async (req, res, next) => { + try { + const appId = getAppId(req) + const { lang } = req.query + if (lang && !_.isString(lang)) { + throw new InvalidRequestFormatError(`query parameter lang: "${lang}" has invalid format`) + } + + const trainings = await app.getAllTrainings(appId, lang) + const serialized = trainings.map(({ modelId, ...state }) => ({ + modelId: modelIdService.toString(modelId), + ...state + })) + + const resp: http.ListTrainingsResponseBody = { success: true, trainings: serialized } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.get('/train/:modelId', async (req, res, next) => { + try { + const appId = getAppId(req) + const { modelId: stringId } = req.params + if (!_.isString(stringId) || !NLUEngine.modelIdService.isId(stringId)) { + throw new InvalidRequestFormatError(`model id "${stringId}" has invalid format`) + } + + const modelId = NLUEngine.modelIdService.fromString(stringId) + const session = await app.getTrainingState(appId, modelId) + + const resp: http.TrainProgressResponseBody = { success: true, session } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.post('/train/:modelId/cancel', async (req, res, next) => { + try { + const appId = getAppId(req) + + const { modelId: stringId } = req.params + + const modelId = NLUEngine.modelIdService.fromString(stringId) + + await app.cancelTraining(appId, modelId) + + const resp: http.SuccessReponse = { success: true } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.post('/predict/:modelId', async (req, res, next) => { + try { + const appId = getAppId(req) + + const { modelId: stringId } = req.params + const { utterances } = await validatePredictInput(req.body) + + if (!_.isArray(utterances) || (options.batchSize > 0 && utterances.length > options.batchSize)) { + throw new InvalidRequestFormatError( + `Batch size of ${utterances.length} is larger than the allowed maximum batch size (${options.batchSize}).` + ) + } + + const modelId = NLUEngine.modelIdService.fromString(stringId) + const predictions = await app.predict(appId, modelId, utterances) + + const resp: http.PredictResponseBody = { success: true, predictions } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.post('/detect-lang', async (req, res, next) => { + try { + const appId = getAppId(req) + + const { utterances, models } = await validateDetectLangInput(req.body) + + const invalidIds = models.filter(_.negate(modelIdService.isId)) + if (invalidIds.length) { + throw new InvalidRequestFormatError(`The following model ids are invalid: [${invalidIds.join(', ')}]`) + } + + const modelIds = models.map(modelIdService.fromString) + + if (!_.isArray(utterances) || (options.batchSize > 0 && utterances.length > options.batchSize)) { + throw new InvalidRequestFormatError( + `Batch size of ${utterances.length} is larger than the allowed maximum batch size (${options.batchSize}).` + )
+ } + + const detectedLanguages = await app.detectLanguage(appId, modelIds, utterances) + + const resp: http.DetectLangResponseBody = { success: true, detectedLanguages } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.post('/lint', async (req, res, next) => { + try { + const appId = getAppId(req) + const input = await validateLintInput(req.body) + const { intents, entities, language, speed } = input + + const trainInput: TrainInput = { + intents, + entities, + language, + seed: 0 + } + + const modelId = await app.startLinting(appId, speed, trainInput) + + const resp: http.LintResponseBody = { success: true, modelId: NLUEngine.modelIdService.toString(modelId) } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.get('/lint/:modelId/:speed', async (req, res, next) => { + try { + const appId = getAppId(req) + const { modelId: stringId, speed } = req.params + if (!_.isString(stringId) || !NLUEngine.modelIdService.isId(stringId)) { + throw new InvalidRequestFormatError(`model id "${stringId}" has invalid format`) + } + if (!isLintingSpeed(speed)) { + throw new InvalidRequestFormatError(`path param "${speed}" is not a valid linting speed.`) + } + + const modelId = NLUEngine.modelIdService.fromString(stringId) + const session = await app.getLintingState(appId, modelId, speed) + + const resp: http.LintProgressResponseBody = { + success: true, + session + } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.post('/lint/:modelId/:speed/cancel', async (req, res, next) => { + try { + const appId = getAppId(req) + + const { modelId: stringId, speed } = req.params + if (!_.isString(stringId) || !NLUEngine.modelIdService.isId(stringId)) { + throw new InvalidRequestFormatError(`model id "${stringId}" has invalid format`) + } + if (!isLintingSpeed(speed)) { + throw new InvalidRequestFormatError(`path param "${speed}" is not a valid linting speed.`) + } + + const modelId = NLUEngine.modelIdService.fromString(stringId) + + await app.cancelLinting(appId, modelId, speed) + + const resp: http.SuccessReponse = { success: true } + res.send(resp) + return next() + } catch (thrown) { + return next(thrown) + } + }) + + router.use(handleError(apiLogger)) + + return router +} diff --git a/packages/nlu-server/src/api/validation/extractVariables.test.ts b/packages/nlu-server/src/api/validation/extractVariables.test.ts deleted file mode 100644 index 51b79953..00000000 --- a/packages/nlu-server/src/api/validation/extractVariables.test.ts +++ /dev/null @@ -1,11 +0,0 @@ -import extractVariables from './extractVariables' - -test('variables extraction', () => { - expect(extractVariables('give me a [banana](fruit)')[0]).toBe('fruit') - - const extracted = extractVariables('give me a [banana](fruit) and a [pizza](thing)') - expect(extracted[0]).toBe('fruit') - expect(extracted[1]).toBe('thing') - - expect(extractVariables('give me a fruit').length).toBe(0) -}) diff --git a/packages/nlu-server/src/api/validation/extractVariables.ts b/packages/nlu-server/src/api/validation/extractVariables.ts deleted file mode 100644 index 4b870d66..00000000 --- a/packages/nlu-server/src/api/validation/extractVariables.ts +++ /dev/null @@ -1,10 +0,0 @@ -const ALL_SLOTS_REGEX = /\[(.+?)\]\(([\w_\. 
:-]+)\)/gi - -export default function extractVariables(text: string) { - const slotMatches: RegExpExecArray[] = [] - let matches: RegExpExecArray | null - while ((matches = ALL_SLOTS_REGEX.exec(text)) !== null) { - slotMatches.push(matches) - } - return slotMatches.map((s) => s[2]) -} diff --git a/packages/nlu-server/src/api/validation/schemas.ts b/packages/nlu-server/src/api/validation/schemas.ts index 7e4ec71e..e336befa 100644 --- a/packages/nlu-server/src/api/validation/schemas.ts +++ b/packages/nlu-server/src/api/validation/schemas.ts @@ -46,6 +46,15 @@ export const TrainInputSchema = Joi.object().keys({ seed: Joi.number().optional() }) +export const LintInputSchema = Joi.object().keys({ + language: Joi.string().required(), + intents: Joi.array().items(IntentSchema).required().min(0), + contexts: Joi.array().items(Joi.string()).required().min(0), + entities: Joi.array().items(EntitySchema).optional().default([]), + seed: Joi.number().optional(), // just to be compatible with train input + speed: Joi.string().required() +}) + export const PredictInputSchema = Joi.object().keys({ utterances: Joi.array().items(Joi.string()).required().min(1) }) diff --git a/packages/nlu-server/src/api/validation/validate.test.ts b/packages/nlu-server/src/api/validation/validate.test.ts index a2e005b9..0ccc4d3f 100644 --- a/packages/nlu-server/src/api/validation/validate.test.ts +++ b/packages/nlu-server/src/api/validation/validate.test.ts @@ -1,5 +1,10 @@ -import { IntentDefinition, ListEntityDefinition, PatternEntityDefinition, SlotDefinition } from '@botpress/nlu-client' -import { http } from '@botpress/nlu-client' +import { + IntentDefinition, + ListEntityDefinition, + PatternEntityDefinition, + SlotDefinition, + http +} from '@botpress/nlu-client' import { validateTrainInput } from './validate' @@ -13,14 +18,6 @@ const CITY_ENUM: ListEntityDefinition = { ] } -const TICKET_PATTERN: PatternEntityDefinition = { - name: 'ticket', - type: 'pattern', - case_sensitive: true, - regex: '[A-Z]{3}-[0-9]{3}', // ABC-123 - examples: ['ABC-123'] -} - const VARIABLE_CITY_FROM: SlotDefinition = { name: 'city-from', entities: ['city'] } const VARIABLE_TICKET_PROBLEM: SlotDefinition = { name: 'tick-with-problem', entities: ['ticket'] } @@ -107,7 +104,7 @@ test('validate input without topics or language should throw', async () => { await expect(validateTrainInput(withoutLang)).rejects.toThrow() }) -test('validate without intent should fail', async () => { +test('validate intent without utterances should fail', async () => { // arrange const withoutUtterances: IntentDefinition = { name: 'will break', contexts: ['A'] } as IntentDefinition @@ -137,75 +134,6 @@ test('validate intent with unexisting context should fail', async () => { await expect(validateTrainInput(trainInput)).rejects.toThrow() }) -test('validate enum without values or patterns without regexes should fail', async () => { - // arrange - const incompleteEnum: ListEntityDefinition = { name: 'city' } as ListEntityDefinition - - const incompletePattern: PatternEntityDefinition = { name: 'password' } as PatternEntityDefinition - - const withoutValues: http.TrainRequestBody = { - intents: [FLY_INTENT], - contexts: ['fly'], - entities: [incompleteEnum], - language: LANG, - seed: 42 - } - - const withoutRegexes: http.TrainRequestBody = { - intents: [PROBLEM_INTENT], - contexts: ['problem'], - entities: [incompletePattern], - language: LANG, - seed: 42 - } - - // act & assert - await expect(validateTrainInput(withoutValues)).rejects.toThrow() - await 
expect(validateTrainInput(withoutRegexes)).rejects.toThrow() -}) - -test('validate with an unexisting referenced enum should throw', async () => { - // arrange - const trainInput: http.TrainRequestBody = { - intents: [FLY_INTENT], - contexts: ['fly'], - entities: [TICKET_PATTERN], - language: LANG, - seed: 42 - } - - // act & assert - await expect(validateTrainInput(trainInput)).rejects.toThrow() -}) - -test('validate with an unexisting referenced pattern should throw', async () => { - // arrange - const trainInput: http.TrainRequestBody = { - intents: [PROBLEM_INTENT], - contexts: ['problem'], - entities: [CITY_ENUM], - language: LANG, - seed: 42 - } - - // act & assert - await expect(validateTrainInput(trainInput)).rejects.toThrow() -}) - -test('validate with an unexisting referenced complex should throw', async () => { - // arrange - const trainInput: http.TrainRequestBody = { - intents: [BOUILLON_INTENT], - contexts: ['bouillon'], - entities: [CITY_ENUM], - language: LANG, - seed: 42 - } - - // act & assert - await expect(validateTrainInput(trainInput)).rejects.toThrow() -}) - test('validate with correct format but unexpected property should fail', async () => { // arrange const trainInput: http.TrainRequestBody & { enums: any[] } = { diff --git a/packages/nlu-server/src/api/validation/validate.ts b/packages/nlu-server/src/api/validation/validate.ts index 51880d1e..e61cf2ed 100644 --- a/packages/nlu-server/src/api/validation/validate.ts +++ b/packages/nlu-server/src/api/validation/validate.ts @@ -1,73 +1,83 @@ -import { - IntentDefinition, - ListEntityDefinition, - PatternEntityDefinition, - SlotDefinition, - http -} from '@botpress/nlu-client' -import * as NLUEngine from '@botpress/nlu-engine' -import { validate } from 'joi' +import { IntentDefinition, http, IssueComputationSpeed } from '@botpress/nlu-client' +import { ObjectSchema, validate } from 'joi' +import _ from 'lodash' +import { InvalidRequestFormatError, InvalidTrainSetError } from '../errors' -import { isListEntity, isPatternEntity } from '../../utils/guards' -import { PredictInputSchema, TrainInputSchema, DetectLangInputSchema } from './schemas' +import { PredictInputSchema, TrainInputSchema, DetectLangInputSchema, LintInputSchema } from './schemas' -const SLOT_ANY = 'any' - -const makeSlotChecker = (listEntities: ListEntityDefinition[], patternEntities: PatternEntityDefinition[]) => ( - variable: SlotDefinition ) => { - const { entities, name } = variable - - const supportedTypes = [ - ...listEntities.map((e) => e.name), - ...patternEntities.map((p) => p.name), - ...NLUEngine.SYSTEM_ENTITIES, - SLOT_ANY - ] - for (const entity of entities) { - if (!supportedTypes.includes(entity)) { - throw new Error(`Slot ${name} references entity ${entity}, but it does not exist.`) - } - } -} - -const makeIntentChecker = (contexts: string[]) => ( - intent: IntentDefinition, - enums: ListEntityDefinition[], - patterns: PatternEntityDefinition[] -) => { +const validateIntent = (contexts: string[], intent: IntentDefinition) => { for (const ctx of intent.contexts) { if (!contexts.includes(ctx)) { - throw new Error(`Context ${ctx} of Intent ${intent.name} does not seem to appear in all contexts`) + throw new InvalidTrainSetError(`Context ${ctx} of Intent ${intent.name} does not appear in the list of contexts`) } } - const variableChecker = makeSlotChecker(enums, patterns) - intent.slots.forEach(variableChecker) } -export async function validateTrainInput(rawInput: any): Promise<http.TrainRequestBody> { - const validatedInput: http.TrainRequestBody = await validate(rawInput, TrainInputSchema, {}) - - const { entities, contexts } = validatedInput - - const listEntities = entities.filter(isListEntity) - const patternEntities = entities.filter(isPatternEntity) +async function _validateTrainset<T extends http.TrainRequestBody | http.LintRequestBody>( + rawInput: any, + schema: ObjectSchema +): Promise<T> { + let validatedInput: T + try { + validatedInput = await validate(rawInput, schema, {}) + } catch (thrown) { + if (thrown instanceof Error) { + throw new InvalidRequestFormatError(thrown.message) + } + throw new InvalidRequestFormatError('invalid training/linting format') + } - const validateIntent = makeIntentChecker(contexts) + const { contexts } = validatedInput for (const intent of validatedInput.intents) { - validateIntent(intent, listEntities, patternEntities) + validateIntent(contexts, intent) } return validatedInput } -export async function validatePredictInput(rawInput: any): Promise<http.PredictRequestBody> { - const validated: http.PredictRequestBody = await validate(rawInput, PredictInputSchema, {}) +export async function validateTrainInput(rawInput: any): Promise<http.TrainRequestBody> { + return _validateTrainset<http.TrainRequestBody>(rawInput, TrainInputSchema) +} + +export async function validateLintInput(rawInput: any): Promise<http.LintRequestBody> { + const validated = await _validateTrainset<http.LintRequestBody>(rawInput, LintInputSchema) + if (!isLintingSpeed(validated.speed)) { + throw new InvalidRequestFormatError(`speed "${validated.speed}" is not a valid linting speed.`) + } return validated } +export async function validatePredictInput(rawInput: any): Promise<http.PredictRequestBody> { + try { + const validated: http.PredictRequestBody = await validate(rawInput, PredictInputSchema, {}) + return validated + } catch (thrown) { + if (thrown instanceof Error) { + throw new InvalidRequestFormatError(thrown.message) + } + throw new InvalidRequestFormatError('invalid predict format') + } } + export async function validateDetectLangInput(rawInput: any): Promise<http.DetectLangRequestBody> { - const validated: http.DetectLangRequestBody = await validate(rawInput, DetectLangInputSchema, {}) - return validated + try { + const validated: http.DetectLangRequestBody = await validate(rawInput, DetectLangInputSchema, {}) + return validated + } catch (thrown) { + if (thrown instanceof Error) { + throw new InvalidRequestFormatError(thrown.message) + } + throw new InvalidRequestFormatError('invalid detect language format') + } +} + +export function isLintingSpeed(s: string): s is IssueComputationSpeed { + const allSpeeds: { [s in IssueComputationSpeed]: s } = { + fastest: 'fastest', + fast: 'fast', + slow: 'slow', + slowest: 'slowest' + } + return Object.keys(allSpeeds).includes(s) }
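Note: `isLintingSpeed` above is a plain type guard over the four `IssueComputationSpeed` literals. A minimal sketch of how the routers rely on the narrowing (mirroring the lint routes earlier in this diff; `req`, `appId` and `modelId` are assumed to be in scope):

```ts
const { speed } = req.params // typed as string
if (!isLintingSpeed(speed)) {
  throw new InvalidRequestFormatError(`path param "${speed}" is not a valid linting speed.`)
}
// here `speed` is narrowed to IssueComputationSpeed, i.e. 'fastest' | 'fast' | 'slow' | 'slowest'
await app.cancelLinting(appId, modelId, speed)
```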
diff --git a/packages/nlu-server/src/app.test.ts b/packages/nlu-server/src/app.test.ts deleted file mode 100644 index 62eaab13..00000000 --- a/packages/nlu-server/src/app.test.ts +++ /dev/null @@ -1,60 +0,0 @@ -import { createAPI } from './api' -import { makeLogger } from '@botpress/logger' -import request from 'supertest' -import { version } from 'moment' -import { makeApplication } from './bootstrap/make-application' -import { NLUServerOptions } from './bootstrap/config' -import { buildWatcher } from './bootstrap/watcher' -import { Application } from './application' - -const options: NLUServerOptions = { - host: 'localhost', - port: 3200, - limitWindow: '1m', - limit: 0, - bodySize: '', - batchSize: 1, - modelCacheSize: '', - verbose: 1, - doc: false, - logFilter: [''], - languageSources: [ - { - endpoint: 'https://lang-01.botpress.io' - } - ], - ducklingURL: 'https://duckling.botpress.io', - ducklingEnabled: true, - legacyElection: false, - modelDir: 'testdir', - maxTraining: 2 -} - -const baseLogger = makeLogger() -let watcher -let app: Application - -beforeEach(async () => { - watcher = buildWatcher() - app = await makeApplication(options, version, baseLogger, watcher) -}) - -afterEach(async () => { - watcher.close() - await app.teardown() -}) - -test('GET /unknown-path', async () => { - const expressApp = await createAPI(options, app, baseLogger) - await request(expressApp).get('/unknown-path').expect(404) -}) - -test.each(['/info', '/v1/info'])('GET %s', async (path) => { - const expressApp = await createAPI(options, app, baseLogger) - await request(expressApp).get(path).expect(200) -}) - -test('GET /models', async () => { - const expressApp = await createAPI(options, app, baseLogger) - await request(expressApp).get('/models').set('X-App-Id', 'my-app').expect(200, { success: true, models: [] }) -}) diff --git a/packages/nlu-server/src/application/app-observer.ts b/packages/nlu-server/src/application/app-observer.ts new file mode 100644 index 00000000..be1d5c66 --- /dev/null +++ b/packages/nlu-server/src/application/app-observer.ts @@ -0,0 +1,39 @@ +import { ModelId } from '@botpress/nlu-engine' +import { EventEmitter2 } from 'eventemitter2' +import { Training } from '../infrastructure/training-repo/typings' + +export type TrainingUpdateData = Training +export type TrainingUpdateListener = (eventData: TrainingUpdateData) => Promise<void> + +export type ModelLoadedData = { appId: string; modelId: ModelId; readTime: number; loadTime: number; totalTime: number } +export type ModelLoadedListener = (eventData: ModelLoadedData) => Promise<void> + +export type ApplicationEvent = 'training_update' | 'model_loaded' + +export type ApplicationEventHandler<E extends ApplicationEvent> = E extends 'training_update' + ? TrainingUpdateListener + : ModelLoadedListener + +export type ApplicationEventData<E extends ApplicationEvent> = E extends 'training_update' + ? TrainingUpdateData + : ModelLoadedData + +export class ApplicationObserver { + protected evEmitter = new EventEmitter2() + + public on<E extends ApplicationEvent>(event: E, handler: ApplicationEventHandler<E>): void { + this.evEmitter.on(event, handler) + } + + public once<E extends ApplicationEvent>(event: E, handler: ApplicationEventHandler<E>): void { + this.evEmitter.once(event, handler) + } + + public off<E extends ApplicationEvent>(event: E, handler: ApplicationEventHandler<E>): void { + this.evEmitter.off(event, handler) + } + + public emit<E extends ApplicationEvent>(event: E, data: ApplicationEventData<E>): void { + this.evEmitter.emit(event, data) + } +}
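`ApplicationObserver` is a thin typed wrapper around `EventEmitter2`: the conditional types map each event name to its payload and listener shape, so subscribers get the right payload type per event. A rough consumer sketch (the `app` instance is hypothetical; `Application` extends this class later in the diff):

```ts
app.on('model_loaded', async ({ appId, modelId, readTime, loadTime, totalTime }) => {
  // payload is typed as ModelLoadedData
})
app.on('training_update', async (training) => {
  // payload is typed as Training
})
```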
diff --git a/packages/nlu-server/src/application/distributed-training-queue.ts b/packages/nlu-server/src/application/distributed-training-queue.ts deleted file mode 100644 index 2ba6c983..00000000 --- a/packages/nlu-server/src/application/distributed-training-queue.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { Logger } from '@botpress/logger' -import { Engine, ModelId } from '@botpress/nlu-engine' -import { ModelRepository } from '../infrastructure/model-repo' -import { TrainingRepository } from '../infrastructure/training-repo/typings' -import { Broadcaster } from '../utils/broadcast' -import TrainingQueue, { QueueOptions } from './training-queue' - -export class DistributedTrainingQueue extends TrainingQueue { - private _broadcastCancelTraining!: TrainingQueue['cancelTraining'] - private _broadcastRunTask!: () => Promise<void> - - constructor( - engine: Engine, - modelRepo: ModelRepository, - trainingRepo: TrainingRepository, - clusterId: string, - logger: Logger, - private _broadcaster: Broadcaster, - opt?: Partial<QueueOptions> - ) { - super(engine, modelRepo, trainingRepo, clusterId, logger, opt) - } - - public async initialize() { - await super.initialize() - - this._broadcastCancelTraining = await this._broadcaster.broadcast<[string, ModelId]>({ - name: 'cancel_training', - run: super.cancelTraining.bind(this) - }) - - this._broadcastRunTask = await this._broadcaster.broadcast<[]>({ - name: 'run_task', - run: super.runTask.bind(this) - }) - } - - // for if a different instance gets the cancel training http call - public cancelTraining(appId: string, modelId: ModelId) { - return this._broadcastCancelTraining(appId, modelId) - } - - // for if an completly busy instance receives a queue training http call - protected runTask() { - return this._broadcastRunTask() - } -} diff --git a/packages/nlu-server/src/application/errors.ts b/packages/nlu-server/src/application/errors.ts index ab89d368..747cbc33 100644 --- a/packages/nlu-server/src/application/errors.ts +++ b/packages/nlu-server/src/application/errors.ts @@ -1,23 +1,41 @@ +import { DatasetIssue, IssueCode, IssueComputationSpeed } from '@botpress/nlu-client' import { ModelId, modelIdService } from '@botpress/nlu-engine' import { ResponseError } from '../api/errors' export class ModelDoesNotExistError extends ResponseError { - constructor(modelId: ModelId) { + constructor(appId: string, modelId: ModelId) { const stringId = modelIdService.toString(modelId) - super(`modelId ${stringId} can't be found`, 404) + const trainKey = `${appId}/${stringId}` + super(`model ${trainKey} can't be found`, 404) } } export class TrainingNotFoundError extends ResponseError { - constructor(modelId: ModelId) { + constructor(appId: string, modelId: ModelId) { const stringId = modelIdService.toString(modelId) - super(`no current training for model: ${stringId}`, 404) + const trainKey = `${appId}/${stringId}` + super(`no current training for: ${trainKey}`, 404) + } +} + +export class LintingNotFoundError extends ResponseError { + constructor(appId: string, modelId: ModelId, speed: IssueComputationSpeed) { + const stringId = modelIdService.toString(modelId) + const trainKey = `${appId}/${stringId}` + super(`no current linting with speed "${speed}" for: ${trainKey}`, 404) + } +} + +export class InvalidModelFormatError extends ResponseError { + constructor(message: string) { + super(`model weights have an invalid format: ${message}`, 400) + } } export class InvalidModelSpecError extends ResponseError { constructor(modelId: ModelId, currentSpec: string) { - super(`expected spec hash to be "${currentSpec}". target model has spec "${modelId.specificationHash}".`, 400) + const code = 455 // custom status code + super(`expected spec hash to be "${currentSpec}". target model has spec "${modelId.specificationHash}".`, code) } } @@ -28,3 +46,32 @@ export class TrainingAlreadyStartedError extends ResponseError { super(`Training "${trainKey}" already started...`, 409) } } + +export class LintingAlreadyStartedError extends ResponseError { + constructor(appId: string, modelId: ModelId) { + const stringId = modelIdService.toString(modelId) + const lintKey = `${appId}/${stringId}` + super(`Linting "${lintKey}" already started...`, 409) + } +} + +export class LangServerCommError extends ResponseError { + constructor(err: Error) { + const { message } = err + super(`An error occurred during communication with the language server: ${message}`, 500) + } +} + +export class DucklingCommError extends ResponseError { + constructor(err: Error) { + const { message } = err + super(`An error occurred during communication with the Duckling server: ${message}`, 500) + } +} + +export class DatasetValidationError extends ResponseError { + constructor(issues: DatasetIssue<IssueCode>[]) { + const message = issues.map(({ code, message }) => `[${code}] ${message}`).join('\n') + super(message, 400) + } +} diff --git a/packages/nlu-server/src/application/index.ts b/packages/nlu-server/src/application/index.ts index feabfeba..a06c4e3f 100644 --- a/packages/nlu-server/src/application/index.ts +++ b/packages/nlu-server/src/application/index.ts @@ -1,24 +1,54 @@ -import { Logger } from '@botpress/logger' -import { TrainingState, PredictOutput, TrainInput, ServerInfo, TrainingStatus } from '@botpress/nlu-client' -import { Engine, ModelId, modelIdService, Specifications } from '@botpress/nlu-engine' +import { + TrainingState, + PredictOutput, + TrainInput, + ServerInfo, + TrainingStatus, + LintingState, + IssueComputationSpeed +} from '@botpress/nlu-client' +import { Engine, ModelId, modelIdService, errors as engineErrors, Model } from '@botpress/nlu-engine' +import { Logger } from '@bpinternal/log4bot' import Bluebird from 'bluebird' import _ from 'lodash' +import { ModelTransferDisabled } from '../api/errors' +import { TrainingRepository, TrainingListener, Training } from '../infrastructure' +import { LintingRepository } from '../infrastructure/linting-repo' import { ModelRepository } from '../infrastructure/model-repo' -import { ReadonlyTrainingRepository } from '../infrastructure/training-repo/typings' -import { ModelDoesNotExistError, TrainingNotFoundError, InvalidModelSpecError } from './errors' -import TrainingQueue from './training-queue' +import { ApplicationObserver } from './app-observer' +import { + ModelDoesNotExistError, + TrainingNotFoundError, + LangServerCommError, + DucklingCommError, + InvalidModelSpecError, + DatasetValidationError, + LintingNotFoundError, + InvalidModelFormatError +} from './errors' +import { LintingQueue } from './linting-queue' +import { deserializeModel, deserializeModelId } from './serialize-model' +import { TrainingQueue } from './training-queue' + +type AppOptions = { + modelTransferEnabled: boolean +} -export class Application { +export class Application extends ApplicationObserver { private _logger: Logger constructor( private _modelRepo: ModelRepository, - private _trainingRepo: ReadonlyTrainingRepository, + private _trainingRepo: TrainingRepository, + private _lintingRepo: LintingRepository, private _trainingQueue: TrainingQueue, + private _lintingQueue: LintingQueue, private _engine: Engine, private _serverVersion: string, - baseLogger: Logger + baseLogger: Logger, + private _opts: Partial<AppOptions> = {} ) { + super() this._logger = baseLogger.sub('app') } @@ -26,20 +56,78 @@ export class Application { await this._modelRepo.initialize() await this._trainingRepo.initialize() await this._trainingQueue.initialize() + await this._lintingRepo.initialize() + await this._lintingQueue.initialize() + this._trainingQueue.addListener(this._listenTrainingUpdates) } public async teardown() { await this._modelRepo.teardown() await this._trainingRepo.teardown() await this._trainingQueue.teardown() + this._trainingQueue.removeListener(this._listenTrainingUpdates) + } + + public getLocalTrainingCount() { + return this._trainingQueue.getLocalTrainingCount() } public getInfo(): ServerInfo { - const health = this._engine.getHealth() const specs = this._engine.getSpecifications() const languages = this._engine.getLanguages() const version = this._serverVersion - return { health, specs, languages, version } + + const { modelTransferEnabled } = this._opts + return { specs, languages, version, modelTransferEnabled: !!modelTransferEnabled } + } + + public async getModelWeights(appId: string, modelId: ModelId): Promise<Buffer> { + if (!this._opts.modelTransferEnabled) { + throw new ModelTransferDisabled() + } + + const modelWeights = await this._modelRepo.getModel(appId, modelId) + if (!modelWeights) { + throw new ModelDoesNotExistError(appId, modelId) + } + + return modelWeights + } + + public async setModelWeights(appId: string, modelWeights: Buffer) { + if (!this._opts.modelTransferEnabled) { + throw new ModelTransferDisabled() + } + + let modelId: ModelId + try { + modelId = deserializeModelId(modelWeights) + } catch (thrown) { + const err = this._toErr(thrown) + throw new InvalidModelFormatError(err.message) + } + + const { specificationHash: currentSpec } = this._getSpecFilter() + if (modelId.specificationHash !== currentSpec) { + throw new InvalidModelSpecError(modelId, currentSpec) + } + + let model: Model + try { + model = deserializeModel(modelWeights) + } catch (thrown) { + const err = this._toErr(thrown) + throw new InvalidModelFormatError(err.message) + } + + try { + this._engine.validateModel(model) + } catch (thrown) { + const err = this._toErr(thrown) + throw new InvalidModelFormatError(err.message) + } + + return this._modelRepo.saveModel(appId, model.id, modelWeights) } public async getModels(appId: string): Promise<ModelId[]> { @@ -65,6 +153,18 @@ specifications: this._engine.getSpecifications() }) + const stringId = modelIdService.toString(modelId) + const key = `${appId}/${stringId}` + const { issues } = await this._engine.lint(key, trainInput, { + minSpeed: 'fastest', + minSeverity: 'critical', + runInMainProcess: true + }) + + if (!!issues.length) { + throw new DatasetValidationError(issues) + } + await this._trainingQueue.queueTraining(appId, modelId, trainInput) return modelId }
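The two weight accessors above back the model-transfer router at the top of this diff. A hypothetical client round-trip; the mount path (`/modelweights`), the port and the `X-App-Id` header convention are assumptions based on other parts of this PR, not confirmed by the router wiring shown here:

```ts
import axios from 'axios'

const base = 'http://localhost:3200/modelweights' // assumed mount point
const headers = { 'X-App-Id': 'my-app' }

// download weights from a source server...
const { data: weights } = await axios.get(`${base}/${stringModelId}`, {
  headers,
  responseType: 'arraybuffer'
})

// ...and upload them to a target server; the payload embeds the model id, so the
// target re-derives it, rejects spec-hash mismatches with the custom 455 status,
// and validates the weights before persisting them
await axios.post(base, weights, {
  headers: { ...headers, 'Content-Type': 'application/octet-stream' }
})
```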
@@ -106,9 +206,9 @@ export class Application { throw new InvalidModelSpecError(modelId, currentSpec) } - const model = await this._modelRepo.getModel(appId, modelId) - if (!model) { - throw new TrainingNotFoundError(modelId) + const modelExists = await this._modelRepo.exists(appId, modelId) + if (!modelExists) { + throw new TrainingNotFoundError(appId, modelId) } return { @@ -121,10 +221,14 @@ export class Application { return this._trainingQueue.cancelTraining(appId, modelId) } + public async cancelLinting(appId: string, modelId: ModelId, speed: IssueComputationSpeed): Promise<void> { + return this._lintingQueue.cancelLinting(appId, modelId, speed) + } + public async predict(appId: string, modelId: ModelId, utterances: string[]): Promise<PredictOutput[]> { const modelExists: boolean = await this._modelRepo.exists(appId, modelId) if (!modelExists) { - throw new ModelDoesNotExistError(modelId) + throw new ModelDoesNotExistError(appId, modelId) } const { specificationHash: currentSpec } = this._getSpecFilter() @@ -132,29 +236,28 @@ export class Application { throw new InvalidModelSpecError(modelId, currentSpec) } - if (!this._engine.hasModel(modelId)) { - const model = await this._modelRepo.getModel(appId, modelId) - if (!model) { - throw new ModelDoesNotExistError(modelId) - } + await this._loadModelIfNeeded(appId, modelId) - await this._engine.loadModel(model) + try { + const predictions = await Bluebird.map(utterances, (utterance) => this._engine.predict(utterance, modelId)) + return predictions + } catch (thrown) { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) + if (err instanceof engineErrors.LangServerError) { + throw new LangServerCommError(err) + } + if (err instanceof engineErrors.DucklingServerError) { + throw new DucklingCommError(err) + } + throw thrown } - - const predictions = await Bluebird.map(utterances as string[], async (utterance) => { - const detectedLanguage = await this._engine.detectLanguage(utterance, { [modelId.languageCode]: modelId }) - const { entities, contexts, spellChecked } = await this._engine.predict(utterance, modelId) - return { entities, contexts, spellChecked, detectedLanguage } - }) - - return predictions } public async detectLanguage(appId: string, modelIds: ModelId[], utterances: string[]): Promise<string[]> { for (const modelId of modelIds) { const modelExists: boolean = await this._modelRepo.exists(appId, modelId) if (!modelExists) { - throw new ModelDoesNotExistError(modelId) + throw new ModelDoesNotExistError(appId, modelId) } const { specificationHash: currentSpec } = this._getSpecFilter() @@ -162,24 +265,20 @@ export class Application { throw new InvalidModelSpecError(modelId, currentSpec) } - if (!this._engine.hasModel(modelId)) { - const model = await this._modelRepo.getModel(appId, modelId) - if (!model) { - throw new ModelDoesNotExistError(modelId) - } - await this._engine.loadModel(model) - } + await this._loadModelIfNeeded(appId, modelId) } const missingModels = modelIds.filter((m) => !this._engine.hasModel(m)) if (missingModels.length) { - const stringMissingModels = missingModels.map(modelIdService.toString) - this._logger.warn( - `About to detect language but your model cache seems to small to contains all models simultaneously. The following models are missing [${stringMissingModels.join( - ', ' - )}. You can increase your cache size by the CLI or config.]` - ) + const stringMissingModels = missingModels.map(modelIdService.toString).join(', ') + + const CACHE_TOO_SMALL_WARNING = ` +About to detect language but your model cache seems too small to contain all models simultaneously. +The following models are missing [${stringMissingModels}]. +You can increase your cache size via the CLI or config. + ` + this._logger.warn(CACHE_TOO_SMALL_WARNING) } const loadedModels = modelIds.filter((m) => this._engine.hasModel(m)) @@ -194,9 +293,73 @@ export class Application { return detectedLanguages } + public async startLinting(appId: string, speed: IssueComputationSpeed, trainInput: TrainInput): Promise<ModelId> { + const modelId = modelIdService.makeId({ + ...trainInput, + specifications: this._engine.getSpecifications() + }) + + await this._lintingQueue.queueLinting(appId, modelId, speed, trainInput) + return modelId + } + + public async getLintingState(appId: string, modelId: ModelId, speed: IssueComputationSpeed): Promise<LintingState> { + const linting = await this._lintingRepo.get({ appId, modelId, speed }) + if (linting) { + const { status, error, currentCount, totalCount, issues } = linting + return { status, error, currentCount, totalCount, issues } + } + + const { specificationHash: currentSpec } = this._getSpecFilter() + if (modelId.specificationHash !== currentSpec) { + throw new InvalidModelSpecError(modelId, currentSpec) + } + + throw new LintingNotFoundError(appId, modelId, speed) + } + + private _listenTrainingUpdates: TrainingListener = async (training: Training) => { + this.emit('training_update', training) + } + + private _loadModelIfNeeded = async (appId: string, modelId: ModelId) => { + if (!this._engine.hasModel(modelId)) { + const modelReadStartTime = Date.now() + + const modelBuffer = await this._modelRepo.getModel(appId, modelId) + if (!modelBuffer) { + throw new ModelDoesNotExistError(appId, modelId) + } + + const modelLoadStartTime = Date.now() + + const model = deserializeModel(modelBuffer) + await this._engine.loadModel(model) + + const modelLoadEndTime = Date.now() + + const readTime = modelLoadStartTime - modelReadStartTime + const loadTime = modelLoadEndTime - modelLoadStartTime + const totalTime = modelLoadEndTime - modelReadStartTime + + const strId = this._toString(appId, modelId) + this._logger.debug( + `[${strId}] Reading model from storage took ${readTime} ms and loading it in memory took ${loadTime} ms. The whole operation took ${totalTime} ms` + ) + this.emit('model_loaded', { appId, modelId, readTime, loadTime, totalTime }) + } + } + + private _toString = (appId: string, modelId: ModelId) => { + const strModelId = modelIdService.toString(modelId) + return `${appId}/${strModelId}` + } + private _getSpecFilter = (): { specificationHash: string } => { const specifications = this._engine.getSpecifications() const specFilter = modelIdService.briefId({ specifications }) as { specificationHash: string } return specFilter } + + private _toErr = (thrown: any): Error => (thrown instanceof Error ?
thrown : new Error(`${thrown}`)) } diff --git a/packages/nlu-server/src/application/linting-queue/index.ts b/packages/nlu-server/src/application/linting-queue/index.ts new file mode 100644 index 00000000..a94d4180 --- /dev/null +++ b/packages/nlu-server/src/application/linting-queue/index.ts @@ -0,0 +1,123 @@ +import * as q from '@botpress/distributed' +import { IssueComputationSpeed, TrainInput } from '@botpress/nlu-client' +import * as NLUEngine from '@botpress/nlu-engine' +import { Logger } from '@bpinternal/log4bot' +import _ from 'lodash' +import ms from 'ms' + +import { LintingId, LintingRepository } from '../../infrastructure' +import { LintingAlreadyStartedError, LintingNotFoundError } from '../errors' +import { LintHandler } from './lint-handler' +import { LintTaskRepo } from './lint-task-repo' +import { LintTaskQueue, LintTaskQueueOptions } from './typings' + +export const MIN_LINTING_HEARTBEAT = ms('10s') +export const MAX_LINTING_HEARTBEAT = MIN_LINTING_HEARTBEAT * 3 +export const LINTING_PROGRESS_THROTTLE = MIN_LINTING_HEARTBEAT / 2 + +const TASK_OPTIONS: LintTaskQueueOptions = { + queueId: 'linting', + initialProgress: { start: 0, end: -1, current: 0 }, + initialData: { issues: [] }, + maxTasks: 2, + maxProgressDelay: MAX_LINTING_HEARTBEAT, + progressThrottle: LINTING_PROGRESS_THROTTLE +} + +const LINTING_PREFIX = 'linting-queue' + +export type LintQueueOptions = { + maxLinting?: number +} + +const idToString = (id: LintingId) => { + const { appId, modelId } = id + const stringModelId = NLUEngine.modelIdService.toString(modelId) + return `${appId}/${stringModelId}` +} + +export abstract class LintingQueue { + constructor(private taskQueue: LintTaskQueue, private logger: Logger) {} + + public initialize = this.taskQueue.initialize.bind(this.taskQueue) + public teardown = this.taskQueue.teardown.bind(this.taskQueue) + public getLocalLintingCount = this.taskQueue.getLocalTaskCount.bind(this.taskQueue) + + public queueLinting = async ( + appId: string, + modelId: NLUEngine.ModelId, + speed: IssueComputationSpeed, + trainInput: TrainInput + ) => { + try { + const lintId: LintingId = { appId, modelId, speed } + const lintKey = idToString(lintId) + await this.taskQueue.queueTask(lintId, trainInput) + this.logger.info(`[${lintKey}] Linting Queued.`) + } catch (thrown) { + if (thrown instanceof q.TaskAlreadyStartedError) { + throw new LintingAlreadyStartedError(appId, modelId) + } + throw thrown + } + } + + public async cancelLinting(appId: string, modelId: NLUEngine.ModelId, speed: IssueComputationSpeed): Promise<void> { + try { + const lintId: LintingId = { appId, modelId, speed } + await this.taskQueue.cancelTask(lintId) + } catch (thrown) { + if (thrown instanceof q.TaskNotFoundError || thrown instanceof q.TaskNotRunning) { + throw new LintingNotFoundError(appId, modelId, speed) + } + throw thrown + } + } +} + +export class PgLintingQueue extends LintingQueue { + constructor( + pgURL: string, + lintingRepo: LintingRepository, + engine: NLUEngine.Engine, + baseLogger: Logger, + opt: LintQueueOptions = {} + ) { + const lintingLogger = baseLogger.sub(LINTING_PREFIX) + const lintTaskRepo = new LintTaskRepo(lintingRepo) + const lintHandler = new LintHandler(engine, lintingLogger) + const options = + opt.maxLinting === undefined + ? TASK_OPTIONS + : { + ...TASK_OPTIONS, + maxTasks: opt.maxLinting + } + + const taskQueue = new q.PGDistributedTaskQueue(pgURL, lintTaskRepo, lintHandler, lintingLogger, idToString, options) + super(taskQueue, lintingLogger) + } +} + +export class LocalLintingQueue extends LintingQueue { + constructor( + lintingRepo: LintingRepository, + engine: NLUEngine.Engine, + baseLogger: Logger, + opt: LintQueueOptions = {} + ) { + const lintTaskRepo = new LintTaskRepo(lintingRepo) + const lintingLogger = baseLogger.sub(LINTING_PREFIX) + const lintHandler = new LintHandler(engine, lintingLogger) + + const options = + opt.maxLinting === undefined + ? TASK_OPTIONS + : { + ...TASK_OPTIONS, + maxTasks: opt.maxLinting + } + + const taskQueue = new q.LocalTaskQueue(lintTaskRepo, lintHandler, lintingLogger, idToString, options) + super(taskQueue, lintingLogger) + } +} diff --git a/packages/nlu-server/src/application/linting-queue/lint-handler.ts b/packages/nlu-server/src/application/linting-queue/lint-handler.ts new file mode 100644 index 00000000..5a8e5be8 --- /dev/null +++ b/packages/nlu-server/src/application/linting-queue/lint-handler.ts @@ -0,0 +1,36 @@ +import { DatasetIssue, IssueCode, LintingError } from '@botpress/nlu-client' +import * as NLUEngine from '@botpress/nlu-engine' +import { Logger } from '@bpinternal/log4bot' +import _ from 'lodash' +import { LintingId } from '../../infrastructure' +import { idToString } from '../training-queue' +import { LintTask, LintTaskProgress, LintTaskRunner, TerminatedLintTask } from './typings' + +export class LintHandler implements LintTaskRunner { + constructor(private _engine: NLUEngine.Engine, private _logger: Logger) {} + + public run = async (task: LintTask, progressCb: LintTaskProgress): Promise<TerminatedLintTask> => { + const lintKey = idToString(task) + try { + const allIssues: DatasetIssue<IssueCode>[] = [] + await this._engine.lint(lintKey, task.input, { + minSpeed: 'slow', + progressCallback: (currentCount: number, totalCount: number, issues: DatasetIssue<IssueCode>[]) => { + allIssues.push(...issues) + return progressCb({ start: 0, end: totalCount, current: currentCount }, { issues: allIssues }) + } + }) + this._logger.info(`[${lintKey}] Linting Done.`) + return { ...task, status: 'done' } + } catch (thrown) { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) + const error: LintingError = { message: err.message, stack: err.stack, type: 'internal' } + return { ...task, status: 'errored', error } + } + } + + public async cancel(task: LintingId): Promise<void> { + const lintKey = idToString(task) + return this._engine.cancelTraining(lintKey) + } +} diff --git a/packages/nlu-server/src/application/linting-queue/lint-task-repo.ts b/packages/nlu-server/src/application/linting-queue/lint-task-repo.ts new file mode 100644 index 00000000..0ff18402 --- /dev/null +++ b/packages/nlu-server/src/application/linting-queue/lint-task-repo.ts @@ -0,0 +1,112 @@ +import * as q from '@botpress/distributed' +import { LintingError, LintingStatus } from '@botpress/nlu-client' +import _ from 'lodash' +import { Linting, LintingId, LintingRepository } from '../../infrastructure' +import { MAX_LINTING_HEARTBEAT } from '.'
+import { LintTask, LintTaskRepository } from './typings' + +const zombieError: LintingError = { + type: 'zombie-linting', + message: `Zombie Linting: Linting has not been updated in more than ${MAX_LINTING_HEARTBEAT} ms.` +} + +const mapLintStatusToTaskStatus = (lintStatus: LintingStatus): q.TaskStatus => { + if (lintStatus === 'linting') { + return 'running' + } + if (lintStatus === 'linting-pending') { + return 'pending' + } + return lintStatus +} + +const mapTaskStatusToLintStatus = (taskStatus: Exclude<q.TaskStatus, 'zombie'>): LintingStatus => { + if (taskStatus === 'running') { + return 'linting' + } + if (taskStatus === 'pending') { + return 'linting-pending' + } + return taskStatus +} + +const mapLintingToTask = (linting: Linting): LintTask => { + const { appId, modelId, speed, status, cluster, currentCount, totalCount, dataset, error, issues } = linting + const isZombie = error?.type === 'zombie-linting' + return { + appId, + modelId, + speed, + cluster, + status: isZombie ? 'zombie' : mapLintStatusToTaskStatus(status), + data: { + issues + }, + input: dataset, + progress: { start: 0, end: totalCount, current: currentCount }, + error: isZombie ? undefined : error + } +} + +const mapTaskToLinting = (task: LintTask): Linting => { + const { appId, modelId, speed, status, cluster, progress, input, data, error } = task + const { issues } = data + const isZombie = status === 'zombie' + return { + appId, + modelId, + speed, + cluster, + status: isZombie ? 'errored' : mapTaskStatusToLintStatus(status), + dataset: input, + currentCount: progress.current, + totalCount: progress.end, + issues, + error: isZombie ? zombieError : error + } +} + +const mapTaskQueryToLintingQuery = (task: Partial<LintTask>): Partial<Linting> => { + const { appId, modelId, status, cluster, progress, input, data, error } = task + const isZombie = status === 'zombie' + return _.pickBy( + { + appId, + modelId, + cluster, + status: isZombie ? 'errored' : status && mapTaskStatusToLintStatus(status), + dataset: input, + currentCount: progress?.current, + issues: data?.issues, + error: isZombie ? zombieError : error + }, + (x) => x !== undefined + ) +} + +export class LintTaskRepo implements LintTaskRepository { + constructor(private _lintRepo: LintingRepository) {} + public has = this._lintRepo.has.bind(this._lintRepo) + + public async get(lintId: LintingId): Promise<LintTask | undefined> { + const linting = await this._lintRepo.get(lintId) + return linting && mapLintingToTask(linting) + } + + public async query(taskQuery: Partial<LintTask>): Promise<LintTask[]> { + const lintQuery = mapTaskQueryToLintingQuery(taskQuery) + const lintings = await this._lintRepo.query(lintQuery) + return lintings.map(mapLintingToTask) + } + + public async queryOlderThan(taskQuery: Partial<LintTask>, threshold: Date): Promise<LintTask[]> { + const lintQuery = mapTaskQueryToLintingQuery(taskQuery) + const lintings = await this._lintRepo.queryOlderThan(lintQuery, threshold) + return lintings.map(mapLintingToTask) + } + + public async set(task: LintTask): Promise<void> { + const linting = mapTaskToLinting(task) + return this._lintRepo.set(linting) + } +} diff --git a/packages/nlu-server/src/application/linting-queue/typings.ts b/packages/nlu-server/src/application/linting-queue/typings.ts new file mode 100644 index 00000000..79d4b9ff --- /dev/null +++ b/packages/nlu-server/src/application/linting-queue/typings.ts @@ -0,0 +1,15 @@ +import * as q from '@botpress/distributed' +import { DatasetIssue, IssueCode, LintingError, TrainInput } from '@botpress/nlu-client' +import { LintingId } from '../../infrastructure' + +type LintData = { + issues: DatasetIssue<IssueCode>[] +} + +export type TerminatedLintTask = q.TerminatedTask<LintingId, TrainInput, LintData, LintingError> +export type LintTask = q.Task<LintingId, TrainInput, LintData, LintingError> +export type LintTaskRunner = q.TaskRunner<LintingId, TrainInput, LintData, LintingError> +export type LintTaskProgress = q.ProgressCb<LintData> +export type LintTaskRepository = q.TaskRepository<LintingId, TrainInput, LintData, LintingError> +export type LintTaskQueue = q.TaskQueue<LintingId, TrainInput, LintData, LintingError> +export type LintTaskQueueOptions = q.QueueOptions<LintData> diff --git a/packages/nlu-server/src/application/serialize-model.ts b/packages/nlu-server/src/application/serialize-model.ts new file mode 100644 index 00000000..1cdbe9bd --- /dev/null +++ b/packages/nlu-server/src/application/serialize-model.ts @@ -0,0 +1,43 @@ +import { Model, ModelId } from '@botpress/nlu-engine' +import * as ptb from '@bpinternal/ptb-schema' +import _ from 'lodash' + +const PTBModelId = new ptb.PTBMessage('ModelId', { + specificationHash: { type: 'string', id: 1, rule: 'required' }, + contentHash: { type: 'string', id: 2, rule: 'required' }, + seed: { type: 'int32', id: 3, rule: 'required' }, + languageCode: { type: 'string', id: 4, rule: 'required' } +}) + +const PTBModel = new ptb.PTBMessage('Model', { + id: { type: PTBModelId, id: 1, rule: 'required' }, + startedAt: { type: 'string', id: 2, rule: 'required' }, + finishedAt: { type: 'string', id: 3, rule: 'required' }, + data: { type: 'bytes', id: 4, rule: 'required' } +}) + +/** useful to retrieve only the model id from the binary */ +const PTBPartialModel = new ptb.PTBMessage('Model', { + id: { type: PTBModelId, id: 1, rule: 'required' } +}) + +export const serializeModel = (model: Model): Buffer => { + const { id, startedAt, finishedAt, data } = model + const serialized = PTBModel.encode({ + id, + startedAt: startedAt.toISOString(), + finishedAt: finishedAt.toISOString(), + data + }) + return Buffer.from(serialized) +} + +export const deserializeModel = (buffer: Buffer): Model => { + const { id, finishedAt, startedAt, data } = PTBModel.decode(buffer) + return { id, finishedAt: new Date(finishedAt), startedAt: new Date(startedAt), data: Buffer.from(data) } +} + +export const deserializeModelId = (buffer: Buffer): ModelId => { + const { id } = PTBPartialModel.decode(buffer) + return id +}
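`PTBPartialModel` deliberately re-declares the `Model` message with only field 1: protobuf decoders skip fields they do not know, so `deserializeModelId` can read the id without touching the potentially large `data` bytes. A minimal round-trip sketch (`model` is assumed to be a trained `Model`):

```ts
const buf = serializeModel(model)
const id = deserializeModelId(buf) // decodes only the id, cheap even for big models
const full = deserializeModel(buf) // full decode, including the weights buffer
```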
diff --git a/packages/nlu-server/src/application/training-queue.ts b/packages/nlu-server/src/application/training-queue.ts deleted file mode 100644 index 8f32eb00..00000000 --- a/packages/nlu-server/src/application/training-queue.ts +++ /dev/null @@ -1,245 +0,0 @@ -import { Logger } from '@botpress/logger' -import { TrainingErrorType, TrainInput, TrainingStatus, TrainingError } from '@botpress/nlu-client' -import * as NLUEngine from '@botpress/nlu-engine' -import Bluebird from 'bluebird' -import _ from 'lodash' -import moment from 'moment' -import ms from 'ms' - -import { ModelRepository } from '../infrastructure/model-repo' -import { - Training, - TrainingId, - TrainingRepository, - TrainingState, - WrittableTrainingRepository -} from '../infrastructure/training-repo/typings' -import { serializeError } from '../utils/error-utils' -import { watchDog, WatchDog } from '../utils/watch-dog' -import { TrainingAlreadyStartedError, TrainingNotFoundError } from './errors' - -const MAX_MODEL_PER_USER_PER_LANG = 1 - -const TRAINING_HEARTBEAT_SECURITY_FACTOR = 3 -const MIN_TRAINING_HEARTBEAT = ms('10s') -const MAX_TRAINING_HEARTBEAT = MIN_TRAINING_HEARTBEAT * TRAINING_HEARTBEAT_SECURITY_FACTOR - -export interface QueueOptions { - maxTraining: number -} -const DEFAULT_OPTIONS: QueueOptions = { - maxTraining: 2 -} - -export default class TrainingQueue { - private logger: Logger - private options: QueueOptions - private task!: WatchDog<[]> - - constructor( - private engine: NLUEngine.Engine, - private modelRepo: ModelRepository, - private trainingRepo: TrainingRepository, - private _clusterId: string, - logger: Logger, - opt: Partial<QueueOptions> = {} - ) { - this.logger = logger.sub('training-queue') - this.options = { ...DEFAULT_OPTIONS, ..._.pickBy(opt) } - } - - public async initialize() { - this.task = watchDog(this._runTask.bind(this), MAX_TRAINING_HEARTBEAT * 2) - } - - public async teardown() { - return this.task.stop() - } - - public queueTraining = async (appId: string, modelId: NLUEngine.ModelId, trainInput: TrainInput) => { - const trainId: TrainingId = { modelId, appId } - const trainKey = this._toKey(trainId) - - await this.trainingRepo.inTransaction(async (repo) => { - const currentTraining = await repo.get(trainId) - if (currentTraining && (currentTraining.status === 'training' || currentTraining.status === 'training-pending')) { - throw new TrainingAlreadyStartedError(appId, modelId) - } - - const state: TrainingState = { - status: 'training-pending', - progress: 0, - cluster: this._clusterId - } - - this.logger.debug(`Queuing "${trainKey}"`) - return repo.set({ - ...trainId, - ...state, - dataset: trainInput - }) - }, 'queueTraining') - this.logger.info(`[${trainKey}] Training Queued.`) - - // to return asap - // eslint-disable-next-line @typescript-eslint/no-floating-promises - this.runTask() - } - - public async cancelTraining(appId: string, modelId: NLUEngine.ModelId): Promise<void> { - const trainId: TrainingId = { modelId, appId } - const trainKey = this._toKey(trainId) - - return this.trainingRepo.inTransaction(async (repo) => { - const currentTraining = await repo.get(trainId) - if (!currentTraining) { - throw new TrainingNotFoundError(modelId) - } - - const zombieTrainings = await this._getZombies(repo) - const isZombie = !!zombieTrainings.find((t) => this._areSame(t, trainId)) - - if (currentTraining.status === 'training-pending' || isZombie) { - const newTraining = { ...currentTraining, status: 'canceled' } - - return repo.set(newTraining) - } - 
if (currentTraining.cluster !== this._clusterId) { - this.logger.debug(`Training "${trainKey}" was not launched on this instance`) - return - } - - if (currentTraining.status === 'training') { - return this.engine.cancelTraining(trainKey) - } - }, 'cancelTraining') - } - - protected async runTask() { - return this.task.run() - } - - private _runTask = async () => { - return this.trainingRepo.inTransaction(async (repo) => { - const localTrainings = await repo.query({ cluster: this._clusterId, status: 'training' }) - if (localTrainings.length >= this.options.maxTraining) { - return - } - - const zombieTrainings = await this._getZombies(repo) - if (zombieTrainings.length) { - this.logger.debug(`Queuing back ${zombieTrainings.length} trainings because they seem to be zombies.`) - const error: TrainingError = { - type: 'zombie-training', - message: `Zombie Training: Training had not been updated in more than ${MAX_TRAINING_HEARTBEAT} ms.` - } - const newState: TrainingState = { status: 'errored', progress: 0, cluster: this._clusterId, error } - await Bluebird.each(zombieTrainings, (z) => repo.set({ ...z, ...newState })) - } - - const pendings = await repo.query({ status: 'training-pending' }) - if (pendings.length <= 0) { - return - } - - const training = pendings[0] - training.status = 'training' - - await repo.set(training) - - // floating promise to return fast from task - // eslint-disable-next-line @typescript-eslint/no-floating-promises - this._train(training) - }, '_runTask') - } - - private _areSame(t1: TrainingId, t2: TrainingId) { - return t1.appId === t2.appId && NLUEngine.modelIdService.areSame(t1.modelId, t2.modelId) - } - - private _getZombies = (repo: WrittableTrainingRepository) => { - const zombieThreshold = moment().subtract(MAX_TRAINING_HEARTBEAT, 'ms').toDate() - return repo.queryOlderThan({ status: 'training' }, zombieThreshold) - } - - private _train = async (training: Training) => { - const trainKey = this._toKey(training) - - this.logger.debug(`training "${trainKey}" is about to start.`) - - if (!training) { - throw new Error("Invalid state: training state can't be found") - } - - const progressCb = async (progress: number) => { - training.status = 'training' - training.progress = progress - await this.trainingRepo.inTransaction(async (repo) => { - return repo.set(training) - }, 'progressCallback') - } - const throttledCb = _.throttle(progressCb, MIN_TRAINING_HEARTBEAT / 2) - - const { dataset } = training - try { - const model = await this.engine.train(trainKey, dataset, { - progressCallback: throttledCb, - minProgressHeartbeat: MIN_TRAINING_HEARTBEAT - }) - throttledCb.flush() - - const { language: languageCode } = dataset - const { appId } = training - - const keep = MAX_MODEL_PER_USER_PER_LANG - 1 // TODO: make the max amount of models on FS (by appId + lang) configurable - await this.modelRepo.pruneModels(appId, { keep }, { languageCode }) - await this.modelRepo.saveModel(appId, model) - - this.logger.info(`[${trainKey}] Training Done.`) - - training.status = 'done' - await this.trainingRepo.inTransaction(async (repo) => { - return repo.set(training) - }, '_train_done') - } catch (err) { - throttledCb.cancel() - - if (NLUEngine.errors.isTrainingCanceled(err as Error)) { - this.logger.info(`[${trainKey}] Training Canceled.`) - - training.status = 'canceled' - await this.trainingRepo.inTransaction(async (repo) => { - return repo.set(training) - }, '_train_canceled') - return - } - - if (NLUEngine.errors.isTrainingAlreadyStarted(err as Error)) { - 
-        this.logger.warn(`[${trainKey}] Training Already Started.`) // This should never occur.
-        return
-      }
-
-      const type: TrainingErrorType = 'unknown'
-      training.status = 'errored'
-      training.error = { ...serializeError(err), type }
-
-      await this.trainingRepo.inTransaction(async (repo) => {
-        return repo.set(training)
-      }, '_train_errored')
-
-      if (type === 'unknown') {
-        this.logger.attachError(err as Error).error('an error occured during training')
-      }
-    } finally {
-      // to return asap
-      // eslint-disable-next-line @typescript-eslint/no-floating-promises
-      this.runTask()
-    }
-  }
-
-  private _toKey(id: TrainingId) {
-    const stringId = NLUEngine.modelIdService.toString(id.modelId)
-    return `${id.appId}/${stringId}`
-  }
-}
diff --git a/packages/nlu-server/src/application/training-queue/index.ts b/packages/nlu-server/src/application/training-queue/index.ts
new file mode 100644
index 00000000..8a47feaa
--- /dev/null
+++ b/packages/nlu-server/src/application/training-queue/index.ts
@@ -0,0 +1,129 @@
+import * as q from '@botpress/distributed'
+import { TrainInput } from '@botpress/nlu-client'
+import * as NLUEngine from '@botpress/nlu-engine'
+import { Logger } from '@bpinternal/log4bot'
+import _ from 'lodash'
+import ms from 'ms'
+
+import { ModelRepository, TrainingId, TrainingRepository } from '../../infrastructure'
+import { TrainingAlreadyStartedError, TrainingNotFoundError } from '../errors'
+import { TrainHandler } from './train-handler'
+import { TrainTaskRepo } from './train-task-repo'
+import { TrainTaskQueue, TrainTaskQueueOptions } from './typings'
+
+export const MIN_TRAINING_HEARTBEAT = ms('10s')
+export const MAX_TRAINING_HEARTBEAT = MIN_TRAINING_HEARTBEAT * 3
+export const TRAINING_PROGRESS_THROTTLE = MIN_TRAINING_HEARTBEAT / 2
+
+const TASK_OPTIONS: TrainTaskQueueOptions = {
+  queueId: 'training',
+  maxTasks: 2,
+  initialData: {},
+  initialProgress: { start: 0, end: 100, current: 0 },
+  maxProgressDelay: MAX_TRAINING_HEARTBEAT,
+  progressThrottle: TRAINING_PROGRESS_THROTTLE
+}
+
+export type TrainQueueOptions = {
+  maxTraining?: number
+}
+
+const TRAINING_PREFIX = 'training-queue'
+
+export const idToString = (id: TrainingId) => {
+  const { appId, modelId } = id
+  const stringModelId = NLUEngine.modelIdService.toString(modelId)
+  return `${appId}/${stringModelId}`
+}
+
+export abstract class TrainingQueue {
+  constructor(private trainingRepo: TrainingRepository, private taskQueue: TrainTaskQueue, private logger: Logger) {}
+
+  public addListener = this.trainingRepo.addListener.bind(this.trainingRepo)
+  public removeListener = this.trainingRepo.removeListener.bind(this.trainingRepo)
+  public initialize = this.taskQueue.initialize.bind(this.taskQueue)
+  public teardown = this.taskQueue.teardown.bind(this.taskQueue)
+  public getLocalTrainingCount = this.taskQueue.getLocalTaskCount.bind(this.taskQueue)
+
+  public queueTraining = async (appId: string, modelId: NLUEngine.ModelId, trainInput: TrainInput) => {
+    const trainId: TrainingId = { modelId, appId }
+    const trainKey: string = idToString(trainId)
+    try {
+      await this.taskQueue.queueTask(trainId, trainInput)
+      this.logger.info(`[${trainKey}] Training Queued.`)
+    } catch (thrown) {
+      if (thrown instanceof q.TaskAlreadyStartedError) {
+        throw new TrainingAlreadyStartedError(appId, modelId)
+      }
+      throw thrown
+    }
+  }
+
+  public async cancelTraining(appId: string, modelId: NLUEngine.ModelId): Promise<void> {
+    try {
+      await this.taskQueue.cancelTask({ modelId, appId })
+    } catch (thrown) {
+      if (thrown instanceof q.TaskNotFoundError || thrown instanceof q.TaskNotRunning) {
+        throw new TrainingNotFoundError(appId, modelId)
+      }
+      throw thrown
+    }
+  }
+}
+
+export class PgTrainingQueue extends TrainingQueue {
+  constructor(
+    pgURL: string,
+    trainingRepo: TrainingRepository,
+    engine: NLUEngine.Engine,
+    modelRepo: ModelRepository,
+    baseLogger: Logger,
+    opt: TrainQueueOptions = {}
+  ) {
+    const trainingLogger = baseLogger.sub('training-queue')
+    const trainTaskRepo = new TrainTaskRepo(trainingRepo)
+    const trainHandler = new TrainHandler(engine, modelRepo, trainingLogger)
+
+    const options =
+      opt.maxTraining === undefined
+        ? TASK_OPTIONS
+        : {
+            ...TASK_OPTIONS,
+            maxTasks: opt.maxTraining
+          }
+
+    const taskQueue = new q.PGDistributedTaskQueue(
+      pgURL,
+      trainTaskRepo,
+      trainHandler,
+      trainingLogger,
+      idToString,
+      options
+    )
+    super(trainingRepo, taskQueue, trainingLogger)
+  }
+}
+
+export class LocalTrainingQueue extends TrainingQueue {
+  constructor(
+    trainingRepo: TrainingRepository,
+    engine: NLUEngine.Engine,
+    modelRepo: ModelRepository,
+    baseLogger: Logger,
+    opt: TrainQueueOptions = {}
+  ) {
+    const trainingLogger = baseLogger.sub(TRAINING_PREFIX)
+    const trainTaskRepo = new TrainTaskRepo(trainingRepo)
+    const trainHandler = new TrainHandler(engine, modelRepo, trainingLogger)
+
+    const options = opt.maxTraining
+      ? {
+          ...TASK_OPTIONS,
+          maxTasks: opt.maxTraining
+        }
+      : TASK_OPTIONS
+
+    const taskQueue = new q.LocalTaskQueue(trainTaskRepo, trainHandler, trainingLogger, idToString, options)
+    super(trainingRepo, taskQueue, trainingLogger)
+  }
+}
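For context, a rough sketch of how the two queue variants above are meant to be wired, based only on the constructors visible in this diff (the helper name `makeLocalQueue` is hypothetical; `make-application.ts` later in this diff does the real wiring):

    import { Engine } from '@botpress/nlu-engine'
    import { Logger } from '@bpinternal/log4bot'
    import { FileSystemModelRepository, InMemoryTrainingRepo } from '../infrastructure'
    import { LocalTrainingQueue } from './training-queue'

    // Hypothetical: build the repositories, hand them to the queue, initialize it.
    const makeLocalQueue = async (engine: Engine, modelDir: string, logger: Logger): Promise<LocalTrainingQueue> => {
      const modelRepo = new FileSystemModelRepository(modelDir, logger)
      const trainRepo = new InMemoryTrainingRepo(logger)
      const queue = new LocalTrainingQueue(trainRepo, engine, modelRepo, logger, { maxTraining: 1 })
      await queue.initialize()
      return queue
    }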
diff --git a/packages/nlu-server/src/application/training-queue/train-handler.ts b/packages/nlu-server/src/application/training-queue/train-handler.ts
new file mode 100644
index 00000000..edaaa0dd
--- /dev/null
+++ b/packages/nlu-server/src/application/training-queue/train-handler.ts
@@ -0,0 +1,87 @@
+import { TrainingErrorType } from '@botpress/nlu-client'
+import * as NLUEngine from '@botpress/nlu-engine'
+import { Logger } from '@bpinternal/log4bot'
+import _ from 'lodash'
+import { ModelRepository, TrainingId } from '../../infrastructure'
+import { serializeModel } from '../serialize-model'
+import { idToString, MIN_TRAINING_HEARTBEAT } from '.'
+import { TerminatedTrainTask, TrainTask, TrainTaskProgress, TrainTaskRunner } from './typings'
+
+const MAX_MODEL_PER_USER_PER_LANG = 1
+
+export class TrainHandler implements TrainTaskRunner {
+  constructor(private engine: NLUEngine.Engine, private modelRepo: ModelRepository, private logger: Logger) {}
+
+  public run = async (task: TrainTask, progressCb: TrainTaskProgress): Promise<TerminatedTrainTask | undefined> => {
+    const trainKey = idToString(task)
+
+    this.logger.debug(`training "${trainKey}" is about to start.`)
+
+    const startTime = new Date()
+
+    const { input, appId } = task
+    try {
+      const model = await this.engine.train(trainKey, input, {
+        progressCallback: (p: number) => progressCb({ start: 0, end: 100, current: p }),
+        minProgressHeartbeat: MIN_TRAINING_HEARTBEAT
+      })
+
+      const modelBuffer = serializeModel(model)
+      const { language: languageCode } = input
+
+      const keep = MAX_MODEL_PER_USER_PER_LANG - 1 // TODO: make the max amount of models on FS (by appId + lang) configurable
+      await this.modelRepo.pruneModels(appId, { keep }, { languageCode })
+      await this.modelRepo.saveModel(appId, model.id, modelBuffer)
+
+      task.data.trainingTime = this._getTrainingTime(startTime)
+
+      this.logger.info(`[${trainKey}] Training Done.`)
+      return { ...task, status: 'done' }
+    } catch (thrownObject) {
+      const err = thrownObject instanceof Error ? thrownObject : new Error(`${thrownObject}`)
+
+      if (err instanceof NLUEngine.errors.TrainingCanceledError) {
+        this.logger.info(`[${trainKey}] Training Canceled.`)
+
+        task.data.trainingTime = this._getTrainingTime(startTime)
+        return { ...task, status: 'canceled' }
+      }
+
+      if (err instanceof NLUEngine.errors.TrainingAlreadyStartedError) {
+        this.logger.warn(`[${trainKey}] Training Already Started.`) // This should never occur.
+        return
+      }
+
+      let type: TrainingErrorType = 'internal'
+      if (err instanceof NLUEngine.errors.LangServerError) {
+        type = 'lang-server'
+        this.logger.attachError(err).error(`[${trainKey}] Error occurred with Language Server.`)
+      }
+
+      if (err instanceof NLUEngine.errors.DucklingServerError) {
+        type = 'duckling-server'
+        this.logger.attachError(err).error(`[${trainKey}] Error occurred with Duckling Server.`)
+      }
+
+      task.data = { trainingTime: this._getTrainingTime(startTime) }
+      const { message, stack } = err
+      task.error = { message, stack, type }
+
+      if (type === 'internal') {
+        this.logger.attachError(err as Error).error(`[${trainKey}] Error occurred during training.`)
+      }
+
+      return { ...task, status: 'errored' }
+    }
+  }
+
+  public cancel(task: TrainingId): Promise<void> {
+    const trainKey = idToString(task)
+    return this.engine.cancelTraining(trainKey)
+  }
+
+  private _getTrainingTime(startTime: Date) {
+    const endTime = new Date()
+    return endTime.getTime() - startTime.getTime()
+  }
+}
diff --git a/packages/nlu-server/src/application/training-queue/train-task-repo.ts b/packages/nlu-server/src/application/training-queue/train-task-repo.ts
new file mode 100644
index 00000000..11883598
--- /dev/null
+++ b/packages/nlu-server/src/application/training-queue/train-task-repo.ts
@@ -0,0 +1,113 @@
+import * as q from '@botpress/distributed'
+import { TrainingError, TrainingStatus } from '@botpress/nlu-client'
+import _ from 'lodash'
+import { Training, TrainingId, TrainingRepository } from '../../infrastructure'
+import { MAX_TRAINING_HEARTBEAT } from '.'
+import { TrainTask, TrainTaskRepository } from './typings'
+
+/** Maps tasks to trainings */
+
+const zombieError: TrainingError = {
+  type: 'zombie-training',
+  message: `Zombie Training: Training has not been updated in more than ${MAX_TRAINING_HEARTBEAT} ms.`
+}
+
+const mapTrainStatusToTaskStatus = (trainStatus: TrainingStatus): q.TaskStatus => {
+  if (trainStatus === 'training') {
+    return 'running'
+  }
+  if (trainStatus === 'training-pending') {
+    return 'pending'
+  }
+  return trainStatus
+}
+
+const mapTaskStatusToTrainStatus = (taskStatus: Exclude<q.TaskStatus, 'zombie'>): TrainingStatus => {
+  if (taskStatus === 'running') {
+    return 'training'
+  }
+  if (taskStatus === 'pending') {
+    return 'training-pending'
+  }
+  return taskStatus
+}
+
+const mapTrainingToTask = (training: Training): TrainTask => {
+  const { appId, modelId, status, cluster, progress, dataset, trainingTime, error } = training
+  const isZombie = error?.type === 'zombie-training'
+
+  return {
+    appId,
+    modelId,
+    cluster,
+    status: isZombie ? 'zombie' : mapTrainStatusToTaskStatus(status),
+    progress: { start: 0, end: 100, current: progress },
+    data: { trainingTime },
+    input: dataset,
+    error: isZombie ? undefined : error
+  }
+}
+
+const mapTaskQueryToTrainingQuery = (task: Partial<TrainTask>): Partial<Training> => {
+  const { appId, modelId, status, cluster, progress, input, data, error } = task
+  const { trainingTime } = data ?? {}
+  const isZombie = status === 'zombie'
+
+  return _.pickBy(
+    {
+      appId,
+      modelId,
+      cluster,
+      status: isZombie ? 'errored' : status && mapTaskStatusToTrainStatus(status),
+      trainingTime,
+      dataset: input,
+      progress: progress?.current,
+      error: isZombie ? zombieError : error
+    },
+    (x) => x !== undefined
+  )
+}
+
+const mapTaskToTraining = (task: TrainTask): Training => {
+  const { appId, modelId, input, data, error, status, cluster, progress } = task
+  const { trainingTime } = data
+  const isZombie = status === 'zombie'
+
+  return {
+    appId,
+    modelId,
+    cluster,
+    status: isZombie ? 'errored' : mapTaskStatusToTrainStatus(status),
+    trainingTime,
+    dataset: input,
+    progress: progress.current,
+    error: isZombie ? zombieError : error
+  }
+}
+
+export class TrainTaskRepo implements TrainTaskRepository {
+  constructor(private _trainRepo: TrainingRepository) {}
+  public has = this._trainRepo.has.bind(this._trainRepo)
+
+  public async get(taskId: TrainingId): Promise<TrainTask | undefined> {
+    const training = await this._trainRepo.get(taskId)
+    return training && mapTrainingToTask(training)
+  }
+
+  public async query(taskQuery: Partial<TrainTask>): Promise<TrainTask[]> {
+    const trainQuery = mapTaskQueryToTrainingQuery(taskQuery)
+    const trainings = await this._trainRepo.query(trainQuery)
+    return trainings.map(mapTrainingToTask)
+  }
+
+  public async queryOlderThan(taskQuery: Partial<TrainTask>, threshold: Date): Promise<TrainTask[]> {
+    const trainQuery = mapTaskQueryToTrainingQuery(taskQuery)
+    const trainings = await this._trainRepo.queryOlderThan(trainQuery, threshold)
+    return trainings.map(mapTrainingToTask)
+  }
+
+  public async set(task: TrainTask): Promise<void> {
+    const training = mapTaskToTraining(task)
+    return this._trainRepo.set(training)
+  }
+}
diff --git a/packages/nlu-server/src/application/training-queue/typings.ts b/packages/nlu-server/src/application/training-queue/typings.ts
new file mode 100644
index 00000000..0faecb78
--- /dev/null
+++ b/packages/nlu-server/src/application/training-queue/typings.ts
@@ -0,0 +1,15 @@
+import * as q from '@botpress/distributed'
+import { TrainingError, TrainInput } from '@botpress/nlu-client'
+import { TrainingId } from '../../infrastructure'
+
+type TrainData = {
+  trainingTime?: number
+}
+
+export type TrainTask = q.Task
+export type TerminatedTrainTask = q.TerminatedTask
+export type TrainTaskRunner = q.TaskRunner
+export type TrainTaskProgress = q.ProgressCb
+export type TrainTaskRepository = q.TaskRepository
+export type TrainTaskQueue = q.TaskQueue
+export type TrainTaskQueueOptions = q.QueueOptions
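The TrainTaskRepo above translates between the two status vocabularies ('training' ↔ 'running', 'training-pending' ↔ 'pending', zombie ↔ errored-with-zombieError). A minimal sketch of how a caller might rely on that mapping (the helper name `countRunningTrainings` is hypothetical):

    import { TrainingRepository } from '../../infrastructure'
    import { TrainTaskRepo } from './train-task-repo'

    // Hypothetical: querying tasks with status 'running' reaches the underlying
    // training repo as status 'training', via mapTaskQueryToTrainingQuery.
    const countRunningTrainings = async (trainingRepo: TrainingRepository): Promise<number> => {
      const taskRepo = new TrainTaskRepo(trainingRepo)
      const running = await taskRepo.query({ status: 'running' })
      return running.length
    }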
diff --git a/packages/nlu-server/src/bootstrap/banner.ts b/packages/nlu-server/src/bootstrap/banner.ts
index 455278a8..b651e78a 100644
--- a/packages/nlu-server/src/bootstrap/banner.ts
+++ b/packages/nlu-server/src/bootstrap/banner.ts
@@ -1,15 +1,15 @@
-import { centerText, Logger } from '@botpress/logger'
+import { Logger } from '@bpinternal/log4bot'
 import chalk from 'chalk'
 import _ from 'lodash'
 import moment from 'moment'
 
-interface BuildMetadata {
+type BuildMetadata = {
   date: number
   branch: string
 }
 
-interface BannerConfig {
+type BannerConfig = {
   title: string
   version: string
   buildInfo?: BuildMetadata
@@ -18,6 +18,11 @@ interface BannerConfig {
   logger: Logger
 }
 
+const centerText = (text: string, width: number, indent: number = 0) => {
+  const padding = Math.floor((width - text.length) / 2)
+  return _.repeat(' ', padding + indent) + text + _.repeat(' ', padding)
+}
+
 export const showBanner = (config: BannerConfig) => {
   const { title, version, buildInfo, logScopeLength, bannerWidth, logger } = config
diff --git a/packages/nlu-server/src/bootstrap/config.ts b/packages/nlu-server/src/bootstrap/config.ts
index 4dbab8a8..da9776c9 100644
--- a/packages/nlu-server/src/bootstrap/config.ts
+++ b/packages/nlu-server/src/bootstrap/config.ts
@@ -1,126 +1,38 @@
 import bytes from 'bytes'
-import fse from 'fs-extra'
 import { getAppDataPath } from '../app-data'
-
-interface LanguageSource {
-  endpoint: string
-  authToken?: string
-}
-
-interface BaseOptions {
-  host: string
-  port: number
-  limitWindow: string
-  limit: number
-  bodySize: string
-  batchSize: number
-  modelCacheSize: string
-  dbURL?: string
-  modelDir: string
-  verbose: number
-  doc: boolean
-  logFilter?: string[]
-  apmEnabled?: boolean
-  apmSampleRate?: number
-  maxTraining: number
-}
-
-export type CommandLineOptions = Partial<BaseOptions> & {
-  languageURL: string
-  languageAuthToken?: string
-  ducklingURL: string
-  ducklingEnabled: boolean
-  config?: string
-}
-
-export type NLUServerOptions = BaseOptions & {
-  languageSources: LanguageSource[] // when passed by env variable, there can be more than one lang server
-  ducklingURL: string
-  ducklingEnabled: boolean
-  legacyElection: boolean // not available from CLI
-}
+import { NLUServerOptions, CommandLineOptions } from '../typings'
 
 const DEFAULT_OPTIONS = (): NLUServerOptions => ({
-  host: 'localhost',
   port: 3200,
+  host: 'localhost',
+  modelDir: getAppDataPath(),
+  dbURL: undefined,
   limit: 0,
   limitWindow: '1h',
-  bodySize: '2mb',
-  batchSize: 1,
-  languageSources: [{ endpoint: 'https://lang-01.botpress.io' }],
+  languageURL: 'https://lang-01.botpress.io',
+  languageAuthToken: undefined,
+  apmEnabled: false,
+  apmSampleRate: undefined,
   ducklingURL: 'https://duckling.botpress.io',
   ducklingEnabled: true,
+  bodySize: '2mb',
+  modelSize: '1gb',
+  batchSize: -1,
   modelCacheSize: '2.5gb',
-  verbose: 3,
   doc: false,
-  logFilter: undefined,
-  legacyElection: false,
-  modelDir: getAppDataPath(),
-  maxTraining: 2
+  logLevel: 'info',
+  prometheusEnabled: false,
+  debugFilter: undefined,
+  logFormat: 'text',
+  maxTraining: 2,
+  maxLinting: 2,
+  modelTransferEnabled: false
})
 
-export type ConfigSource = 'environment' | 'cli' | 'file'
-
-const _mapCli = (c: CommandLineOptions): Partial<NLUServerOptions> => {
-  const { ducklingEnabled, ducklingURL, languageURL, languageAuthToken } = c
-  return {
-    ...c,
-    languageSources: [
-      {
-        endpoint: languageURL,
-        authToken: languageAuthToken
-      }
-    ],
-    ducklingEnabled,
-    ducklingURL,
-    legacyElection: false
-  }
-}
-
-const readEnvJSONConfig = (): NLUServerOptions | null => {
-  const rawContent = process.env.NLU_SERVER_CONFIG
-  if (!rawContent) {
-    return null
-  }
-  try {
-    const parsedContent = JSON.parse(rawContent)
-    const defaults = DEFAULT_OPTIONS()
-    return { ...defaults, ...parsedContent }
-  } catch {
-    return null
-  }
-}
-
-const readFileConfig = async (configPath: string): Promise<NLUServerOptions> => {
-  try {
-    const rawContent = await fse.readFile(configPath, 'utf8')
-    const parsedContent = JSON.parse(rawContent)
-    const defaults = DEFAULT_OPTIONS()
-    return { ...defaults, ...parsedContent }
-  } catch (err) {
-    const e = new Error(`The following errored occured when reading config file "${configPath}": ${err.message}`)
-    e.stack = err.stack
-    throw e
-  }
-}
-
-export const getConfig = async (
-  c: CommandLineOptions
-): Promise<{ options: NLUServerOptions; source: ConfigSource }> => {
-  const envConfig = readEnvJSONConfig()
-  if (envConfig) {
-    return { options: envConfig, source: 'environment' }
-  }
-
-  if (c.config) {
-    const options = await readFileConfig(c.config)
-    return { options, source: 'file' }
-  }
-
-  const cliOptions = _mapCli(c)
+export const getConfig = async (cliOptions: CommandLineOptions): Promise<NLUServerOptions> => {
   const defaults = DEFAULT_OPTIONS()
   const options: NLUServerOptions = { ...defaults, ...cliOptions }
-  return { options, source: 'cli' }
+  return options
 }
 
 export const validateConfig = (options: NLUServerOptions) => {
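With the environment and file loaders removed, configuration is now a single merge of defaults and CLI flags. A minimal usage sketch, assuming CommandLineOptions accepts a subset of the server options (as the spread in getConfig suggests):

    import { getConfig, validateConfig } from './config'

    // Hypothetical caller: any flag not provided falls back to DEFAULT_OPTIONS().
    const loadOptions = async () => {
      const options = await getConfig({ languageURL: 'http://localhost:3100' })
      validateConfig(options)
      return options
    }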
diff --git a/packages/nlu-server/src/bootstrap/documentation.ts b/packages/nlu-server/src/bootstrap/documentation.ts
index 2108d1db..23b8dc66 100644
--- a/packages/nlu-server/src/bootstrap/documentation.ts
+++ b/packages/nlu-server/src/bootstrap/documentation.ts
@@ -1,23 +1,22 @@
-import { Logger } from '@botpress/logger'
+import { Logger } from '@bpinternal/log4bot'
 import chalk from 'chalk'
-import { NLUServerOptions } from './config'
+import { NLUServerOptions } from '../typings'
 
-const GH_TYPINGS_FILE = 'https://github.com/botpress/nlu/blob/master/packages/nlu-client/src/typings/sdk.d.ts'
 const GH_TRAIN_INPUT_EXAMPLE =
   'https://github.com/botpress/nlu/blob/master/packages/nlu-server/examples/train-example.json'
 const NPM_NLU_CLIENT = 'https://www.npmjs.com/package/@botpress/nlu-client'
 
 export const displayDocumentation = (logger: Logger, options: NLUServerOptions) => {
   const { host, port } = options
-  const baseUrl = `http://${host}:${port}/v1`
+  const baseUrl = `http://${host}:${port}/`
 
   logger.info(chalk`
-{bold {underline Available Routes}}
+{bold {underline Important Routes}}
 
 {green /**
  * Gets the current version of the NLU engine being used. Usefull to test if your installation is working.
- * @returns {bold info}: version, health and supported languages.
+ * @returns {bold info}: version and supported languages.
  */}
 {bold GET ${baseUrl}/info}
 
@@ -33,14 +32,6 @@ export const displayDocumentation = (logger: Logger, options: NLUServerOptions)
  */}
 {bold POST ${baseUrl}/train}
 
-{green /**
- * List all trainings.
- * @header {bold x-app-id} Application ID to make sure there's no collision between models of different applications.
- * @query_parameter {bold lang} Language code to filter trainings. {yellow ** Optionnal **}
- * @returns {bold trainings} List of all trainings for your app id.
- */}
-{bold GET ${baseUrl}/train}
-
 {green /**
  * Gets a training progress status.
  * @header {bold x-app-id} Application ID to make sure there's no collision between models of different applications.
@@ -49,22 +40,6 @@ export const displayDocumentation = (logger: Logger, options: NLUServerOptions)
  */}
 {bold GET ${baseUrl}/train/:modelId}
 
-{green /**
- * List all models for a given app Id and secret.
- * @header {bold x-app-id} Application ID to make sure there's no collision between models of different applications.
- * @path_parameter {bold modelId} The model id for which you seek the training progress.
- * @returns {bold models} Array of strings model ids available for prediction.
- */}
-{bold GET ${baseUrl}/models/:modelId}
-
-{green /**
- * Cancels a training.
- * @header {bold x-app-id} Application ID to make sure there's no collision between models of different applications.
- * @path_parameter {bold modelId} The model id for which you want to cancel the training.
- * @returns {bold models} Array of strings model ids that where pruned.
- */}
-{bold POST ${baseUrl}/models/prune}
-
 {green /**
  * Cancels a training.
  * @header {bold x-app-id} Application ID to make sure there's no collision between models of different applications.
@@ -81,19 +56,6 @@ export const displayDocumentation = (logger: Logger, options: NLUServerOptions)
  */}
 {bold POST ${baseUrl}/predict/:modelId}
 
-{green /**
- * Perform prediction for a text input.
- * @header {bold x-app-id} Application ID to make sure there's no collision between models of different applications.
- * @path_parameter {bold modelId} The model id you want to use for prediction.
- * @body_parameter {bold utterances} Array of text for which you want a prediction.
- * @body_parameter {bold models} Array of strings model ids you want to use to detect language. {yellow ** Optionnal **}
- * @returns {bold detectedLanguages} Array of string language codes.
- */}
-{bold POST ${baseUrl}/detect-lang}
-
-{bold For more detailed information on typings, see
-${GH_TYPINGS_FILE}}.
-
 {bold For a complete example on training input, see
 ${GH_TRAIN_INPUT_EXAMPLE}}.
diff --git a/packages/nlu-server/src/bootstrap/launcher.ts b/packages/nlu-server/src/bootstrap/launcher.ts
index c40b2430..bc2594c7 100644
--- a/packages/nlu-server/src/bootstrap/launcher.ts
+++ b/packages/nlu-server/src/bootstrap/launcher.ts
@@ -1,17 +1,14 @@
-import { Logger } from '@botpress/logger'
+import { Logger } from '@bpinternal/log4bot'
 import chalk from 'chalk'
 import _ from 'lodash'
 import ms from 'ms'
-import { BuildInfo } from '../typings'
+import { BuildInfo, NLUServerOptions } from '../typings'
 import { showBanner } from './banner'
-import { ConfigSource, NLUServerOptions } from './config'
 import { displayDocumentation } from './documentation'
 
-interface LaunchingInfo {
+type LaunchingInfo = {
   version: string
   buildInfo?: BuildInfo
-  configSource: ConfigSource
-  configFile?: string
 }
 
 export const logLaunchingMessage = async (info: NLUServerOptions & LaunchingInfo, launcherLogger: Logger) => {
@@ -19,17 +16,11 @@ export const logLaunchingMessage = async (info: NLUServerOptions & LaunchingInfo
     title: 'Botpress Standalone NLU',
     version: info.version,
     buildInfo: info.buildInfo,
-    logScopeLength: 9,
+    logScopeLength: 0,
     bannerWidth: 75,
     logger: launcherLogger
   })
 
-  if (info.configSource === 'environment') {
-    launcherLogger.info('Loading config from environment variables')
-  } else if (info.configSource === 'file') {
-    launcherLogger.info(`Loading config from file "${info.configFile}"`)
-  }
-
   if (info.limit) {
     launcherLogger.info(
       `limit: ${chalk.greenBright('enabled')} allowing ${info.limit} requests/IP address in a ${
@@ -45,9 +36,7 @@ export const logLaunchingMessage = async (info: NLUServerOptions & LaunchingInfo
   } else {
     launcherLogger.info(`duckling: ${chalk.redBright('disabled')}`)
   }
-  for (const langSource of info.languageSources) {
-    launcherLogger.info(`lang server: url=${langSource.endpoint}`)
-  }
+  launcherLogger.info(`lang server: url=${info.languageURL}`)
 
   launcherLogger.info(`body size: allowing HTTP requests body of size ${info.bodySize}`)
 
@@ -57,10 +46,16 @@ export const logLaunchingMessage = async (info: NLUServerOptions & LaunchingInfo
     launcherLogger.info(`models stored at "${info.modelDir}"`)
   }
 
-  if (info.batchSize > 0) {
+  if (info.batchSize > 1) {
     launcherLogger.info(`batch size: allowing up to ${info.batchSize} predictions in one call to POST /predict`)
   }
 
+  if (info.modelTransferEnabled) {
+    launcherLogger.info(`model transfer: ${chalk.greenBright('enabled')}`)
+  } else {
+    launcherLogger.info(`model transfer: ${chalk.redBright('disabled')}`)
+  }
+
   if (info.doc) {
     const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms))
     await sleep(ms('1s'))
diff --git a/packages/nlu-server/src/bootstrap/make-application.ts b/packages/nlu-server/src/bootstrap/make-application.ts
index a13c18f3..13cb7a5e 100644
--- a/packages/nlu-server/src/bootstrap/make-application.ts
+++ b/packages/nlu-server/src/bootstrap/make-application.ts
@@ -1,71 +1,95 @@
-import { makePostgresTrxQueue } from '@botpress/locks'
-import { Logger } from '@botpress/logger'
-import chokidar from 'chokidar'
-import knex from 'knex'
-import { nanoid } from 'nanoid'
-import PGPubsub from 'pg-pubsub'
+import { Engine } from '@botpress/nlu-engine'
+import { Logger } from '@bpinternal/log4bot'
+import Knex from 'knex'
 import { Application } from '../application'
-import { DistributedTrainingQueue } from '../application/distributed-training-queue'
-import TrainingQueue from '../application/training-queue'
-import { makeGhost } from '../infrastructure/make-ghost'
-import { ModelRepository } from '../infrastructure/model-repo'
-import { DbTrainingRepository } from '../infrastructure/training-repo/db-training-repo'
-import InMemoryTrainingRepo from '../infrastructure/training-repo/in-memory-training-repo'
-import { Broadcaster } from '../utils/broadcast'
-import { NLUServerOptions } from './config'
+import { LintingQueue, LocalLintingQueue, PgLintingQueue } from '../application/linting-queue'
+import { TrainQueueOptions, TrainingQueue, PgTrainingQueue, LocalTrainingQueue } from '../application/training-queue'
+import {
+  DbTrainingRepository,
+  InMemoryTrainingRepo,
+  TrainingRepository,
+  FileSystemModelRepository,
+  DbModelRepository,
+  ModelRepository
+} from '../infrastructure'
+import { InMemoryLintingRepo, LintingRepository, DatabaseLintingRepo } from '../infrastructure/linting-repo'
+import { NLUServerOptions } from '../typings'
 import { makeEngine } from './make-engine'
 
-const CLUSTER_ID = nanoid()
+type Services = {
+  modelRepo: ModelRepository
+  trainRepo: TrainingRepository
+  trainingQueue: TrainingQueue
+  lintingRepo: LintingRepository
+  lintingQueue: LintingQueue
+}
 
-const makeKnexDb = (dbURL: string) => {
-  return knex({
-    connection: dbURL,
-    client: 'pg'
-  })
+const makeServicesWithoutDb = (modelDir: string) => async (
+  engine: Engine,
+  logger: Logger,
+  queueOptions?: Partial<TrainQueueOptions>
+): Promise<Services> => {
+  const modelRepo = new FileSystemModelRepository(modelDir, logger)
+  const trainRepo = new InMemoryTrainingRepo(logger)
+  const trainingQueue = new LocalTrainingQueue(trainRepo, engine, modelRepo, logger, queueOptions)
+  const lintingRepo = new InMemoryLintingRepo(logger)
+  const lintingQueue = new LocalLintingQueue(lintingRepo, engine, logger)
+  return {
+    modelRepo,
+    trainRepo,
+    trainingQueue,
+    lintingRepo,
+    lintingQueue
+  }
 }
 
-const makeBroadcaster = (dbURL: string) => {
-  const dummyLogger = () => {}
-  const pubsub = new PGPubsub(dbURL, {
-    log: dummyLogger
-  })
-  return new Broadcaster(pubsub)
+const makeServicesWithDb = (dbURL: string) => async (
+  engine: Engine,
+  logger: Logger,
+  queueOptions?: Partial<TrainQueueOptions>
+): Promise<Services> => {
+  const knexDb = Knex({ connection: dbURL, client: 'pg' })
+
+  const modelRepo = new DbModelRepository(knexDb, logger)
+  const trainRepo = new DbTrainingRepository(knexDb, logger)
+  const trainingQueue = new PgTrainingQueue(dbURL, trainRepo, engine, modelRepo, logger, queueOptions)
+  const lintingRepo = new DatabaseLintingRepo(knexDb, logger, engine)
+  const lintingQueue = new PgLintingQueue(dbURL, lintingRepo, engine, logger)
+  return {
+    modelRepo,
+    trainRepo,
+    trainingQueue,
+    lintingRepo,
+    lintingQueue
+  }
 }
 
 export const makeApplication = async (
   options: NLUServerOptions,
   serverVersion: string,
-  baseLogger: Logger,
-  watcher: chokidar.FSWatcher
+  baseLogger: Logger
 ): Promise<Application> => {
   const engine = await makeEngine(options, baseLogger.sub('Engine'))
-
   const { dbURL, modelDir } = options
+  const serviceMaker = dbURL ? makeServicesWithDb(dbURL) : makeServicesWithoutDb(modelDir)
+  const { modelRepo, trainRepo, trainingQueue, lintingRepo, lintingQueue } = await serviceMaker(
+    engine,
+    baseLogger,
+    options
+  )
-
-  const ghost = makeGhost(baseLogger, modelDir, watcher, dbURL)
-  await ghost.initialize(!!dbURL)
-
-  const modelRepo = new ModelRepository(ghost, baseLogger)
-
-  const loggingCb = (msg: string) => baseLogger.sub('trx-queue').debug(msg)
-
-  const trainRepo = dbURL
-    ? new DbTrainingRepository(makeKnexDb(dbURL), makePostgresTrxQueue(dbURL, loggingCb), baseLogger, CLUSTER_ID)
-    : new InMemoryTrainingRepo(baseLogger)
-
-  const trainingQueue = dbURL
-    ? new DistributedTrainingQueue(
-        engine,
-        modelRepo,
-        trainRepo,
-        CLUSTER_ID,
-        baseLogger,
-        makeBroadcaster(dbURL),
-        options
-      )
-    : new TrainingQueue(engine, modelRepo, trainRepo, CLUSTER_ID, baseLogger)
-
-  const application = new Application(modelRepo, trainRepo, trainingQueue, engine, serverVersion, baseLogger)
+  const { modelTransferEnabled } = options
+  const application = new Application(
+    modelRepo,
+    trainRepo,
+    lintingRepo,
+    trainingQueue,
+    lintingQueue,
+    engine,
+    serverVersion,
+    baseLogger,
+    { modelTransferEnabled }
+  )
   await application.initialize()
   return application
 }
diff --git a/packages/nlu-server/src/bootstrap/make-engine.ts b/packages/nlu-server/src/bootstrap/make-engine.ts
index d7387c36..9ec11538 100644
--- a/packages/nlu-server/src/bootstrap/make-engine.ts
+++ b/packages/nlu-server/src/bootstrap/make-engine.ts
@@ -1,8 +1,8 @@
-import { Logger } from '@botpress/logger'
 import * as NLUEngine from '@botpress/nlu-engine'
+import { Logger } from '@bpinternal/log4bot'
 import _ from 'lodash'
 import { getAppDataPath } from '../app-data'
-import { NLUServerOptions } from './config'
+import { NLUServerOptions } from '../typings'
 
 const wrapLogger = (logger: Logger): NLUEngine.Logger => {
   return {
@@ -17,25 +17,15 @@ const wrapLogger = (logger: Logger): NLUEngine.Logger => {
 export const makeEngine = async (options: NLUServerOptions, logger: Logger) => {
   const loggerWrapper: NLUEngine.Logger = wrapLogger(logger)
 
-  try {
-    const { ducklingEnabled, ducklingURL, modelCacheSize, languageSources, legacyElection } = options
-    const config: NLUEngine.Config = {
-      languageSources,
-      ducklingEnabled,
-      ducklingURL,
-      modelCacheSize,
-      legacyElection,
-      cachePath: getAppDataPath()
-    }
-
-    const engine = await NLUEngine.makeEngine(config, loggerWrapper)
-    return engine
-  } catch (err) {
-    logger
-      .attachError(err)
-      .error(
-        'There was an error while initializing Engine tools. Check out the connection to your language and Duckling server.'
-      )
-    throw err
+  const { ducklingEnabled, ducklingURL, modelCacheSize, languageURL, languageAuthToken } = options
+  const config: NLUEngine.Config = {
+    languageURL,
+    languageAuthToken,
+    ducklingEnabled,
+    ducklingURL,
+    modelCacheSize,
+    cachePath: getAppDataPath()
   }
+
+  return NLUEngine.makeEngine(config, loggerWrapper)
 }
diff --git a/packages/nlu-server/src/bootstrap/server-listen.ts b/packages/nlu-server/src/bootstrap/server-listen.ts
new file mode 100644
index 00000000..5a689016
--- /dev/null
+++ b/packages/nlu-server/src/bootstrap/server-listen.ts
@@ -0,0 +1,15 @@
+import { Server } from 'http'
+import * as types from '../typings'
+
+export const serverListen = (httpServer: Server, options: types.NLUServerOptions): Promise<void> => {
+  return new Promise((resolve, reject) => {
+    try {
+      const hostname = options.host === 'localhost' ? undefined : options.host
+      httpServer.listen(options.port, hostname, undefined, () => {
+        resolve()
+      })
+    } catch (err) {
+      reject(err)
+    }
+  })
+}
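A small usage sketch for the new helper; note that a host of 'localhost' is passed to listen() as undefined, so Node binds to all interfaces:

    import { createServer } from 'http'
    import { serverListen } from './server-listen'
    import * as types from '../typings'

    // Hypothetical smoke test: serverListen wraps httpServer.listen in a promise.
    const start = async (options: types.NLUServerOptions): Promise<void> => {
      const server = createServer((_req, res) => res.end('ok'))
      await serverListen(server, options)
    }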
diff --git a/packages/nlu-server/src/bootstrap/watcher.ts b/packages/nlu-server/src/bootstrap/watcher.ts
deleted file mode 100644
index 8bd65d44..00000000
--- a/packages/nlu-server/src/bootstrap/watcher.ts
+++ /dev/null
@@ -1,14 +0,0 @@
-import chokidar from 'chokidar'
-import path from 'path'
-import { getProjectLocation } from '../project-location'
-
-export const buildWatcher = () => {
-  const projectLocation = getProjectLocation()
-
-  const foldersToWatch = [path.join(projectLocation, 'data', 'bots'), path.join(projectLocation, 'data', 'global')]
-
-  return chokidar.watch(foldersToWatch, {
-    ignoreInitial: true,
-    ignorePermissionErrors: true
-  })
-}
diff --git a/packages/nlu-server/src/index.ts b/packages/nlu-server/src/index.ts
index b098cb04..ba0a6f58 100644
--- a/packages/nlu-server/src/index.ts
+++ b/packages/nlu-server/src/index.ts
@@ -1,16 +1,16 @@
-import { LoggerLevel, makeLogger } from '@botpress/logger'
-import Bluebird from 'bluebird'
+import { Logger, JSONFormatter, TextFormatter } from '@bpinternal/log4bot'
 import { createServer } from 'http'
 import _ from 'lodash'
 import path from 'path'
 import { createAPI } from './api'
-import { CommandLineOptions, getConfig, validateConfig } from './bootstrap/config'
+import { getConfig, validateConfig } from './bootstrap/config'
 import { logLaunchingMessage } from './bootstrap/launcher'
 import { makeApplication } from './bootstrap/make-application'
-import { buildWatcher } from './bootstrap/watcher'
+import { serverListen } from './bootstrap/server-listen'
 import { requireJSON } from './require-json'
 import * as types from './typings'
+import { listenForUncaughtErrors } from './uncaught-errors'
 
 const packageJsonPath = path.resolve(__dirname, '../package.json')
 const buildInfoPath = path.resolve(__dirname, '../.buildinfo.json')
@@ -22,39 +22,35 @@ if (!packageJson) {
 
 const { version: pkgVersion } = packageJson
 
+export * from './typings'
 export const version = pkgVersion
 
-export const run: typeof types.run = async (cliOptions: CommandLineOptions) => {
-  const { options, source: configSource } = await getConfig(cliOptions)
+export const run = async (cliOptions: types.CommandLineOptions) => {
+  const options = await getConfig(cliOptions)
   validateConfig(options)
 
-  const baseLogger = makeLogger({
-    level: Number(options.verbose) !== NaN ? Number(options.verbose) : LoggerLevel.Info,
-    minLevel: LoggerLevel.Error,
-    filters: options.logFilter
+  const formatter = options.logFormat === 'json' ? new JSONFormatter() : new TextFormatter()
+  const baseLogger = new Logger('', {
+    level: options.logLevel,
+    filters: options.debugFilter ? { debug: options.debugFilter } : {},
+    prefix: 'NLU',
+    formatter
   })
 
   const launcherLogger = baseLogger.sub('Launcher')
   launcherLogger.configure({
-    minLevel: LoggerLevel.Info // Launcher always display
+    level: 'info' // Launcher always display
   })
 
-  const watcher = buildWatcher()
+  await logLaunchingMessage({ ...options, version, buildInfo }, launcherLogger)
 
-  const launchingMessageInfo = { ...options, version, buildInfo, configSource, configFile: cliOptions.config }
-  await logLaunchingMessage(launchingMessageInfo, launcherLogger)
-
-  const application = await makeApplication(options, version, baseLogger, watcher)
+  const application = await makeApplication(options, version, baseLogger)
   const app = await createAPI(options, application, baseLogger)
 
   const httpServer = createServer(app)
-
-  await Bluebird.fromCallback((callback) => {
-    const hostname = options.host === 'localhost' ? undefined : options.host
-    httpServer.listen(options.port, hostname, undefined, () => {
-      callback(null)
-    })
-  })
+  await serverListen(httpServer, options)
 
   const url = `http://${options.host}:${options.port}/`
   launcherLogger.info(`NLU Server is ready at ${url}. Make sure this URL is not publicly available.`)
+
+  listenForUncaughtErrors(baseLogger)
 }
diff --git a/packages/nlu-server/src/infrastructure/database-utils.ts b/packages/nlu-server/src/infrastructure/database-utils.ts
new file mode 100644
index 00000000..0ba0e371
--- /dev/null
+++ b/packages/nlu-server/src/infrastructure/database-utils.ts
@@ -0,0 +1,14 @@
+import { Knex } from 'knex'
+
+export const createTableIfNotExists = async (
+  knex: Knex,
+  tableName: string,
+  cb: (tableBuilder: Knex.CreateTableBuilder) => void
+): Promise<boolean> => {
+  return knex.schema.hasTable(tableName).then((exists) => {
+    if (exists) {
+      return false
+    }
+    return knex.schema.createTable(tableName, cb).then(() => true)
+  })
+}
diff --git a/packages/nlu-server/src/infrastructure/dataset-serializer.ts b/packages/nlu-server/src/infrastructure/dataset-serializer.ts
new file mode 100644
index 00000000..8f8063c1
--- /dev/null
+++ b/packages/nlu-server/src/infrastructure/dataset-serializer.ts
@@ -0,0 +1,10 @@
+import { TrainInput } from '@botpress/nlu-client'
+import jsonpack from 'jsonpack'
+
+export const packTrainSet = (ts: TrainInput): string => {
+  return jsonpack.pack(ts)
+}
+
+export const unpackTrainSet = (compressed: string): TrainInput => {
+  return jsonpack.unpack(compressed)
+}
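A round-trip sketch for the dataset serializer above (jsonpack emits a compact string, convenient for storing a training set in a text column; the helper name `roundTrip` is hypothetical):

    import { TrainInput } from '@botpress/nlu-client'
    import { packTrainSet, unpackTrainSet } from './dataset-serializer'

    // Pack for storage, unpack on read; the structure is preserved,
    // only the JSON representation is compressed.
    const roundTrip = (ts: TrainInput): TrainInput => {
      const packed: string = packTrainSet(ts)
      return unpackTrainSet(packed)
    }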
diff --git a/packages/nlu-server/src/infrastructure/ghost/database/db.ts b/packages/nlu-server/src/infrastructure/ghost/database/db.ts
deleted file mode 100644
index 7b64d1af..00000000
--- a/packages/nlu-server/src/infrastructure/ghost/database/db.ts
+++ /dev/null
@@ -1,104 +0,0 @@
-import { Logger } from '@botpress/logger'
-import Bluebird from 'bluebird'
-import { mkdirpSync } from 'fs-extra'
-import Knex from 'knex'
-import _ from 'lodash'
-import path from 'path'
-import { getProjectLocation } from '../../../project-location'
-import { patchKnex } from './helpers'
-
-import AllTables, { Table } from './tables'
-import { KnexExtended } from './typings'
-
-export type DatabaseType = 'postgres' | 'sqlite'
-
-export class Database {
-  knex!: KnexExtended
-
-  private tables: Table[] = []
-
-  public constructor(private logger: Logger, private dbURL?: string) {}
-
-  async bootstrap() {
-    await Bluebird.mapSeries(AllTables, async (Tbl) => {
-      const table = new Tbl(this.knex!)
-      const created = await table.bootstrap()
-      if (created) {
-        this.logger.debug(`Created table '${table.name}'`)
-      }
-      this.tables.push(table)
-    })
-  }
-
-  async seedForTests() {
-    // Add seeding here
-  }
-
-  async teardownTables() {
-    await Bluebird.mapSeries(AllTables, async (Tbl) => {
-      const table = new Tbl(this.knex!)
-      if (this.knex.isLite) {
-        await this.knex.raw('PRAGMA foreign_keys = OFF;')
-        await this.knex.raw(`DROP TABLE IF EXISTS "${table.name}";`)
-        await this.knex.raw('PRAGMA foreign_keys = ON;')
-      } else {
-        await this.knex.raw(`DROP TABLE IF EXISTS "${table.name}" CASCADE;`)
-      }
-    })
-  }
-
-  async initialize(databaseType: DatabaseType = 'postgres') {
-    const logger = this.logger
-    const { DATABASE_URL, DATABASE_POOL } = process.env
-
-    let poolOptions: Knex.PoolConfig = {}
-
-    try {
-      const customPoolOptions = DATABASE_POOL ? JSON.parse(DATABASE_POOL) : {}
-      poolOptions = { ...poolOptions, ...customPoolOptions }
-    } catch (err) {
-      this.logger.warn('Database pool option is not valid json')
-    }
-
-    if (DATABASE_URL) {
-      if (!databaseType) {
-        databaseType = DATABASE_URL.toLowerCase().startsWith('postgres') ? 'postgres' : 'sqlite'
-      }
-      if (!this.dbURL) {
-        this.dbURL = DATABASE_URL
-      }
-    }
-
-    let config: Knex.Config = {
-      useNullAsDefault: true,
-      log: {
-        error: (message) => logger.error(`[knex] ${message}`),
-        warn: (message) => logger.warn(`[knex] ${message}`),
-        debug: (message) => logger.debug(`[knex] ${message}`)
-      }
-    }
-
-    if (databaseType === 'postgres') {
-      config = { ...config, client: 'pg', connection: this.dbURL, pool: poolOptions }
-    } else {
-      const projectLocation = getProjectLocation()
-      const dbLocation = this.dbURL ? this.dbURL : `${projectLocation}/data/storage/core.sqlite`
-      mkdirpSync(path.dirname(dbLocation))
-
-      Object.assign(config, {
-        client: 'sqlite3',
-        connection: { filename: dbLocation },
-        pool: {
-          afterCreate: (conn, cb) => {
-            conn.run('PRAGMA foreign_keys = ON', cb)
-          },
-          ...poolOptions
-        }
-      })
-    }
-
-    this.knex = patchKnex(Knex(config))
-
-    await this.bootstrap()
-  }
-}
diff --git a/packages/nlu-server/src/infrastructure/ghost/database/helpers.ts b/packages/nlu-server/src/infrastructure/ghost/database/helpers.ts
deleted file mode 100644
index 7fc45213..00000000
--- a/packages/nlu-server/src/infrastructure/ghost/database/helpers.ts
+++ /dev/null
@@ -1,173 +0,0 @@
-import Knex from 'knex'
-import moment from 'moment'
-import { VError } from 'verror'
-import { KnexExtended, KnexExtension } from './typings'
-
-export const patchKnex = (knex: Knex): KnexExtended => {
-  const isLite = knex.client.config.client === 'sqlite3'
-  const location = isLite ? knex.client.connectionSettings.filename : undefined
-
-  const dateParse = (exp: string): Knex.Raw => {
-    return isLite ? knex.raw(`strftime('%Y-%m-%dT%H:%M:%fZ', ${exp})`) : knex.raw(exp)
-  }
-
-  const dateFormat = (date: Date) => {
-    const iso = moment(date).toDate().toISOString()
-    return dateParse(`'${iso}'`)
-  }
-
-  const columnOrDateFormat = (colOrDate: Knex.ColumnOrDate) => {
-    if ((<Knex.Sql>colOrDate).sql) {
-      return (<Knex.Sql>colOrDate).sql
-    }
-
-    if (typeof colOrDate === 'string') {
-      return isLite ? dateParse(colOrDate) : `"${colOrDate}"`
-    }
-
-    return dateFormat(colOrDate)
-  }
-
-  const createTableIfNotExists = async (tableName: string, cb: Knex.KnexCallback): Promise<boolean> => {
-    const exists = await knex.schema.hasTable(tableName)
-    if (exists) {
-      return false
-    }
-    await knex.schema.createTable(tableName, cb)
-    return true
-  }
-
-  // only works for single insert because of SQLite
-  const insertAndRetrieve = async <T>(
-    tableName: string,
-    data: any,
-    returnColumns: string | string[] = 'id',
-    idColumnName: string = 'id',
-    trx?: Knex.Transaction
-  ): Promise<T> => {
-    const handleResult = (res) => {
-      if (!res || res.length !== 1) {
-        throw new VError('Error doing insertAndRetrieve')
-      }
-      return res[0] as T
-    }
-
-    // postgres supports 'returning' natively
-    if (!isLite) {
-      return knex(tableName).insert(data).returning(returnColumns).then(handleResult)
-    }
-
-    const getQuery = (trx) =>
-      knex(tableName)
-        .insert(data)
-        .transacting(trx)
-        .then(() =>
-          knex
-            .select(knex.raw('last_insert_rowid() as id'))
-            .transacting(trx)
-            .then(([{ id: rowid }]) => {
-              let id = data && data.id
-              if (!id || idColumnName === 'rowid') {
-                id = rowid
-              }
-
-              if (returnColumns === idColumnName) {
-                return id
-              }
-              return knex(tableName).select('*').where(idColumnName, id).limit(1).transacting(trx).then(handleResult)
-            })
-        )
-
-    // transactions inside another transaction may lead to a deadlock
-    if (trx) {
-      return getQuery(trx)
-    }
-
-    return knex.transaction((trx) => getQuery(trx).then(trx.commit).catch(trx.rollback))
-  }
-
-  const binary: Knex.Binary = {
-    set: (data: string | Buffer): any => {
-      if (isLite || typeof data !== 'string') {
-        return data
-      }
-
-      return new Buffer(data, 'utf8')
-    }
-  }
-
-  const date: Knex.Date = {
-    set: (date?: Date) => (date ? date.toISOString() : undefined),
-    get: (date) => new Date(date),
-
-    format: dateFormat,
-    now: () => (isLite ? knex.raw("strftime('%Y-%m-%dT%H:%M:%fZ', 'now')") : knex.raw('now()')),
-    today: () => (isLite ? knex.raw('(date())') : knex.raw('(date(now()))')),
-    isBefore: (d1: Knex.ColumnOrDate, d2: Knex.ColumnOrDate): Knex.Raw => {
-      const exp1 = columnOrDateFormat(d1)
-      const exp2 = columnOrDateFormat(d2)
-      return knex.raw(`${exp1} < ${exp2}`)
-    },
-
-    isBeforeOrOn: (d1: Knex.ColumnOrDate, d2: Knex.ColumnOrDate): Knex.Raw => {
-      const exp1 = columnOrDateFormat(d1)
-      const exp2 = columnOrDateFormat(d2)
-      return knex.raw(`${exp1} <= ${exp2}`)
-    },
-
-    isAfter: (d1: Knex.ColumnOrDate, d2: Knex.ColumnOrDate): Knex.Raw => {
-      const exp1 = columnOrDateFormat(d1)
-      const exp2 = columnOrDateFormat(d2)
-      return knex.raw(`${exp1} > ${exp2}`)
-    },
-
-    isAfterOrOn: (d1: Knex.ColumnOrDate, d2: Knex.ColumnOrDate): Knex.Raw => {
-      const exp1 = columnOrDateFormat(d1)
-      const exp2 = columnOrDateFormat(d2)
-      return knex.raw(`${exp1} >= ${exp2}`)
-    },
-
-    isBetween: (date: Knex.ColumnOrDate, betweenA: Knex.ColumnOrDate, betweenB: Knex.ColumnOrDate): Knex.Raw => {
-      const exp1 = columnOrDateFormat(date)
-      const exp2 = columnOrDateFormat(betweenA)
-      const exp3 = columnOrDateFormat(betweenB)
-
-      return knex.raw(`${exp1} BETWEEN ${exp2} AND ${exp3}`)
-    },
-
-    isSameDay: (d1: Knex.ColumnOrDate, d2: Knex.ColumnOrDate): Knex.Raw => {
-      const exp1 = columnOrDateFormat(d1)
-      const exp2 = columnOrDateFormat(d2)
-      return knex.raw(`date(${exp1}) = date(${exp2})`)
-    },
-
-    hourOfDay: (date: Knex.ColumnOrDate): Knex.Raw => {
-      const exp1 = columnOrDateFormat(date)
-      return isLite ? knex.raw(`strftime('%H', ${exp1})`) : knex.raw(`to_char(${exp1}, 'HH24')`)
-    }
-  }
-
-  const bool: Knex.Bool = {
-    true: () => (isLite ? 1 : true),
-    false: () => (isLite ? 0 : false),
-    parse: (value) => (isLite ? !!value : value)
-  }
-
-  const json: Knex.Json = {
-    set: (obj) => (isLite ? obj && JSON.stringify(obj) : obj),
-    get: (obj) => (isLite ? obj && JSON.parse(obj) : obj)
-  }
-
-  const extensions: KnexExtension = {
-    isLite,
-    location,
-    binary,
-    date,
-    json,
-    bool,
-    createTableIfNotExists,
-    insertAndRetrieve
-  }
-
-  return Object.assign(knex, extensions)
-}
diff --git a/packages/nlu-server/src/infrastructure/ghost/database/knex.d.ts b/packages/nlu-server/src/infrastructure/ghost/database/knex.d.ts
deleted file mode 100644
index 40898379..00000000
--- a/packages/nlu-server/src/infrastructure/ghost/database/knex.d.ts
+++ /dev/null
@@ -1,45 +0,0 @@
-import 'knex'
-
-type OriginalDate = Date
-
-declare module 'knex' {
-  type ColumnOrDate = string | OriginalDate | Sql
-
-  interface Date {
-    set(date?: OriginalDate): any
-    get(date: any): OriginalDate
-
-    format(exp: any): Raw
-    now(): Raw
-    today(): Raw
-    isBefore(d1: ColumnOrDate, d2: ColumnOrDate): Raw
-    isBeforeOrOn(d1: ColumnOrDate, d2: ColumnOrDate): Raw
-    isAfter(d1: ColumnOrDate, d2: ColumnOrDate): Raw
-    isAfterOrOn(d1: ColumnOrDate, d2: ColumnOrDate): Raw
-    isBetween(date: ColumnOrDate, betweenA: ColumnOrDate, betweenB: ColumnOrDate): Raw
-    isSameDay(d1: ColumnOrDate, d2: ColumnOrDate): Raw
-    hourOfDay(date: ColumnOrDate): Raw
-  }
-
-  interface Bool {
-    true(): any
-    false(): any
-    parse(value: any): boolean
-  }
-
-  interface Json {
-    set(obj: any): any
-    get(obj: any): any
-  }
-
-  interface Binary {
-    set(data: string | Buffer): any
-  }
-
-  type KnexCallback = (tableBuilder: CreateTableBuilder) => any
-
-  type GetOrCreateResult<T> = Promise<{
-    created: boolean
-    result: T
-  }>
-}
diff --git a/packages/nlu-server/src/infrastructure/ghost/database/tables.ts b/packages/nlu-server/src/infrastructure/ghost/database/tables.ts
deleted file mode 100644
index b7028c9d..00000000
--- a/packages/nlu-server/src/infrastructure/ghost/database/tables.ts
+++ /dev/null
@@ -1,46 +0,0 @@
-import Knex from 'knex'
-import { KnexExtended } from './typings'
-
-export abstract class Table {
-  constructor(public knex: KnexExtended) {}
-  abstract bootstrap(): Promise<boolean>
-  abstract get name(): string
-}
-
-class GhostFilesTable extends Table {
-  readonly name: string = 'srv_ghost_files'
-
-  async bootstrap() {
-    let created = false
-    await this.knex.createTableIfNotExists(this.name, (table) => {
-      table.string('file_path').primary()
-      table.binary('content')
-      table.boolean('deleted')
-      table.timestamp('modified_on')
-      created = true
-    })
-    return created
-  }
-}
-
-class GhostRevisionsTable extends Table {
-  readonly name: string = 'srv_ghost_index'
-
-  async bootstrap() {
-    let created = false
-    await this.knex.createTableIfNotExists(this.name, (table) => {
-      table.string('file_path')
-      table.string('revision')
-      table.string('created_by')
-      table.timestamp('created_on')
-      table.primary(['file_path', 'revision'])
-      created = true
-    })
-
-    return created
-  }
-}
-
-const tables: typeof Table[] = [GhostFilesTable, GhostRevisionsTable]
-
-export default <(new (knex: Knex) => Table)[]>tables
diff --git a/packages/nlu-server/src/infrastructure/ghost/database/typings.ts b/packages/nlu-server/src/infrastructure/ghost/database/typings.ts
deleted file mode 100644
index 061fd9d6..00000000
--- a/packages/nlu-server/src/infrastructure/ghost/database/typings.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import Knex from 'knex'
-export interface KnexExtension {
-  isLite: boolean
-  location: string
-  createTableIfNotExists(tableName: string, cb: Knex.KnexCallback): Promise<boolean>
-  date: Knex.Date
-  bool: Knex.Bool
-  json: Knex.Json
-  binary: Knex.Binary
-  insertAndRetrieve<T>(
-    tableName: string,
-    data: {},
-    returnColumns?: string | string[],
-    idColumnName?: string,
-    trx?: Knex.Transaction
-  ): Promise<T>
-}
-
-export type KnexExtended = Knex & KnexExtension
diff --git a/packages/nlu-server/src/infrastructure/ghost/db-driver.ts b/packages/nlu-server/src/infrastructure/ghost/db-driver.ts
deleted file mode 100644
index f2b792ab..00000000
--- a/packages/nlu-server/src/infrastructure/ghost/db-driver.ts
+++ /dev/null
@@ -1,222 +0,0 @@
-import Bluebird from 'bluebird'
-import _ from 'lodash'
-import { nanoid } from 'nanoid'
-import path from 'path'
-import { VError } from 'verror'
-
-import { StorageDriver, DirectoryListingOptions, FileRevision } from '.'
-import { Database } from './database/db'
-import { filterByGlobs, forceForwardSlashes } from './misc'
-
-export class DBStorageDriver implements StorageDriver {
-  constructor(private database: Database) {}
-
-  async initialize() {
-    await this.database.initialize()
-  }
-
-  async upsertFile(filePath: string, content: string | Buffer, recordRevision: boolean): Promise<void>
-  async upsertFile(filePath: string, content: string | Buffer): Promise<void>
-  async upsertFile(filePath: string, content: string | Buffer, recordRevision: boolean = true): Promise<void> {
-    try {
-      let sql
-
-      if (this.database.knex.isLite) {
-        sql = `
-          INSERT OR REPLACE INTO :tableName: (:keyCol:, :valueCol:, deleted, :modifiedOnCol:)
-          VALUES (:key, :value, false, :now)
-        `
-      } else {
-        sql = `
-          INSERT INTO :tableName: (:keyCol:, :valueCol:, deleted, :modifiedOnCol:)
-          VALUES (:key, :value, false, :now)
-          ON CONFLICT (:keyCol:) DO UPDATE
-          SET :valueCol: = :value, :modifiedOnCol: = :now, deleted = false
-        `
-      }
-
-      await this.database.knex.raw(sql, {
-        modifiedOnCol: 'modified_on',
-        tableName: 'srv_ghost_files',
-        keyCol: 'file_path',
-        key: filePath,
-        valueCol: 'content',
-        value: this.database.knex.binary.set(content),
-        now: this.database.knex.date.now()
-      })
-
-      if (recordRevision) {
-        await this.database.knex('srv_ghost_index').insert({
-          file_path: filePath,
-          revision: nanoid(8),
-          created_by: 'admin',
-          created_on: this.database.knex.date.now()
-        })
-      }
-    } catch (e) {
-      throw new VError(e, `[DB Driver] Error upserting file "${filePath}"`)
-    }
-  }
-
-  async fileExists(filePath: string): Promise<boolean> {
-    try {
-      const exists = await this.database
-        .knex('srv_ghost_files')
-        .where({ file_path: filePath, deleted: false })
-        .select('file_path')
-        .limit(1)
-        .first()
-
-      return !!exists
-    } catch (e) {
-      throw new VError(e, `[DB Driver] Error checking if file exists "${filePath}"`)
-    }
-  }
-
-  async fileSize(filePath: string): Promise<number> {
-    try {
-      const size = await this.database
-        .knex('srv_ghost_files')
-        .where({ file_path: filePath, deleted: false })
-        .select(this.database.knex.raw('length(content) as len'))
-        .limit(1)
-        .first()
-        .then((entry) => entry.len)
-
-      return size
-    } catch (e) {
-      throw new VError(e, `[DB Driver] Error checking file size for "${filePath}"`)
-    }
-  }
-
-  async readFile(filePath: string): Promise<Buffer> {
-    try {
-      const file = await this.database
-        .knex('srv_ghost_files')
-        .where({
-          file_path: filePath,
-          deleted: false
-        })
-        .select('content')
-        .limit(1)
-        .first()
-        .then()
-
-      if (!file) {
"${filePath}" not found`) - } - - return Buffer.from((file).content as Buffer) - } catch (e) { - throw new VError(e, `[DB Storage] Error reading file "${filePath}"`) - } - } - - // @WrapErrorsWith(args => `[DB Storage] Error while moving file from "${args[0]}" to "${args[1]}".`) - async moveFile(fromPath: string, toPath: string) { - await this.database.knex('srv_ghost_files').update({ file_path: toPath }).where({ file_path: fromPath }) - } - - async deleteFile(filePath: string, recordRevision: boolean): Promise - async deleteFile(filePath: string): Promise - async deleteFile(filePath: string, recordRevision: boolean = true): Promise { - try { - if (recordRevision) { - await this.database.knex('srv_ghost_files').where({ file_path: filePath }).update({ deleted: true }) - - await this.database.knex('srv_ghost_index').insert({ - file_path: filePath, - revision: nanoid(8), - created_by: 'admin', - created_on: this.database.knex.date.now() - }) - } else { - await this.database.knex('srv_ghost_files').where({ file_path: filePath }).del() - } - } catch (e) { - throw new VError(e, `[DB Storage] Error deleting file "${filePath}"`) - } - } - - async deleteDir(dirPath: string): Promise { - try { - await this.database.knex('srv_ghost_files').where('file_path', 'like', `${dirPath}%`).update({ deleted: true }) - } catch (e) { - throw new VError(e, `[DB Storage] Error deleting folder "${dirPath}"`) - } - } - - async directoryListing( - folder: string, - options: DirectoryListingOptions = { - excludes: [] - } - ): Promise { - try { - let query = this.database.knex('srv_ghost_files').select('file_path').where({ - deleted: false - }) - - if (folder.length) { - query = query.andWhere('file_path', 'like', `${folder}%`) - } - - if (options.sortOrder) { - const { column, desc } = options.sortOrder - query = query.orderBy(column === 'modifiedOn' ? 'modified_on' : 'file_path', desc ? 'desc' : 'asc') - } - - const queryResult = await query.then>() - - const paths = await Bluebird.map(queryResult, (x: any) => forceForwardSlashes(path.relative(folder, x.file_path))) - - if (!options.excludes || !options.excludes.length) { - return paths - } - - const ignoredGlobs = Array.isArray(options.excludes) ? 
options.excludes : [options.excludes] - return filterByGlobs(paths, (path) => path, ignoredGlobs) - } catch (e) { - throw new VError(e, `[DB Storage] Error listing directory content for folder "${folder}"`) - } - } - - async listRevisions(pathPrefix: string): Promise { - try { - let query = this.database.knex('srv_ghost_index') - - if (pathPrefix.length) { - pathPrefix = pathPrefix.replace(/^.\//g, '') // Remove heading './' if present - query = query.where('file_path', 'like', `${pathPrefix}%`) - } - - return await query.then((entries) => - entries.map( - (x) => - { - path: x.file_path, - revision: x.revision, - created_on: new Date(x.created_on), - created_by: x.created_by - } - ) - ) - } catch (e) { - throw new VError(e, `[DB Storage] Error getting revisions in "${pathPrefix}"`) - } - } - - async deleteRevision(filePath: string, revision: string): Promise { - try { - await this.database - .knex('srv_ghost_index') - .where({ - file_path: filePath, - revision - }) - .del() - } catch (e) { - throw new VError(e, `[DB Storage] Error deleting revision "${revision}" for file "${filePath}"`) - } - } -} diff --git a/packages/nlu-server/src/infrastructure/ghost/disk-driver.ts b/packages/nlu-server/src/infrastructure/ghost/disk-driver.ts deleted file mode 100644 index 0070ed13..00000000 --- a/packages/nlu-server/src/infrastructure/ghost/disk-driver.ts +++ /dev/null @@ -1,165 +0,0 @@ -import Bluebird from 'bluebird' -import fse from 'fs-extra' -import glob from 'glob' -import _ from 'lodash' -import path from 'path' -import { VError } from 'verror' - -import { DirectoryListingOptions, FileRevision, StorageDriver } from '.' -import { forceForwardSlashes } from './misc' - -interface Options { - basePath: string -} - -export class DiskStorageDriver implements StorageDriver { - constructor(private _options: Options) {} - - resolvePath = (p: string) => path.resolve(this._options.basePath, p) - - async upsertFile(filePath: string, content: string | Buffer): Promise - async upsertFile(filePath: string, content: string | Buffer, recordRevision: boolean = false): Promise { - try { - const folder = path.dirname(this.resolvePath(filePath)) - await fse.mkdirp(folder) - await fse.writeFile(this.resolvePath(filePath), content) - } catch (e) { - throw new VError(e, `[Disk Storage] Error upserting file "${filePath}"`) - } - } - - async createDir(dirname: string): Promise { - return fse.ensureDir(dirname) - } - - async readFile(filePath: string): Promise { - try { - return fse.readFile(this.resolvePath(filePath)) - } catch (e) { - if (e.code === 'ENOENT') { - throw new VError(`[Disk Storage] File "${filePath}" not found`) - } - - throw new VError(e, `[Disk Storage] Error reading file "${filePath}"`) - } - } - - async fileExists(filePath: string): Promise { - try { - return fse.pathExists(this.resolvePath(filePath)) - } catch (e) { - throw new VError(e, `[Disk Storage] Error deleting file "${filePath}"`) - } - } - - async fileSize(filePath: string): Promise { - try { - return (await fse.stat(filePath)).size - } catch (e) { - throw new VError(e, `[Disk Storage] Error calculating the size for file "${filePath}"`) - } - } - - async deleteFile(filePath: string): Promise - async deleteFile(filePath: string, recordRevision: boolean = false): Promise { - try { - return fse.unlink(this.resolvePath(filePath)) - } catch (e) { - throw new VError(e, `[Disk Storage] Error deleting file "${filePath}"`) - } - } - - async moveFile(fromPath: string, toPath: string): Promise { - return fse.move(this.resolvePath(fromPath), 
this.resolvePath(toPath)) - } - - async deleteDir(dirPath: string): Promise { - try { - return fse.remove(this.resolvePath(dirPath)) - } catch (e) { - throw new VError(e, `[Disk Storage] Error deleting directory "${dirPath}"`) - } - } - - async directoryListing( - folder: string, - options: DirectoryListingOptions = { - excludes: [], - includeDotFiles: false - } - ): Promise { - try { - await fse.access(this.resolvePath(folder), fse.constants.R_OK) - } catch (e) { - // if directory doesn't exist we don't care - if (e.code === 'ENOENT') { - return [] - } - - throw new VError(e, `[Disk Storage] No read access to directory "${folder}"`) - } - - const ghostIgnorePatterns = await this._getGhostIgnorePatterns(this.resolvePath('data/.ghostignore')) - const globOptions: glob.IOptions = { - cwd: this.resolvePath(folder), - dot: options.includeDotFiles - } - - // options.excludes can either be a string or an array of strings or undefined - if (Array.isArray(options.excludes)) { - globOptions['ignore'] = [...options.excludes, ...ghostIgnorePatterns] - } else if (options.excludes) { - globOptions['ignore'] = [options.excludes, ...ghostIgnorePatterns] - } else { - globOptions['ignore'] = ghostIgnorePatterns - } - - try { - const files = await Bluebird.fromCallback((cb) => glob('**/*.*', globOptions, cb)) - if (!options.sortOrder) { - return files.map((filePath) => forceForwardSlashes(filePath)) - } - - const { column, desc } = options.sortOrder - - const filesWithDate = await Bluebird.map(files, async (filePath) => ({ - filePath, - modifiedOn: (await fse.stat(path.join(this.resolvePath(folder), filePath))).mtime - })) - - return _.orderBy(filesWithDate, [column], [desc ? 'desc' : 'asc']).map((x) => forceForwardSlashes(x.filePath)) - } catch (e) { - return [] - } - } - - async deleteRevision(filePath: string, revision: string): Promise { - throw new Error('Method not implemented.') - } - - async listRevisions(pathPrefix: string): Promise { - try { - const content = await this.readFile(path.join(pathPrefix, 'revisions.json')) - return JSON.parse(content.toString()) - } catch (err) { - return [] - } - } - - async absoluteDirectoryListing(destination: string) { - try { - const files = await Bluebird.fromCallback((cb) => glob('**/*.*', { cwd: destination }, cb)) - return files.map((filePath) => forceForwardSlashes(filePath)) - } catch (e) { - return [] - } - } - - private async _getGhostIgnorePatterns(ghostIgnorePath: string): Promise { - if (await fse.pathExists(ghostIgnorePath)) { - const ghostIgnoreFile = await fse.readFile(ghostIgnorePath) - return ghostIgnoreFile.toString().split(/\r?\n/gi) - } - return [] - } -} diff --git a/packages/nlu-server/src/infrastructure/ghost/ghost.ts b/packages/nlu-server/src/infrastructure/ghost/ghost.ts deleted file mode 100644 index ce7ead84..00000000 --- a/packages/nlu-server/src/infrastructure/ghost/ghost.ts +++ /dev/null @@ -1,698 +0,0 @@ -import { Logger } from '@botpress/logger' -import Bluebird from 'bluebird' -import bytes from 'bytes' -import { diffLines } from 'diff' -import { EventEmitter2 } from 'eventemitter2' -import fse from 'fs-extra' -import jsonlintMod from 'jsonlint-mod' -import _ from 'lodash' -import minimatch from 'minimatch' -import mkdirp from 'mkdirp' -import path from 'path' -import replace from 'replace-in-file' -import tmp from 'tmp' -import { VError } from 'verror' - -import { FileRevision, PendingRevisions, ReplaceContent, ServerWidePendingRevisions, StorageDriver } from '.' 
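Both drivers deleted above implement the same StorageDriver contract, which is what lets the GhostService removed below swap disk and database persistence behind a single interface. A minimal sketch of that pattern, using only signatures taken from the deleted code (the chooseDriver helper is hypothetical, added here for illustration):

interface StorageDriverLike {
  upsertFile(filePath: string, content: Buffer | string, recordRevision: boolean): Promise<void>
  readFile(filePath: string): Promise<Buffer>
  deleteFile(filePath: string, recordRevision: boolean): Promise<void>
}

// Mirrors how GhostService picks its primaryDriver from the useDbDriver flag
const chooseDriver = (useDb: boolean, db: StorageDriverLike, disk: StorageDriverLike): StorageDriverLike =>
  useDb ? db : disk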
-import { DBStorageDriver } from './db-driver' -import { DiskStorageDriver } from './disk-driver' -import { ObjectCache } from './memory-cache' -import { createArchive, filterByGlobs, forceForwardSlashes, sanitize } from './misc' -import { DirectoryListingOptions, ListenHandle, UpsertOptions } from './typings' - -export interface BpfsScopedChange { - // An undefined bot ID = global - botId: string | undefined - // The list of local files which will overwrite their remote counterpart - localFiles: string[] - // List of added/deleted files based on local and remote files, and differences between files from revisions - changes: FileChange[] -} - -export interface FileChange { - path: string - action: FileChangeAction - add?: number - del?: number - sizeDiff?: number -} - -export type FileChangeAction = 'add' | 'edit' | 'del' - -interface ScopedGhostOptions { - botId?: string - // Archive upload requires the full path, including drive letter, so it should not be sanitized - noSanitize?: boolean -} - -const MAX_GHOST_FILE_SIZE = '1Gb' -const bpfsIgnoredFiles = ['models/**', 'data/bots/*/models/**', '**/*.js.map'] -const GLOBAL_GHOST_KEY = '__global__' -const BOTS_GHOST_KEY = '__bots__' -const DIFFABLE_EXTS = ['.js', '.json', '.txt', '.csv', '.yaml'] - -export class GhostService { - private _scopedGhosts: Map = new Map() - public useDbDriver: boolean = false - - constructor( - private diskDriver: DiskStorageDriver, - private dbDriver: DBStorageDriver, - private cache: ObjectCache, - private logger: Logger - ) { - this.cache.events.on && this.cache.events.on('syncDbFilesToDisk', this._onSyncReceived) - } - - async initialize(useDbDriver: boolean, ignoreSync?: boolean) { - this.useDbDriver = useDbDriver - this._scopedGhosts.clear() - - if (useDbDriver) { - await this.dbDriver.initialize() - } - - const global = await this.global().directoryListing('/') - - if (useDbDriver && !ignoreSync && _.isEmpty(global)) { - this.logger.info('Syncing data/global/ to database') - await this.global().sync() - - this.logger.info('Syncing data/bots/ to database') - await this.bots().sync() - } - } - - // Not caching this scope since it's rarely used - root(useDbDriver?: boolean): ScopedGhostService { - return new ScopedGhostService('./data', this.diskDriver, this.dbDriver, useDbDriver ?? this.useDbDriver, this.cache) - } - - global(): ScopedGhostService { - if (this._scopedGhosts.has(GLOBAL_GHOST_KEY)) { - return this._scopedGhosts.get(GLOBAL_GHOST_KEY)! 
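The early return above (and the identical one in forBot further down) is plain memoization: scoped ghost instances are built once and cached by key. A standalone sketch of the idea, with a hypothetical getOrCreate helper:

const getOrCreate = <T>(cache: Map<string, T>, key: string, make: () => T): T => {
  const existing = cache.get(key)
  if (existing) {
    return existing
  }
  const created = make()
  cache.set(key, created) // subsequent calls with the same key reuse this instance
  return created
}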
- } - - const scopedGhost = new ScopedGhostService( - './data/global', - this.diskDriver, - this.dbDriver, - this.useDbDriver, - this.cache - ) - - this._scopedGhosts.set(GLOBAL_GHOST_KEY, scopedGhost) - return scopedGhost - } - - custom(baseDir: string) { - return new ScopedGhostService(baseDir, this.diskDriver, this.dbDriver, false, this.cache, { noSanitize: true }) - } - - // TODO: refactor this - async forceUpdate(tmpFolder: string) { - const invalidateFile = async (fileName: string) => { - await this.cache.invalidate(`object::${fileName}`) - await this.cache.invalidate(`buffer::${fileName}`) - } - - const dbRevs = await this.dbDriver.listRevisions('data/') - await Bluebird.each(dbRevs, (rev) => this.dbDriver.deleteRevision(rev.path, rev.revision)) - - const allChanges = await this.listFileChanges(tmpFolder) - for (const { changes, localFiles } of allChanges) { - await Bluebird.map( - changes.filter((x) => x.action === 'del'), - async (file) => { - await this.dbDriver.deleteFile(file.path) - await invalidateFile(file.path) - } - ) - - // Upload all local files for that scope - if (localFiles.length) { - await Bluebird.map(localFiles, async (filePath) => { - const content = await this.diskDriver.readFile(path.join(tmpFolder, filePath)) - await this.dbDriver.upsertFile(filePath, content, false) - await invalidateFile(filePath) - }) - } - } - - return allChanges.filter((x) => x.localFiles.length && x.botId).map((x) => x.botId) - } - - // TODO: refactor this - async listFileChanges(tmpFolder: string): Promise { - const tmpDiskGlobal = this.custom(path.resolve(tmpFolder, 'data/global')) - const tmpDiskBot = (botId?: string) => this.custom(path.resolve(tmpFolder, 'data/bots', botId || '')) - - // We need local and remote bot ids to correctly display changes - const remoteBotIds = (await this.bots().directoryListing('/', 'bot.config.json')).map(path.dirname) - const localBotIds = (await tmpDiskBot().directoryListing('/', 'bot.config.json')).map(path.dirname) - const botsIds = _.uniq([...remoteBotIds, ...localBotIds]) - - const uniqueFile = (file) => `${file.path} | ${file.revision}` - - const getFileDiff = async (file: string): Promise => { - try { - const localFile = (await this.diskDriver.readFile(path.join(tmpFolder, file))).toString() - const dbFile = (await this.dbDriver.readFile(file)).toString() - - const diff = diffLines(dbFile, localFile) - - return { - path: file, - action: 'edit' as FileChangeAction, - add: _.sumBy( - diff.filter((d) => d.added), - 'count' - ), - del: _.sumBy( - diff.filter((d) => d.removed), - 'count' - ) - } - } catch (err) { - // Todo better handling - this.logger.attachError(err).error(`Error while checking diff for "${file}"`) - return { path: file, action: 'edit' as FileChangeAction } - } - } - - const fileSizeDiff = async (file: string): Promise => { - try { - const localFileSize = await this.diskDriver.fileSize(path.join(tmpFolder, file)) - const dbFileSize = await this.dbDriver.fileSize(file) - - return { - path: file, - action: 'edit' as FileChangeAction, - sizeDiff: Math.abs(dbFileSize - localFileSize) - } - } catch (err) { - this.logger.attachError(err).error(`Error while checking file size for "${file}"`) - return { path: file, action: 'edit' as FileChangeAction } - } - } - - // Adds the correct prefix to files so they are displayed correctly when reviewing changes - const getDirectoryFullPaths = async (botId: string | undefined, ghost: ScopedGhostService) => { - const getPath = (file: string) => (botId ? 
path.join('data/bots', botId, file) : path.join('data/global', file)) - const files = await ghost.directoryListing('/', '*.*', [...bpfsIgnoredFiles, '**/revisions.json']) - return files.map((f) => forceForwardSlashes(getPath(f))) - } - - const filterRevisions = (revisions: FileRevision[]) => filterByGlobs(revisions, (r) => r.path, bpfsIgnoredFiles) - - const getFileChanges = async ( - botId: string | undefined, - localGhost: ScopedGhostService, - remoteGhost: ScopedGhostService - ) => { - const localRevs = filterRevisions(await localGhost.listDiskRevisions()) - const remoteRevs = filterRevisions(await remoteGhost.listDbRevisions()) - const syncedRevs = _.intersectionBy(localRevs, remoteRevs, uniqueFile) - const unsyncedFiles = _.uniq(_.differenceBy(remoteRevs, syncedRevs, uniqueFile).map((x) => x.path)) - - const localFiles: string[] = await getDirectoryFullPaths(botId, localGhost) - const remoteFiles: string[] = await getDirectoryFullPaths(botId, remoteGhost) - - const deleted = _.difference(remoteFiles, localFiles).map((x) => ({ path: x, action: 'del' as FileChangeAction })) - const added = _.difference(localFiles, remoteFiles).map((x) => ({ path: x, action: 'add' as FileChangeAction })) - - const filterDeleted = (file) => !_.map([...deleted, ...added], 'path').includes(file) - const filterDiffable = (file) => DIFFABLE_EXTS.includes(path.extname(file)) - - const editedFiles = unsyncedFiles.filter(filterDeleted) - const checkFileDiff = editedFiles.filter(filterDiffable) - const checkFileSize = unsyncedFiles.filter((x) => !checkFileDiff.includes(x)) - - const edited = [ - ...(await Bluebird.map(checkFileDiff, getFileDiff)).filter((x) => x.add !== 0 || x.del !== 0), - ...(await Bluebird.map(checkFileSize, fileSizeDiff)).filter((x) => x.sizeDiff !== 0) - ] - - return { - botId, - changes: [...added, ...deleted, ...edited], - localFiles - } - } - - const botsFileChanges = await Bluebird.map(botsIds, (botId) => - getFileChanges(botId, tmpDiskBot(botId), this.forBot(botId)) - ) - - return [...botsFileChanges, await getFileChanges(undefined, tmpDiskGlobal, this.global())] - } - - bots(): ScopedGhostService { - if (this._scopedGhosts.has(BOTS_GHOST_KEY)) { - return this._scopedGhosts.get(BOTS_GHOST_KEY)! - } - - const scopedGhost = new ScopedGhostService( - './data/bots', - this.diskDriver, - this.dbDriver, - this.useDbDriver, - this.cache - ) - - this._scopedGhosts.set(BOTS_GHOST_KEY, scopedGhost) - return scopedGhost - } - - forBot(botId: string): ScopedGhostService { - if (this._scopedGhosts.has(botId)) { - return this._scopedGhosts.get(botId)! 
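The getFileChanges helper above reduces added and deleted files to two lodash set differences before any line-level diffing happens. A minimal sketch of that step in isolation (the file names are made up for the example):

import _ from 'lodash'

const localFiles = ['bots/a/flow.json', 'bots/a/intents.json']
const remoteFiles = ['bots/a/intents.json', 'bots/a/old.json']

const added = _.difference(localFiles, remoteFiles) // ['bots/a/flow.json']
const deleted = _.difference(remoteFiles, localFiles) // ['bots/a/old.json']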
- } - - const scopedGhost = new ScopedGhostService( - `./data/bots/${botId}`, - this.diskDriver, - this.dbDriver, - this.useDbDriver, - this.cache, - { botId } - ) - - const listenForUnmount = (args) => { - if (args && args.botId === botId) { - scopedGhost.events.removeAllListeners() - } - } - listenForUnmount({}) - - this._scopedGhosts.set(botId, scopedGhost) - return scopedGhost - } - - public async exportArchive(): Promise { - const tmpDir = tmp.dirSync({ unsafeCleanup: true }) - - const getFullPath = (folder) => path.join(tmpDir.name, folder) - - try { - const botIds = (await this.bots().directoryListing('/', 'bot.config.json')).map(path.dirname) - const botFiles = await Bluebird.mapSeries(botIds, async (botId) => - (await this.forBot(botId).exportToDirectory(getFullPath(`bots/${botId}`), bpfsIgnoredFiles)).map((f) => - path.join(`bots/${botId}`, f) - ) - ) - - const allFiles = [ - ..._.flatten(botFiles), - ...(await this.global().exportToDirectory(getFullPath('global'), bpfsIgnoredFiles)).map((f) => - path.join('global', f) - ) - ] - - const archive = await createArchive(getFullPath('archive.tgz'), tmpDir.name, allFiles) - return await fse.readFile(archive) - } finally { - tmpDir.removeCallback() - } - } - - public async getPending(botIds: string[]): Promise { - if (!this.useDbDriver) { - return {} - } - - const global = await this.global().getPendingChanges() - const bots = await Bluebird.mapSeries(botIds, async (botId) => this.forBot(botId).getPendingChanges()) - return { - global, - bots - } - } - - private _onSyncReceived = async (message: string) => { - try { - const { rootFolder, botId } = JSON.parse(message) - if (botId) { - await this.forBot(botId).syncDatabaseFilesToDisk(rootFolder) - } else { - await this.global().syncDatabaseFilesToDisk(rootFolder) - } - } catch (err) { - this.logger.attachError(err).error('Could not sync files locally.') - } - } -} - -export interface FileContent { - name: string - content: string | Buffer -} - -export class ScopedGhostService { - isDirectoryGlob: boolean - primaryDriver: StorageDriver - events: EventEmitter2 = new EventEmitter2() - - constructor( - private baseDir: string, - private diskDriver: DiskStorageDriver, - private dbDriver: DBStorageDriver, - private useDbDriver: boolean, - private cache: ObjectCache, - private options: ScopedGhostOptions = { - botId: undefined, - noSanitize: true - } - ) { - if (![-1, this.baseDir.length - 1].includes(this.baseDir.indexOf('*'))) { - throw new Error("Base directory can only contain '*' at the end of the path") - } - - this.isDirectoryGlob = this.baseDir.endsWith('*') - this.primaryDriver = useDbDriver ? dbDriver : diskDriver - } - - private _normalizeFolderName(rootFolder: string) { - const folder = forceForwardSlashes(path.join(this.baseDir, rootFolder)) - return this.options.noSanitize ? 
folder : sanitize(folder, 'folder') - } - - private _normalizeFileName(rootFolder: string, file: string) { - const fullPath = path.join(rootFolder, file) - const folder = this._normalizeFolderName(path.dirname(fullPath)) - return forceForwardSlashes(path.join(folder, sanitize(path.basename(fullPath)))) - } - - objectCacheKey = (str) => `object::${str}` - bufferCacheKey = (str) => `buffer::${str}` - - private async _invalidateFile(fileName: string) { - await this.cache.invalidate(this.objectCacheKey(fileName)) - await this.cache.invalidate(this.bufferCacheKey(fileName)) - } - - async invalidateFile(rootFolder: string, fileName: string): Promise<void> { - const filePath = this._normalizeFileName(rootFolder, fileName) - await this._invalidateFile(filePath) - } - - async ensureDirs(rootFolder: string, directories: string[]): Promise<void> { - if (!this.useDbDriver) { - await Bluebird.mapSeries(directories, (d) => this.diskDriver.createDir(this._normalizeFileName(rootFolder, d))) - } - } - - // Temporary until we implement a large file storage system; - // the limit is raised for '.model' files because NLU models are getting bigger - private getFileSizeLimit(fileName: string): number { - const humanSize = fileName.endsWith('.model') ? '500mb' : MAX_GHOST_FILE_SIZE - return bytes(humanSize) - } - - async upsertFile( - rootFolder: string, - file: string, - content: string | Buffer, - options: UpsertOptions = { - recordRevision: true, - syncDbToDisk: false, - ignoreLock: false - } - ): Promise<void> { - if (this.isDirectoryGlob) { - throw new Error("Ghost can't read or write under this scope") - } - - const fileName = this._normalizeFileName(rootFolder, file) - const sizeLimit = this.getFileSizeLimit(fileName) - if (content.length > sizeLimit) { - throw new Error(`The size of the file ${fileName} is over the ${bytes(sizeLimit)} limit`) - } - - await this.primaryDriver.upsertFile(fileName, content, !!options.recordRevision) - this.events.emit('changed', fileName) - await this._invalidateFile(fileName) - - if (options.syncDbToDisk) { - await this.cache.sync(JSON.stringify({ rootFolder, botId: this.options.botId })) - } - } - - async upsertFiles(rootFolder: string, content: FileContent[], options?: UpsertOptions): Promise<void> { - await Promise.all(content.map((c) => this.upsertFile(rootFolder, c.name, c.content))) - } - - /** - * Sync the local filesystem to the database. - * All files are tracked by default, unless `.ghostignore` is used to exclude them.
- */ - async sync() { - if (!this.useDbDriver) { - // We don't have to sync anything as we're just using the files from disk - return - } - - const localFiles = await this.diskDriver.directoryListing(this.baseDir, { includeDotFiles: true }) - const diskRevs = await this.diskDriver.listRevisions(this.baseDir) - const dbRevs = await this.dbDriver.listRevisions(this.baseDir) - const syncedRevs = _.intersectionBy(diskRevs, dbRevs, (x) => `${x.path} | ${x.revision}`) - - await Bluebird.each(syncedRevs, (rev) => this.dbDriver.deleteRevision(rev.path, rev.revision)) - await this._updateProduction(localFiles) - } - - private async _updateProduction(localFiles: string[]) { - // Delete the prod files that have been deleted from disk - const prodFiles = await this.dbDriver.directoryListing(this._normalizeFolderName('./')) - const filesToDelete = _.difference(prodFiles, localFiles) - await Bluebird.map(filesToDelete, (filePath) => - this.dbDriver.deleteFile(this._normalizeFileName('./', filePath), false) - ) - - // Overwrite all of the prod files with the local files - await Bluebird.each(localFiles, async (file) => { - const filePath = this._normalizeFileName('./', file) - const content = await this.diskDriver.readFile(filePath) - await this.dbDriver.upsertFile(filePath, content, false) - }) - } - - public async exportToDirectory(directory: string, excludes?: string | string[]): Promise<string[]> { - const allFiles = await this.directoryListing('./', '*.*', excludes, true) - - for (const file of allFiles.filter((x) => x !== 'revisions.json')) { - const content = await this.primaryDriver.readFile(this._normalizeFileName('./', file)) - const outPath = path.join(directory, file) - mkdirp.sync(path.dirname(outPath)) - await fse.writeFile(outPath, content) - } - - const dbRevs = await this.dbDriver.listRevisions(this.baseDir) - - await fse.writeFile(path.join(directory, 'revisions.json'), JSON.stringify(dbRevs, undefined, 2)) - if (!allFiles.includes('revisions.json')) { - allFiles.push('revisions.json') - } - - return allFiles - } - - public async importFromDirectory(directory: string) { - const filenames = await this.diskDriver.absoluteDirectoryListing(directory) - - const files = filenames.map((file) => { - return { - name: file, - content: fse.readFileSync(path.join(directory, file)) - } as FileContent - }) - - await this.upsertFiles('/', files, { ignoreLock: true }) - } - - public async exportToArchiveBuffer(excludes?: string | string[], replaceContent?: ReplaceContent): Promise<Buffer> { - const tmpDir = tmp.dirSync({ unsafeCleanup: true }) - - try { - const outFiles = await this.exportToDirectory(tmpDir.name, excludes) - if (replaceContent) { - await replace({ files: `${tmpDir.name}/**/*.json`, from: replaceContent.from, to: replaceContent.to }) - } - - const filename = path.join(tmpDir.name, 'archive.tgz') - - const archive = await createArchive(filename, tmpDir.name, outFiles) - return await fse.readFile(archive) - } finally { - tmpDir.removeCallback() - } - } - - public async isFullySynced(): Promise<boolean> { - if (!this.useDbDriver) { - return true - } - - const revisions = await this.dbDriver.listRevisions(this.baseDir) - return revisions.length === 0 - } - - async readFileAsBuffer(rootFolder: string, file: string): Promise<Buffer> { - if (this.isDirectoryGlob) { - throw new Error("Ghost can't read or write under this scope") - } - - const fileName = this._normalizeFileName(rootFolder, file) - const cacheKey = this.bufferCacheKey(fileName) - - if (!(await this.cache.has(cacheKey))) { - const value = await
this.primaryDriver.readFile(fileName) - await this.cache.set(cacheKey, value) - return value - } - - return this.cache.get(cacheKey) - } - - async readFileAsString(rootFolder: string, file: string): Promise { - return (await this.readFileAsBuffer(rootFolder, file)).toString() - } - - async readFileAsObject(rootFolder: string, file: string): Promise { - const fileName = this._normalizeFileName(rootFolder, file) - const cacheKey = this.objectCacheKey(fileName) - - if (!(await this.cache.has(cacheKey))) { - const value = await this.readFileAsString(rootFolder, file) - let obj - try { - obj = JSON.parse(value) - } catch (e) { - try { - jsonlintMod.parse(value) - } catch (e) { - throw new Error(`SyntaxError in your JSON: ${file}: \n ${e}`) - } - } - await this.cache.set(cacheKey, obj) - return obj - } - - return this.cache.get(cacheKey) - } - - async fileExists(rootFolder: string, file: string): Promise { - const fileName = this._normalizeFileName(rootFolder, file) - const cacheKey = this.objectCacheKey(fileName) - - try { - if (await this.cache.has(cacheKey)) { - return true - } - - return this.primaryDriver.fileExists(fileName) - } catch (err) { - return false - } - } - - async deleteFile(rootFolder: string, file: string): Promise { - if (this.isDirectoryGlob) { - throw new Error("Ghost can't read or write under this scope") - } - - const fileName = this._normalizeFileName(rootFolder, file) - await this.primaryDriver.deleteFile(fileName, true) - this.events.emit('changed', fileName) - await this._invalidateFile(fileName) - } - - async renameFile(rootFolder: string, fromName: string, toName: string): Promise { - const fromPath = this._normalizeFileName(rootFolder, fromName) - const toPath = this._normalizeFileName(rootFolder, toName) - - await this.primaryDriver.moveFile(fromPath, toPath) - } - - async syncDatabaseFilesToDisk(rootFolder: string): Promise { - if (!this.useDbDriver) { - return - } - - const remoteFiles = await this.dbDriver.directoryListing(this._normalizeFolderName(rootFolder)) - const filePath = (filename) => this._normalizeFileName(rootFolder, filename) - - await Bluebird.mapSeries(remoteFiles, async (file) => - this.diskDriver.upsertFile(filePath(file), await this.dbDriver.readFile(filePath(file))) - ) - } - - async deleteFolder(folder: string): Promise { - if (this.isDirectoryGlob) { - throw new Error("Ghost can't read or write under this scope") - } - - const folderName = this._normalizeFolderName(folder) - await this.primaryDriver.deleteDir(folderName) - } - - async directoryListing( - rootFolder: string, - fileEndingPattern: string = '*.*', - excludes?: string | string[], - includeDotFiles?: boolean, - options: DirectoryListingOptions = {} - ): Promise { - try { - const files = await this.primaryDriver.directoryListing(this._normalizeFolderName(rootFolder), { - excludes, - includeDotFiles, - ...options - }) - - return (files || []).filter( - minimatch.filter(fileEndingPattern, { matchBase: true, nocase: true, noglobstar: false, dot: includeDotFiles }) - ) - } catch (err) { - if (err && err.message && err.message.includes('ENOENT')) { - return [] - } - throw new VError(err, `Could not list directory under ${rootFolder}`) - } - } - - async getPendingChanges(): Promise { - if (!this.useDbDriver) { - return {} - } - - const revisions = await this.dbDriver.listRevisions(this.baseDir) - const result: PendingRevisions = {} - - for (const revision of revisions) { - const rPath = path.relative(this.baseDir, revision.path) - const folder = rPath.includes(path.sep) ? 
rPath.substr(0, rPath.indexOf(path.sep)) : 'root' - - if (!result[folder]) { - result[folder] = [] - } - - result[folder].push(revision) - } - - return result - } - - async listDbRevisions(): Promise<FileRevision[]> { - return this.dbDriver.listRevisions(this.baseDir) - } - - async listDiskRevisions(): Promise<FileRevision[]> { - return this.diskDriver.listRevisions(this.baseDir) - } - - onFileChanged(callback: (filePath: string) => void): ListenHandle { - const cb = (file) => callback && callback(file) - this.events.on('changed', cb) - return { remove: () => this.events.off('changed', cb) } - } -} diff --git a/packages/nlu-server/src/infrastructure/ghost/index.ts b/packages/nlu-server/src/infrastructure/ghost/index.ts deleted file mode 100644 index f0d297c4..00000000 --- a/packages/nlu-server/src/infrastructure/ghost/index.ts +++ /dev/null @@ -1,54 +0,0 @@ -import _ from 'lodash' -import { ReplaceInFileConfig } from 'replace-in-file' - -export interface SortOrder { - /** The name of the column */ - column: string - /** Is the sort order ascending or descending? Asc by default */ - desc?: boolean -} - -export interface DirectoryListingOptions { - excludes?: string | string[] - includeDotFiles?: boolean - sortOrder?: SortOrder & { column: 'filePath' | 'modifiedOn' } -} - -export interface StorageDriver { - upsertFile(filePath: string, content: Buffer | string, recordRevision: boolean): Promise<void> - readFile(filePath: string): Promise<Buffer> - fileExists(filePath: string): Promise<boolean> - deleteFile(filePath: string, recordRevision: boolean): Promise<void> - deleteDir(dirPath: string): Promise<void> - directoryListing(folder: string, options: DirectoryListingOptions): Promise<string[]> - listRevisions(pathPrefix: string): Promise<FileRevision[]> - deleteRevision(filePath: string, revision: string): Promise<void> - fileSize(filePath: string): Promise<number> - moveFile(fromPath: string, toPath: string): Promise<void> -} - -export interface FileRevision { - path: string - revision: string - created_by: string - created_on: Date -} - -export interface PendingRevisions { - [rootFolder: string]: Array<FileRevision> -} - -export interface ServerWidePendingRevisions { - global: PendingRevisions - bots: PendingRevisions[] -} - -export type ReplaceContent = Pick<ReplaceInFileConfig, 'from' | 'to'> - -// export * from './cache-invalidators' - -export { Database } from './database/db' -export { DBStorageDriver } from './db-driver' -export { DiskStorageDriver } from './disk-driver' -export { GhostService, ScopedGhostService } from './ghost' -export { MemoryObjectCache } from './memory-cache' diff --git a/packages/nlu-server/src/infrastructure/ghost/memory-cache.ts b/packages/nlu-server/src/infrastructure/ghost/memory-cache.ts deleted file mode 100644 index 41182d7d..00000000 --- a/packages/nlu-server/src/infrastructure/ghost/memory-cache.ts +++ /dev/null @@ -1,99 +0,0 @@ -import bytes from 'bytes' -import chokidar from 'chokidar' -import { EventEmitter } from 'events' -import LRU from 'lru-cache' -import path from 'path' -import { getProjectLocation } from '../../project-location' -import { forceForwardSlashes } from './misc' - -export interface ObjectCache { - readonly events: EventEmitter - get<T>(key: string): Promise<T> - set<T>(key: string, obj: T): Promise<void> - has(key: string): Promise<boolean> - invalidate(key: string): Promise<void> - invalidateStartingWith(prefix: string): Promise<void> - sync(message: string): Promise<void> -} - -class FileChangedInvalidator { - constructor(private watcher: chokidar.FSWatcher) {} - - cache?: ObjectCache - - install(objectCache: ObjectCache) { - this.cache = objectCache - - this.watcher.on('add', this.handle) - this.watcher.on('change', this.handle) -
this.watcher.on('unlink', this.handle) - // watcher.on('error', err => this.logger.attachError(err).error('Watcher error')) - } - - handle = async (file: string) => { - if (!this.cache) { - return - } - - const projectLocation = getProjectLocation() - const relativePath = forceForwardSlashes(path.relative(projectLocation, path.dirname(file))) - this.cache.events.emit('invalidation', relativePath) - await this.cache.invalidateStartingWith(relativePath) - } -} - -export class MemoryObjectCache implements ObjectCache { - private cache: LRU<string, any> - private cacheInvalidator: FileChangedInvalidator - - public readonly events: EventEmitter = new EventEmitter() - - constructor(watcher: chokidar.FSWatcher) { - this.cacheInvalidator = new FileChangedInvalidator(watcher) - this.cache = new LRU({ - max: bytes(process.env.BP_MAX_MEMORY_CACHE_SIZE || '1gb'), - length: (obj) => { - if (Buffer.isBuffer(obj)) { - return obj.length - } else if (typeof obj === 'string') { - return obj.length * 2 // chars are 2 bytes in ECMAScript - } - - return 1024 // Rough 1kb default for objects whose size is unknown - } - }) - - this.cacheInvalidator.install(this) - } - - async get<T>(key: string): Promise<T> { - return this.cache.get(key) - } - - async set<T>(key: string, obj: T): Promise<void> { - this.cache.set(key, obj) - this.events.emit('invalidation', key) - } - - async has(key: string): Promise<boolean> { - return this.cache.has(key) - } - - async invalidate(key: string): Promise<void> { - this.cache.del(key) - this.events.emit('invalidation', key) - } - - async invalidateStartingWith(prefix: string): Promise<void> { - const keys = this.cache.keys().filter((x) => { - return x.startsWith('buffer::' + prefix) || x.startsWith('string::' + prefix) || x.startsWith('object::' + prefix) - }) - - keys.forEach((x) => this.cache.del(x)) - this.events.emit('invalidation', prefix) - } - - async sync(message: string): Promise<void> { - this.events.emit('syncDbFilesToDisk', message) - } -} diff --git a/packages/nlu-server/src/infrastructure/ghost/misc.ts b/packages/nlu-server/src/infrastructure/ghost/misc.ts deleted file mode 100644 index e916f276..00000000 --- a/packages/nlu-server/src/infrastructure/ghost/misc.ts +++ /dev/null @@ -1,121 +0,0 @@ -import Bluebird from 'bluebird' -import fse from 'fs-extra' -import glob from 'glob' -import globrex from 'globrex' -import _ from 'lodash' -import mkdirp from 'mkdirp' -import path from 'path' -import stream from 'stream' -import tar from 'tar' -import tmp from 'tmp' -import unzipper from 'unzipper' -import { VError } from 'verror' - -export const forceForwardSlashes = (path) => path.replace(/\\/g, '/') - -export function filterByGlobs<T>(array: T[], iteratee: (value: T) => string, globs: string[]): T[] { - const rules: { regex: RegExp }[] = globs.map((g) => globrex(g, { globstar: true })) - - return array.filter((x) => _.every(rules, (rule) => !rule.regex.test(iteratee(x)))) -} - -// -// No idea if this is necessary -// - -// Source: https://github.com/kevva/is-zip -const isZip = (buf) => { - if (!buf || buf.length < 4) { - return false - } - - return ( - buf[0] === 0x50 && - buf[1] === 0x4b && - (buf[2] === 0x03 || buf[2] === 0x05 || buf[2] === 0x07) && - (buf[3] === 0x04 || buf[3] === 0x06 || buf[3] === 0x08) - ) -} - -export const extractArchive = async (archive: Buffer, destination: string): Promise<string[]> => { - try { - if (!(await fse.pathExists(destination))) { - await mkdirp(destination) - } - - const buffStream = new stream.PassThrough() - buffStream.end(archive) - - let writeStream - if (isZip(archive)) { - writeStream =
unzipper.Extract({ path: destination }) - } else { - writeStream = tar.extract({ strict: true, cwd: destination }) - } - - buffStream.pipe(writeStream) - - await new Promise((resolve, reject) => { - writeStream.on('close', resolve) // emitted by unzipper - writeStream.on('end', resolve) // emitted by tar - writeStream.on('error', reject) - }) - - const files = await Bluebird.fromCallback((cb) => glob('**/*.*', { cwd: destination }, cb)) - return files.map((filePath) => forceForwardSlashes(filePath)) - } catch (err) { - throw new VError(err, `[Archive] Error extracting archive to "${destination}"`) - } -} - -export const createArchive = async (fileName: string, folder: string, files: string[]): Promise => { - try { - await tar.create( - { - cwd: folder, - file: fileName, - portable: true, - gzip: true - }, - files - ) - return fileName - } catch (err) { - throw new VError(err, `[Archive] Error creating archive "${fileName}"`) - } -} - -export const createArchiveFromFolder = async (folder: string, ignoredFiles: string[]): Promise => { - const tmpDir = tmp.dirSync({ unsafeCleanup: true }) - - try { - const files: string[] = await Bluebird.fromCallback((cb) => - glob('**/*', { cwd: folder, ignore: ignoredFiles, nodir: true, dot: true }, cb) - ) - - for (const file of files) { - await mkdirp(path.dirname(path.join(tmpDir.name, file))) - await fse.copyFile(path.resolve(folder, file), path.resolve(tmpDir.name, file)) - } - - const filename = path.join(tmpDir.name, 'archive.tgz') - const archive = await createArchive(filename, tmpDir.name, files) - return await fse.readFile(archive) - } finally { - tmpDir.removeCallback() - } -} - -const regex = { - illegalFile: /[\/\?<>\\:\*\|"]/g, - illegalFolder: /[\?<>\\:\*\|"]/g, - control: /[\x00-\x1f\x80-\x9f]/g, - reserved: /^\.+$/ -} - -export const sanitize = (input: string, type?: 'file' | 'folder') => { - return input - .replace(regex.control, '') - .replace(regex.reserved, '') - .replace(type === 'folder' ? regex.illegalFolder : regex.illegalFile, '') -} diff --git a/packages/nlu-server/src/infrastructure/ghost/typings.ts b/packages/nlu-server/src/infrastructure/ghost/typings.ts deleted file mode 100644 index e6ff4d67..00000000 --- a/packages/nlu-server/src/infrastructure/ghost/typings.ts +++ /dev/null @@ -1,64 +0,0 @@ -export interface DirectoryListingOptions { - excludes?: string | string[] - includeDotFiles?: boolean - sortOrder?: SortOrder & { column: 'filePath' | 'modifiedOn' } -} - -export interface SortOrder { - /** The name of the column */ - column: string - /** Is the sort order ascending or descending? Asc by default */ - desc?: boolean -} - -export interface UpsertOptions { - /** Whether or not to record a revision @default true */ - recordRevision?: boolean - /** When enabled, files changed on the database are synced locally so they can be used locally (eg: require in actions) @default false */ - syncDbToDisk?: boolean - /** This is only applicable for bot-scoped ghost. When true, the lock status of the bot is ignored. 
@default false */ - ignoreLock?: boolean -} - -export interface ListenHandle { - /** Stops listening to the event */ - remove(): void -} - -export interface ScopedGhostService { - /** - * Insert or update the file at the specified location - * @param rootFolder - Folder relative to the scoped parent - * @param file - The name of the file - * @param content - The content of the file - */ - upsertFile(rootFolder: string, file: string, content: string | Buffer, options?: UpsertOptions): Promise<void> - readFileAsBuffer(rootFolder: string, file: string): Promise<Buffer> - readFileAsString(rootFolder: string, file: string): Promise<string> - readFileAsObject<T>(rootFolder: string, file: string): Promise<T> - renameFile(rootFolder: string, fromName: string, toName: string): Promise<void> - deleteFile(rootFolder: string, file: string): Promise<void> - /** - * List all the files matching the ending pattern in the folder. - * DEPRECATION WARNING: `exclude` and `includeDotFiles` will have to be provided through `options` in future versions - * @example bp.ghost.forBot('welcome-bot').directoryListing('./questions', '*.json') - * @param rootFolder - Folder relative to the scoped parent - * @param fileEndingPattern - The pattern to match. Don't forget to include wildcards! - * @param @deprecated exclude - The pattern to match excluded files. - * @param @deprecated includeDotFiles - Whether or not to include files starting with a dot (normally disabled files) - */ - directoryListing( - rootFolder: string, - fileEndingPattern: string, - exclude?: string | string[], - includeDotFiles?: boolean, - options?: DirectoryListingOptions - ): Promise<string[]> - /** - * Starts listening on all file changes (deletions, inserts and updates) - * `callback` will be called for every change - * To stop listening, call the `remove()` method of the returned ListenHandle - */ - onFileChanged(callback: (filePath: string) => void): ListenHandle - fileExists(rootFolder: string, file: string): Promise<boolean> -} diff --git a/packages/nlu-server/src/infrastructure/index.ts b/packages/nlu-server/src/infrastructure/index.ts new file mode 100644 index 00000000..b208a66c --- /dev/null +++ b/packages/nlu-server/src/infrastructure/index.ts @@ -0,0 +1,3 @@ +export * from './model-repo' +export * from './training-repo' +export * from './linting-repo' diff --git a/packages/nlu-server/src/infrastructure/linting-repo/db-linting-repo.ts b/packages/nlu-server/src/infrastructure/linting-repo/db-linting-repo.ts new file mode 100644 index 00000000..3e07d837 --- /dev/null +++ b/packages/nlu-server/src/infrastructure/linting-repo/db-linting-repo.ts @@ -0,0 +1,277 @@ +import { + LintingErrorType, + LintingStatus, + DatasetIssue, + IssueCode, + IssueData, + IssueDefinition, + LintingError, + IssueComputationSpeed +} from '@botpress/nlu-client' +import * as NLUEngine from '@botpress/nlu-engine' +import { Logger } from '@bpinternal/log4bot' +import Bluebird from 'bluebird' +import { Knex } from 'knex' +import _ from 'lodash' +import moment from 'moment' +import ms from 'ms' +import { createTableIfNotExists } from '../database-utils' +import { packTrainSet, unpackTrainSet } from '../dataset-serializer' +import { LintingRepository } from '.'
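The DatabaseLintingRepo added below stores linting progress under a composite primary key and upserts with knex's insert/onConflict/merge, the same pattern its set method uses. A minimal sketch of that pattern against a generic table (the table and column names here are illustrative, not from this diff):

import { Knex } from 'knex'

const upsertTaskStatus = async (db: Knex, row: { appId: string; modelId: string; status: string }) =>
  db
    .table('some_task_table')
    .insert(row)
    .onConflict(['appId', 'modelId']) // requires a primary or unique key on (appId, modelId)
    .merge(['status']) // on conflict, only the status column is updated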
+import { Linting, LintingId, LintingState } from './typings' + +type IssuesRow = { + id: string + appId: string + modelId: string + code: string + message: string + data: object +} + +type LintingRowId = { + appId: string + modelId: string + speed: IssueComputationSpeed +} + +type LintingRow = LintingRowId & { + status: LintingStatus + currentCount: number + totalCount: number + cluster: string + dataset: string + error_type?: LintingErrorType + error_message?: string + error_stack?: string + startedOn: string + updatedOn: string +} + +const ISSUES_TABLE_NAME = 'nlu_dataset_issues' +const LINTINGS_TABLE_NAME = 'nlu_lintings' + +const JANITOR_MS_INTERVAL = ms('1m') // 60,000 ms +const MS_BEFORE_PRUNE = ms('1h') + +export class DatabaseLintingRepo implements LintingRepository { + private _logger: Logger + private _janitorIntervalId: NodeJS.Timeout | undefined + + private get _issues() { + return this._database.table(ISSUES_TABLE_NAME) + } + + private get _lintings() { + return this._database.table(LINTINGS_TABLE_NAME) + } + + constructor(protected _database: Knex, logger: Logger, private _engine: NLUEngine.Engine) { + this._logger = logger.sub('linting-repo') + } + + public async initialize() { + await createTableIfNotExists(this._database, LINTINGS_TABLE_NAME, (table: Knex.CreateTableBuilder) => { + table.string('appId').notNullable() + table.string('modelId').notNullable() + table.string('speed').notNullable() + table.string('status').notNullable() + table.string('currentCount').notNullable() + table.string('totalCount').notNullable() + table.string('cluster').nullable() + table.text('dataset').notNullable() + table.string('error_type').nullable() + table.text('error_message').nullable() + table.text('error_stack').nullable() + table.timestamp('startedOn').notNullable() + table.timestamp('updatedOn').notNullable() + table.primary(['appId', 'modelId', 'speed']) + }) + + await createTableIfNotExists(this._database, ISSUES_TABLE_NAME, (table: Knex.CreateTableBuilder) => { + table.string('id').primary() + table.string('appId').notNullable() + table.string('modelId').notNullable() + table.string('speed').notNullable() + table.string('code').notNullable() + table.text('message').notNullable() + table.json('data').notNullable() + + table + .foreign(['appId', 'modelId', 'speed']) + .references(['appId', 'modelId', 'speed']) + .inTable(LINTINGS_TABLE_NAME) + .onDelete('CASCADE') + }) + + this._janitorIntervalId = setInterval(this._janitor.bind(this), JANITOR_MS_INTERVAL) + } + + public async teardown() { + this._logger.debug('Linting repo teardown...') + this._janitorIntervalId && clearInterval(this._janitorIntervalId) + } + + public async has(id: LintingId): Promise<boolean> { + const { appId, modelId, speed } = id + const stringId = NLUEngine.modelIdService.toString(modelId) + const lintingId: LintingRowId = { appId, modelId: stringId, speed } + const linting = await this._lintings.select('*').where(lintingId).first() + return !!linting + } + + public async get(id: LintingId): Promise<Linting | undefined> { + const { appId, modelId, speed } = id + const stringId = NLUEngine.modelIdService.toString(modelId) + const lintingId: LintingRowId = { appId, modelId: stringId, speed } + const lintingRow = await this._lintings.select('*').where(lintingId).first() + if (!lintingRow) { + return + } + return this._fromLintingRow(lintingRow) + } + + public async set(linting: Linting): Promise<void> { + const { modelId, appId, speed, currentCount, cluster, dataset, issues, totalCount, status, error } = linting + const { type: error_type, message:
error_message, stack: error_stack } = error ?? {} + const stringId = NLUEngine.modelIdService.toString(modelId) + + const lintingTaskRow: LintingRow = { + appId, + modelId: stringId, + speed, + currentCount, + totalCount, + cluster, + dataset: packTrainSet(dataset), + status, + error_type, + error_stack, + error_message, + startedOn: new Date().toISOString(), + updatedOn: new Date().toISOString() + } + + await this._lintings + .insert(lintingTaskRow) + .onConflict(['appId', 'modelId', 'speed']) + .merge([ + 'status', + 'currentCount', + 'totalCount', + 'cluster', + 'dataset', + 'error_type', + 'error_message', + 'error_stack', + 'updatedOn' + ]) + + if (!issues.length) { + return + } + + const issueRows: IssuesRow[] = issues + .map(this._issueToRow.bind(this)) + .map((r) => ({ speed, appId, modelId: stringId, ...r })) + await this._issues.insert(issueRows).onConflict('id').merge() + } + + public async query(query: Partial<LintingState>): Promise<Linting[]> { + const { status, currentCount, totalCount } = query + const rowFilters: Partial<LintingRow> = _.pickBy({ status, currentCount, totalCount }, (x) => x !== undefined) + const rows: LintingRow[] = await this._lintings.where(rowFilters).select('*') + return Bluebird.map(rows, this._fromLintingRow.bind(this)) + } + + public async queryOlderThan(query: Partial<LintingState>, threshold: Date): Promise<Linting[]> { + const iso = threshold.toISOString() + const { status, currentCount, totalCount } = query + const rowFilters: Partial<LintingRow> = _.pickBy({ status, currentCount, totalCount }, (x) => x !== undefined) + const rows: LintingRow[] = await this._lintings.where(rowFilters).where('updatedOn', '<=', iso).select('*') + return Bluebird.map(rows, this._fromLintingRow.bind(this)) + } + + private _fromLintingRow = async (row: LintingRow): Promise<Linting> => { + const { + appId, + modelId, + speed, + status, + currentCount, + cluster, + dataset, + totalCount, + error_message, + error_stack, + error_type + } = row + + const issueRows = await this._issues.select('*').where({ appId, modelId }) + const issues = issueRows.map(this._rowToIssue.bind(this)) + + const error = this._toError(error_type, error_message, error_stack) + const state: Linting = { + appId, + modelId: NLUEngine.modelIdService.fromString(modelId), + speed, + status, + currentCount, + totalCount, + cluster, + dataset: unpackTrainSet(dataset), + error, + issues + } + return state + } + + private async _janitor() { + const now = moment() + const before = now.subtract({ milliseconds: MS_BEFORE_PRUNE }) + const nDeletions = await this._deleteOlderThan(before.toDate()) + if (nDeletions) { + this._logger.debug(`Pruning ${nDeletions} linting states from database`) + } + return + } + + private _deleteOlderThan = async (threshold: Date): Promise<number> => { + const iso = threshold.toISOString() + return this._lintings.where('updatedOn', '<=', iso).delete() + } + + private _toError = ( + error_type: LintingErrorType | undefined, + error_message: string | undefined, + error_stack: string | undefined + ): LintingError | undefined => { + if (!error_type) { + return + } + return { message: error_message!, stack: error_stack!, type: error_type!
} + } + + private _rowToIssue = (row: IssuesRow & { id: string }): DatasetIssue<IssueCode> => { + const { code: rawCode, data, message, id } = row + + const code = rawCode as IssueCode + const definition: IssueDefinition | undefined = this._engine.getIssueDetails(code) + + if (!definition) { + throw new Error(`Code "${rawCode}" found in table "${ISSUES_TABLE_NAME}" is not a known dataset issue code.`) + } + + return <DatasetIssue<IssueCode>>{ + ...definition, + id, + data: data as IssueData, + message + } + } + + private _issueToRow = (issue: DatasetIssue<IssueCode>): Omit<IssuesRow, 'appId' | 'modelId'> => { + const { code, message, data, id } = issue + return { id, code, message, data: data as object } + } +} diff --git a/packages/nlu-server/src/infrastructure/linting-repo/in-mem-linting-repo.ts b/packages/nlu-server/src/infrastructure/linting-repo/in-mem-linting-repo.ts new file mode 100644 index 00000000..dc3ccdc8 --- /dev/null +++ b/packages/nlu-server/src/infrastructure/linting-repo/in-mem-linting-repo.ts @@ -0,0 +1,119 @@ +import { IssueComputationSpeed, LintingState } from '@botpress/nlu-client' +import * as NLUEngine from '@botpress/nlu-engine' +import { Logger } from '@bpinternal/log4bot' +import _ from 'lodash' +import moment from 'moment' +import ms from 'ms' +import { LintingRepository } from '.' +import { Linting, LintingId } from './typings' + +type LintEntry = Linting & { updatedOn: Date } + +const KEY_JOIN_CHAR = '\u2581' +const JANITOR_MS_INTERVAL = ms('1m') // 60,000 ms +const MS_BEFORE_PRUNE = ms('1h') + +export class InMemoryLintingRepo implements LintingRepository { + private _logger: Logger + private _janitorIntervalId: NodeJS.Timeout | undefined + private _lintingTable: { [id: string]: LintEntry } = {} + + constructor(logger: Logger) { + this._logger = logger.sub('linting-repo') + } + + public async initialize() { + this._logger.debug('Linting repo initializing...') + this._janitorIntervalId = setInterval(this._janitor.bind(this), JANITOR_MS_INTERVAL) + } + + public async teardown() { + this._logger.debug('Linting repo teardown...') + this._janitorIntervalId && clearInterval(this._janitorIntervalId) + } + + public async has(id: LintingId): Promise<boolean> { + const { appId, modelId, speed } = id + const taskId = this._makeLintingKey({ appId, modelId, speed }) + return !!this._lintingTable[taskId] + } + + public async get(id: LintingId): Promise<Linting | undefined> { + const { appId, modelId, speed } = id + const taskId = this._makeLintingKey({ appId, modelId, speed }) + const linting = this._lintingTable[taskId] + if (!linting) { + return + } + return linting + } + + public async set(linting: Linting): Promise<void> { + const { appId, modelId, speed } = linting + const current = await this.get({ appId, modelId, speed }) + const currentIssues = current?.issues ??
[] + const updatedIssues = _.uniqBy([...currentIssues, ...linting.issues], (i) => i.id) + return this._set(appId, modelId, speed, { ...linting, issues: updatedIssues }) + } + + public async query(query: Partial<LintingState>): Promise<Linting[]> { + const allLintings = this._getAllLintings() + return this._filter(allLintings, query) + } + + public queryOlderThan = async (query: Partial<LintingState>, threshold: Date): Promise<Linting[]> => { + const allLintings = this._getAllLintings() + const olderThan = allLintings.filter((t) => moment(t.updatedOn).isBefore(threshold)) + return this._filter(olderThan, query) + } + + private _filter = (lintings: Linting[], filters: Partial<LintingState>) => { + let queryResult: Linting[] = lintings + for (const field in filters) { + queryResult = queryResult.filter((t) => t[field] === filters[field]) + } + return queryResult + } + + private async _janitor() { + const threshold = moment().subtract(MS_BEFORE_PRUNE, 'ms').toDate() + + const lintingsToPrune = await this.queryOlderThan({}, threshold) + if (lintingsToPrune.length) { + this._logger.debug(`Pruning ${lintingsToPrune.length} linting states from memory`) + } + + for (const t of lintingsToPrune) { + this._delete(t) + } + } + + private _delete = (id: LintingId) => { + const key = this._makeLintingKey(id) + delete this._lintingTable[key] + } + + private _getAllLintings = (): (Linting & { updatedOn: Date })[] => { + return _(this._lintingTable) + .toPairs() + .map(([key, value]) => ({ ...this._parseLintingKey(key), ...value })) + .value() + } + + private async _set(appId: string, modelId: NLUEngine.ModelId, speed: IssueComputationSpeed, linting: Linting) { + const taskId = this._makeLintingKey({ appId, modelId, speed }) + this._lintingTable[taskId] = { ...linting, updatedOn: new Date() } + } + + private _makeLintingKey = (id: LintingId): string => { + const { appId, modelId, speed } = id + const stringId = NLUEngine.modelIdService.toString(modelId) + return [stringId, appId, speed].join(KEY_JOIN_CHAR) + } + + private _parseLintingKey(key: string): LintingId { + const [stringId, appId, speed] = key.split(KEY_JOIN_CHAR) + const modelId = NLUEngine.modelIdService.fromString(stringId) + return { modelId, appId, speed: speed as IssueComputationSpeed } + } +} diff --git a/packages/nlu-server/src/infrastructure/linting-repo/index.ts b/packages/nlu-server/src/infrastructure/linting-repo/index.ts new file mode 100644 index 00000000..bd3c8460 --- /dev/null +++ b/packages/nlu-server/src/infrastructure/linting-repo/index.ts @@ -0,0 +1,3 @@ +export { InMemoryLintingRepo } from './in-mem-linting-repo' +export { DatabaseLintingRepo } from './db-linting-repo' +export { LintingRepository, Linting, LintingId, LintingState } from './typings' diff --git a/packages/nlu-server/src/infrastructure/linting-repo/typings.ts b/packages/nlu-server/src/infrastructure/linting-repo/typings.ts new file mode 100644 index 00000000..2289a9c5 --- /dev/null +++ b/packages/nlu-server/src/infrastructure/linting-repo/typings.ts @@ -0,0 +1,27 @@ +import { IssueComputationSpeed, LintingState as ClientLintingState, TrainInput } from '@botpress/nlu-client' +import { ModelId } from '@botpress/nlu-engine' + +export type LintingRepository = { + initialize(): Promise<void> + teardown(): Promise<void> + get(id: LintingId): Promise<Linting | undefined> + set(linting: Linting): Promise<void> + has(id: LintingId): Promise<boolean> + query(query: Partial<LintingState>): Promise<Linting[]> + queryOlderThan(query: Partial<LintingState>, threshold: Date): Promise<Linting[]> +} + +export type LintingId = { + modelId: ModelId + appId: string + speed: IssueComputationSpeed +} + +export type LintingState =
ClientLintingState & { + cluster: string +} + +export type Linting = LintingId & + LintingState & { + dataset: TrainInput + } diff --git a/packages/nlu-server/src/infrastructure/make-ghost.ts b/packages/nlu-server/src/infrastructure/make-ghost.ts deleted file mode 100644 index dada660f..00000000 --- a/packages/nlu-server/src/infrastructure/make-ghost.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { Logger } from '@botpress/logger' -import chokidar from 'chokidar' -import { Database, DBStorageDriver, DiskStorageDriver, GhostService, MemoryObjectCache } from './ghost' - -export const makeGhost = (logger: Logger, modelDir: string, watcher: chokidar.FSWatcher, dbURL?: string) => { - const _db = new Database(logger, dbURL) - const diskDriver = new DiskStorageDriver({ basePath: modelDir }) - const dbdriver = new DBStorageDriver(_db) - const cache = new MemoryObjectCache(watcher) - const ghost = new GhostService(diskDriver, dbdriver, cache, logger) - return ghost -} diff --git a/packages/nlu-server/src/infrastructure/model-repo.ts b/packages/nlu-server/src/infrastructure/model-repo.ts deleted file mode 100644 index ce0df59a..00000000 --- a/packages/nlu-server/src/infrastructure/model-repo.ts +++ /dev/null @@ -1,153 +0,0 @@ -import { Logger } from '@botpress/logger' -import * as NLUEngine from '@botpress/nlu-engine' -import Bluebird from 'bluebird' -import fse, { WriteStream } from 'fs-extra' -import _ from 'lodash' -import path from 'path' -import { Stream } from 'stream' -import tar from 'tar' -import tmp from 'tmp' -import { GhostService, ScopedGhostService } from './ghost' - -interface PruneOptions { - keep: number -} - -const MODELS_DIR = './models' -const MODELS_EXT = 'model' - -const { modelIdService } = NLUEngine - -export class ModelRepository { - private _logger: Logger - - constructor(private _ghost: GhostService, logger: Logger) { - this._logger = logger.sub('model-repo') - } - - async initialize() { - this._logger.debug('Model repo initializing...') - } - - async teardown() { - this._logger.debug('Model repo teardown...') - } - - public async hasModel(appId: string, modelId: NLUEngine.ModelId): Promise { - return !!(await this.getModel(appId, modelId)) - } - - /** - * - * @param modelId The desired model id - * @returns the corresponding model - */ - public async getModel(appId: string, modelId: NLUEngine.ModelId): Promise { - const scopedGhost = this._getScopedGhostForAppID(appId) - - const stringId = modelIdService.toString(modelId) - const fname = `${stringId}.${MODELS_EXT}` - - if (!(await scopedGhost.fileExists(MODELS_DIR, fname))) { - return - } - const buffStream = new Stream.PassThrough() - buffStream.end(await scopedGhost.readFileAsBuffer(MODELS_DIR, fname)) - const tmpDir = tmp.dirSync({ unsafeCleanup: true }) - - const tarStream = tar.x({ cwd: tmpDir.name, strict: true }, ['model']) as WriteStream - buffStream.pipe(tarStream) - await new Promise((resolve) => tarStream.on('close', resolve)) - - const modelBuff = await fse.readFile(path.join(tmpDir.name, 'model')) - let mod - try { - mod = JSON.parse(modelBuff.toString()) - } catch (err) { - await scopedGhost.deleteFile(MODELS_DIR, fname) - } finally { - tmpDir.removeCallback() - return mod - } - } - - public async saveModel(appId: string, model: NLUEngine.Model): Promise { - const serialized = JSON.stringify(model) - - const stringId = modelIdService.toString(model.id) - const fname = `${stringId}.${MODELS_EXT}` - - const scopedGhost = this._getScopedGhostForAppID(appId) - - // TODO replace that logic with in-memory streams - 
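The TODO above hints at replacing the tmp-file round trip with in-memory streams. One possible direction (an assumption, not something this diff implements): tar.create returns a readable stream when no file option is passed, so the archive can be collected directly into a buffer.

import tar from 'tar'

const packToBuffer = (cwd: string, files: string[]): Promise<Buffer> =>
  new Promise((resolve, reject) => {
    const chunks: Buffer[] = []
    tar
      .create({ cwd, portable: true, gzip: true }, files) // no `file` option: yields a stream
      .on('data', (chunk: Buffer) => chunks.push(chunk))
      .on('end', () => resolve(Buffer.concat(chunks)))
      .on('error', reject)
  })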
const tmpDir = tmp.dirSync({ unsafeCleanup: true }) - const tmpFileName = path.join(tmpDir.name, 'model') - await fse.writeFile(tmpFileName, serialized) - const archiveName = path.join(tmpDir.name, fname) - await tar.create( - { - file: archiveName, - cwd: tmpDir.name, - portable: true, - gzip: true - }, - ['model'] - ) - const buffer = await fse.readFile(archiveName) - await scopedGhost.upsertFile(MODELS_DIR, fname, buffer) - tmpDir.removeCallback() - } - - public async listModels(appId: string, filters: Partial = {}): Promise { - const scopedGhost = this._getScopedGhostForAppID(appId) - const files = await scopedGhost.directoryListing(MODELS_DIR, `*.${MODELS_EXT}`, undefined, undefined, { - sortOrder: { - column: 'modifiedOn', - desc: true - } - }) - - const modelIds = files - .map((f) => f.substring(0, f.lastIndexOf(`.${MODELS_EXT}`))) - .filter((stringId) => modelIdService.isId(stringId)) - .map((stringId) => modelIdService.fromString(stringId)) - - return _.filter(modelIds, filters) - } - - public async pruneModels( - appId: string, - options: PruneOptions, - filters: Partial = {} - ): Promise { - const models = await this.listModels(appId, filters) - - const { keep } = options - const toPrune = models.slice(keep) - await Bluebird.each(toPrune, (m) => this.deleteModel(appId, m)) - - return toPrune - } - - public async exists(appId: string, modelId: NLUEngine.ModelId): Promise { - const scopedGhost = this._getScopedGhostForAppID(appId) - - const stringId = modelIdService.toString(modelId) - const fname = `${stringId}.${MODELS_EXT}` - - return scopedGhost.fileExists(MODELS_DIR, fname) - } - - public async deleteModel(appId: string, modelId: NLUEngine.ModelId): Promise { - const scopedGhost = this._getScopedGhostForAppID(appId) - - const stringId = modelIdService.toString(modelId) - const fname = `${stringId}.${MODELS_EXT}` - - return scopedGhost.deleteFile(MODELS_DIR, fname) - } - - private _getScopedGhostForAppID(appId: string): ScopedGhostService { - return appId ? 
this._ghost.forBot(appId) : this._ghost.root() - } -} diff --git a/packages/nlu-server/src/infrastructure/model-repo/db-model-repo.ts b/packages/nlu-server/src/infrastructure/model-repo/db-model-repo.ts new file mode 100644 index 00000000..af289e01 --- /dev/null +++ b/packages/nlu-server/src/infrastructure/model-repo/db-model-repo.ts @@ -0,0 +1,121 @@ +import * as NLUEngine from '@botpress/nlu-engine' +import { Logger } from '@bpinternal/log4bot' +import Bluebird from 'bluebird' +import { Knex } from 'knex' +import _ from 'lodash' +import { createTableIfNotExists } from '../database-utils' +import { ModelRepository, PruneOptions } from './typings' + +const TABLE_NAME = 'nlu_models' +const { modelIdService } = NLUEngine + +type TableKey = { + appId: string + modelId: string +} + +type TableRow = { + content: Buffer + updatedOn: string +} & TableKey + +type Column = keyof TableRow + +type Result> = { + [c in C[number]]: TableRow[c] +} + +export class DbModelRepository implements ModelRepository { + private _logger: Logger + + constructor(private _database: Knex, logger: Logger) { + this._logger = logger.sub('model-repo') + } + + private get table() { + return this._database.table(TABLE_NAME) + } + + public async initialize() { + this._logger.debug('Model repo initializing...') + await createTableIfNotExists(this._database, TABLE_NAME, (table: Knex.CreateTableBuilder) => { + table.string('appId').notNullable() + table.string('modelId').notNullable() + table.binary('content').notNullable() + table.timestamp('updatedOn').notNullable() + table.primary(['appId', 'modelId']) + }) + } + + public async teardown() { + this._logger.debug('Model repo teardown...') + return this._database.destroy() + } + + public async getModel(appId: string, modelId: NLUEngine.ModelId): Promise { + const stringId = modelIdService.toString(modelId) + const filter: Partial = { appId, modelId: stringId } + const row = await this.table.select('*').where(filter).first() + if (!row) { + return + } + + const { content } = row + return Buffer.from(content) + } + + public async saveModel(appId: string, modelId: NLUEngine.ModelId, modelBuffer: Buffer): Promise { + const modelExists = await this.exists(appId, modelId) + const stringId = modelIdService.toString(modelId) + const iso = new Date().toISOString() + if (modelExists) { + const filter: TableKey = { appId, modelId: stringId } + const row: Partial = { content: modelBuffer, updatedOn: iso } + await this.table.update(row).where(filter) + return + } + const row: TableRow = { appId, modelId: stringId, content: modelBuffer, updatedOn: iso } + return this.table.insert(row) + } + + public async listModels(appId: string, filters: Partial = {}): Promise { + const rowfilters: Partial = { appId } + const columns = ['appId', 'modelId', 'updatedOn'] as const + const queryResult: Result[] = await this.table.select(...columns).where(rowfilters) + + return _(queryResult) + .orderBy(({ updatedOn }) => new Date(updatedOn).getTime(), 'asc') + .map(({ modelId }) => modelIdService.fromString(modelId)) + .filter(filters) + .value() + } + + public async pruneModels( + appId: string, + options: PruneOptions, + filters: Partial = {} + ): Promise { + const models = await this.listModels(appId, filters) + const { keep } = options + const toPrune = models.slice(keep) + await Bluebird.each(toPrune, (m) => this.deleteModel(appId, m)) + return toPrune + } + + public async exists(appId: string, modelId: NLUEngine.ModelId): Promise { + const stringId = modelIdService.toString(modelId) + const filter: 
diff --git a/packages/nlu-server/src/infrastructure/model-repo/fs-model-repo.ts b/packages/nlu-server/src/infrastructure/model-repo/fs-model-repo.ts
new file mode 100644
index 00000000..fcca401c
--- /dev/null
+++ b/packages/nlu-server/src/infrastructure/model-repo/fs-model-repo.ts
@@ -0,0 +1,111 @@
+import * as NLUEngine from '@botpress/nlu-engine'
+import { Logger } from '@bpinternal/log4bot'
+import Bluebird from 'bluebird'
+import fse from 'fs-extra'
+import _ from 'lodash'
+import path from 'path'
+import { ModelRepository, PruneOptions } from './typings'
+
+const MODELS_DIR = 'models'
+const MODELS_EXT = 'model'
+const { modelIdService } = NLUEngine
+
+export class FileSystemModelRepository implements ModelRepository {
+  private _logger: Logger
+
+  constructor(private _basePath: string, logger: Logger) {
+    this._logger = logger.sub('model-repo')
+  }
+
+  public async initialize() {
+    this._logger.debug('Model repo initializing...')
+    const basePathExists = fse.existsSync(this._basePath)
+    if (!basePathExists) {
+      throw new Error(`Model directory \"${this._basePath}\" does not exist.`)
+    }
+    return this._syncDir(path.join(this._basePath, MODELS_DIR))
+  }
+
+  public async teardown() {
+    this._logger.debug('Model repo teardown...')
+  }
+
+  public async getModel(appId: string, modelId: NLUEngine.ModelId): Promise<Buffer | undefined> {
+    const fileName = this._computeFilePath(appId, modelId)
+    if (!fse.existsSync(fileName)) {
+      return
+    }
+    return fse.readFile(fileName)
+  }
+
+  public async saveModel(appId: string, modelId: NLUEngine.ModelId, modelBuffer: Buffer): Promise<void> {
+    const filePath = this._computeFilePath(appId, modelId)
+
+    await this._syncDir(this._computeDirPath(appId))
+    return fse.writeFile(filePath, modelBuffer)
+  }
+
+  public async listModels(appId: string, filters: Partial<NLUEngine.ModelId> = {}): Promise<NLUEngine.ModelId[]> {
+    const dirPath = this._computeDirPath(appId)
+    await this._syncDir(dirPath)
+    const allFiles = await fse.readdir(dirPath)
+    const allFileStats = await Bluebird.map(allFiles, async (f) => ({
+      file: f,
+      stat: await fse.stat(path.join(dirPath, f))
+    }))
+
+    const modelfileEndingPattern = `.${MODELS_EXT}`
+
+    return _(allFileStats)
+      .orderBy(({ stat }) => stat.mtime.getTime(), 'asc')
+      .filter(({ file }) => file.endsWith(modelfileEndingPattern))
+      .map(({ file }) => file.substring(0, file.lastIndexOf(modelfileEndingPattern)))
+      .filter((stringId) => modelIdService.isId(stringId))
+      .map((stringId) => modelIdService.fromString(stringId))
+      .filter(filters)
+      .value()
+  }
+
+  public async pruneModels(
+    appId: string,
+    options: PruneOptions,
+    filters: Partial<NLUEngine.ModelId> = {}
+  ): Promise<NLUEngine.ModelId[]> {
+    const models = await this.listModels(appId, filters)
+    const { keep } = options
+    const toPrune = models.slice(keep)
+    await Bluebird.each(toPrune, (m) => this.deleteModel(appId, m))
+    return toPrune
+  }
+
+  public async exists(appId: string, modelId: NLUEngine.ModelId): Promise<boolean> {
+    const filePath = this._computeFilePath(appId, modelId)
+    return fse.existsSync(filePath)
+  }
+
+  public async deleteModel(appId: string, modelId: NLUEngine.ModelId): Promise<void> {
+    const filePath =
this._computeFilePath(appId, modelId) + return fse.unlink(filePath) + } + + private _computeFilePath = (appId: string, modelId: NLUEngine.ModelId): string => { + const dirPath = this._computeDirPath(appId) + const stringId = modelIdService.toString(modelId) + const fname = `${stringId}.${MODELS_EXT}` + const rawPath = path.join(dirPath, fname) + return path.normalize(rawPath) + } + + private _computeDirPath = (appId: string): string => { + const appIdDir = encodeURIComponent(appId) + const rawPath = path.join(this._basePath, MODELS_DIR, appIdDir) + return path.normalize(rawPath) + } + + private _syncDir = async (dirPath: string): Promise => { + if (fse.existsSync(dirPath)) { + return + } + return fse.mkdir(dirPath) + } +} diff --git a/packages/nlu-server/src/infrastructure/model-repo/index.ts b/packages/nlu-server/src/infrastructure/model-repo/index.ts new file mode 100644 index 00000000..feb0e009 --- /dev/null +++ b/packages/nlu-server/src/infrastructure/model-repo/index.ts @@ -0,0 +1,3 @@ +export { DbModelRepository } from './db-model-repo' +export { FileSystemModelRepository } from './fs-model-repo' +export { ModelRepository } from './typings' diff --git a/packages/nlu-server/src/infrastructure/model-repo/typings.ts b/packages/nlu-server/src/infrastructure/model-repo/typings.ts new file mode 100644 index 00000000..dec16417 --- /dev/null +++ b/packages/nlu-server/src/infrastructure/model-repo/typings.ts @@ -0,0 +1,16 @@ +import { ModelId } from '@botpress/nlu-engine' + +export type PruneOptions = { + keep: number +} + +export type ModelRepository = { + initialize(): Promise + teardown(): Promise + getModel(appId: string, modelId: ModelId): Promise + saveModel(appId: string, modelId: ModelId, model: Buffer): Promise + listModels(appId: string, filters?: Partial): Promise + pruneModels(appId: string, options: PruneOptions, filters?: Partial): Promise + exists(appId: string, modelId: ModelId): Promise + deleteModel(appId: string, modelId: ModelId): Promise +} diff --git a/packages/nlu-server/src/infrastructure/training-repo/db-training-repo.ts b/packages/nlu-server/src/infrastructure/training-repo/db-training-repo.ts index b9039866..b3a228cf 100644 --- a/packages/nlu-server/src/infrastructure/training-repo/db-training-repo.ts +++ b/packages/nlu-server/src/infrastructure/training-repo/db-training-repo.ts @@ -1,38 +1,23 @@ -import { LockedTransactionQueue } from '@botpress/locks' -import { Logger } from '@botpress/logger' -import { TrainingError, TrainingErrorType, TrainingStatus, TrainInput } from '@botpress/nlu-client' +import { TrainingError, TrainingErrorType, TrainingStatus } from '@botpress/nlu-client' import { modelIdService } from '@botpress/nlu-engine' -import jsonpack from 'jsonpack' -import Knex from 'knex' +import { Logger } from '@bpinternal/log4bot' +import { Knex } from 'knex' import _ from 'lodash' import moment from 'moment' import ms from 'ms' -import { - Training, - TrainingId, - TrainingState, - WrittableTrainingRepository, - TrainingTrx, - TrainingRepository -} from './typings' +import { createTableIfNotExists } from '../database-utils' +import { packTrainSet, unpackTrainSet } from '../dataset-serializer' +import { Training, TrainingId, TrainingState, TrainingRepository, TrainingListener } from './typings' const TABLE_NAME = 'nlu_trainings' -const TRANSACTION_TIMEOUT_MS = ms('5s') - -const timeout = (ms: number) => { - return new Promise((_, reject) => { - setTimeout(() => reject(new Error("Transaction exceeded it's time limit")), ms) - }) -} - const JANITOR_MS_INTERVAL = 
ms('1m') // 60,000 ms const MS_BEFORE_PRUNE = ms('1h') -interface TableId { +type TableId = { appId: string modelId: string } -interface TableRow extends TableId { +type TableRow = { status: TrainingStatus progress: number error_type?: TrainingErrorType @@ -41,13 +26,24 @@ interface TableRow extends TableId { cluster: string dataset: string updatedOn: string -} +} & TableId + +export class DbTrainingRepository implements TrainingRepository { + private _listeners: TrainingListener[] = [] + private _janitorIntervalId: NodeJS.Timeout | undefined + + constructor(private _database: Knex, private _logger: Logger) {} + + public addListener(listener: TrainingListener) { + this._listeners.push(listener) + } -class DbWrittableTrainingRepo implements WrittableTrainingRepository { - constructor(protected _database: Knex, private _clusterId: string) {} + public removeListener(listenerToRemove: TrainingListener) { + _.remove(this._listeners, (listener) => listener === listenerToRemove) + } public async initialize(): Promise { - await this._createTableIfNotExists(this._database, TABLE_NAME, (table) => { + await createTableIfNotExists(this._database, TABLE_NAME, (table: Knex.CreateTableBuilder) => { table.string('appId').notNullable() table.string('modelId').notNullable() table.string('status').notNullable() @@ -60,24 +56,20 @@ class DbWrittableTrainingRepo implements WrittableTrainingRepository { table.timestamp('updatedOn').notNullable() table.primary(['appId', 'modelId']) }) - } - private _createTableIfNotExists = async (knex: Knex, tableName: string, cb: Knex.KnexCallback): Promise => { - return knex.schema.hasTable(tableName).then((exists) => { - if (exists) { - return false - } - return knex.schema.createTable(tableName, cb).then(() => true) - }) + this._janitorIntervalId = setInterval(this._janitor.bind(this), JANITOR_MS_INTERVAL) } - public async teardown(): Promise {} + public async teardown(): Promise { + this._janitorIntervalId && clearInterval(this._janitorIntervalId) + } private get table() { - return this._database.table(TABLE_NAME) + return this._database.table(TABLE_NAME) } public set = async (training: Training): Promise => { + this._onTrainingEvent(training) const row = this._trainingToRow(training) const { appId, modelId } = row @@ -109,13 +101,8 @@ class DbWrittableTrainingRepo implements WrittableTrainingRepository { return this.table.where(tableId).delete() } - public deleteOlderThan = async (threshold: Date): Promise => { - const iso = moment(threshold).toDate().toISOString() - return this.table.where('updatedOn', '<=', iso).delete() - } - public queryOlderThan = async (query: Partial, threshold: Date): Promise => { - const iso = this._toISO(threshold) + const iso = threshold.toISOString() const rowFilters: Partial = this._partialTrainStateToQuery(query) const rows: TableRow[] = await this.table.where(rowFilters).where('updatedOn', '<=', iso).select('*') @@ -123,10 +110,25 @@ class DbWrittableTrainingRepo implements WrittableTrainingRepository { return rows.map(this._rowToTraining.bind(this)) } + private async _janitor() { + const now = moment() + const before = now.subtract({ milliseconds: MS_BEFORE_PRUNE }) + const nDeletions = await this._deleteOlderThan(before.toDate()) + if (nDeletions) { + this._logger.debug(`Pruning ${nDeletions} training state from database`) + } + return + } + + private _deleteOlderThan = async (threshold: Date): Promise => { + const iso = threshold.toISOString() + return this.table.where('updatedOn', '<=', iso).delete() + } + private _trainingToRow(train: 
Training): TableRow { const id = this._trainIdToRow(train) const state = this._trainStateToRow(train) - const dataset = this.packTrainSet(train.dataset) + const dataset = packTrainSet(train.dataset) return { ...id, ...state, @@ -144,7 +146,7 @@ class DbWrittableTrainingRepo implements WrittableTrainingRepository { private _partialTrainStateToQuery = (state: Partial): Partial> => { const { progress, status, error, cluster } = state - const { type: error_type, message: error_message, stackTrace: error_stack } = error || {} + const { type: error_type, message: error_message, stack: error_stack } = error || {} const rowFilters = { status, progress, @@ -157,23 +159,19 @@ class DbWrittableTrainingRepo implements WrittableTrainingRepository { } private _trainStateToRow = (state: TrainingState): Omit => { - const { progress, status, error } = state - const { type: error_type, message: error_message, stackTrace: error_stack } = error || {} + const { progress, status, error, cluster } = state + const { type: error_type, message: error_message, stack: error_stack } = error || {} return { status, progress, error_type, error_message, error_stack, - cluster: this._clusterId, - updatedOn: this._toISO(new Date()) + cluster, + updatedOn: new Date().toISOString() } } - private _toISO(date: Date): string { - return moment(date).toDate().toISOString() - } - private _rowToTraining(row: TableRow): Training { const { appId, modelId: stringId, status, progress, error_type, error_message, error_stack, cluster, dataset } = row @@ -184,7 +182,7 @@ class DbWrittableTrainingRepo implements WrittableTrainingRepository { ? { type: error_type!, message: error_message!, - stackTrace: error_stack! + stack: error_stack! } : undefined @@ -195,87 +193,16 @@ class DbWrittableTrainingRepo implements WrittableTrainingRepository { progress, error, cluster, - dataset: this.unpackTrainSet(dataset) - } - } - - private packTrainSet(ts: TrainInput): string { - return jsonpack.pack(ts) - } - - private unpackTrainSet(compressed: string): TrainInput { - return jsonpack.unpack(compressed) - } -} - -export class DbTrainingRepository implements TrainingRepository { - private _writtableTrainingRepo: DbWrittableTrainingRepo - private _janitorIntervalId: NodeJS.Timeout | undefined - private _logger: Logger - - constructor( - private _database: Knex, - private _trxQueue: LockedTransactionQueue, - logger: Logger, - private _clusterId: string - ) { - this._writtableTrainingRepo = new DbWrittableTrainingRepo(_database, this._clusterId) - this._janitorIntervalId = setInterval(this._janitor.bind(this), JANITOR_MS_INTERVAL) - this._logger = logger.sub('training-repo') - } - - public initialize = async (): Promise => { - await this._writtableTrainingRepo.initialize() - await this._trxQueue.initialize() - } - - public async teardown(): Promise { - this._janitorIntervalId && clearInterval(this._janitorIntervalId) - await this._trxQueue.teardown() - } - - private async _janitor() { - const now = moment() - const before = now.subtract({ milliseconds: MS_BEFORE_PRUNE }) - const nDeletions = await this._writtableTrainingRepo.deleteOlderThan(before.toDate()) - if (nDeletions) { - this._logger.debug(`Pruning ${nDeletions} training state from database`) + dataset: unpackTrainSet(dataset) } - return } - public inTransaction = async (action: TrainingTrx, name: string): Promise => { - const cb = async () => { - const operation = async () => { - const ctx = new DbWrittableTrainingRepo(this._database, this._clusterId) - return action(ctx) - } - return 
Promise.race([operation(), timeout(TRANSACTION_TIMEOUT_MS)])
-    }
-
-    return this._trxQueue.runInLock({
-      name,
-      cb
+  private _onTrainingEvent(training: Training) {
+    this._listeners.forEach((listener) => {
+      // The promise is deliberately not awaited so that a listener cannot block the training repo
+      listener(training).catch((e) =>
+        this._logger.attachError(e).error('an error occurred in the training repository listener')
+      )
     })
   }
-
-  public get = async (trainId: TrainingId): Promise<Training | undefined> => {
-    return this._writtableTrainingRepo.get(trainId)
-  }
-
-  public has = async (trainId: TrainingId): Promise<boolean> => {
-    return this._writtableTrainingRepo.has(trainId)
-  }
-
-  public query = async (query: Partial<TrainingState>): Promise<Training[]> => {
-    return this._writtableTrainingRepo.query(query)
-  }
-
-  public queryOlderThan = async (query: Partial<TrainingState>, threshold: Date): Promise<Training[]> => {
-    return this._writtableTrainingRepo.queryOlderThan(query, threshold)
-  }
-
-  public async delete(id: TrainingId): Promise<void> {
-    return this._writtableTrainingRepo.delete(id)
-  }
 }
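With the transaction queue gone, consumers observe trainings through listeners instead. A minimal sketch of how a listener might be wired up; this is not taken from the diff: the repo instance, the 'done' status value, the millisecond unit of `trainingTime`, and the import paths are all assumptions (the `trainingDuration` histogram is the one defined in meter.ts further below):

import { DbTrainingRepository, TrainingListener } from './infrastructure/training-repo'
import { trainingDuration } from './telemetry/metric'

// Hypothetical wiring: export a duration metric whenever a training completes.
const onTrainingEvent: TrainingListener = async (training) => {
  if (training.status === 'done' && training.trainingTime !== undefined) {
    trainingDuration.labels(training.status).observe(training.trainingTime / 1000) // assumed ms -> s
  }
}

declare const trainingRepo: DbTrainingRepository // assumed to be constructed elsewhere
trainingRepo.addListener(onTrainingEvent)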
diff --git a/packages/nlu-server/src/infrastructure/training-repo/in-memory-training-repo.ts b/packages/nlu-server/src/infrastructure/training-repo/in-memory-training-repo.ts
index 0eb800b4..65e62a12 100644
--- a/packages/nlu-server/src/infrastructure/training-repo/in-memory-training-repo.ts
+++ b/packages/nlu-server/src/infrastructure/training-repo/in-memory-training-repo.ts
@@ -1,20 +1,12 @@
-import { makeInMemoryTrxQueue, LockedTransactionQueue } from '@botpress/locks'
-import { Logger } from '@botpress/logger'
 import { TrainInput } from '@botpress/nlu-client'
 import * as NLUEngine from '@botpress/nlu-engine'
+import { Logger } from '@bpinternal/log4bot'
 import Bluebird from 'bluebird'
 import _ from 'lodash'
 import moment from 'moment'
 import ms from 'ms'
-import {
-  Training,
-  TrainingId,
-  TrainingState,
-  TrainingRepository,
-  TrainingTrx,
-  WrittableTrainingRepository
-} from './typings'
+import { Training, TrainingId, TrainingState, TrainingRepository, TrainingListener } from './typings'
 
 const KEY_JOIN_CHAR = '\u2581'
 
@@ -25,17 +17,23 @@ type TrainEntry = TrainingState & { updatedOn: Date } & { dataset: TrainInput }
 
-class WrittableTrainingRepo implements WrittableTrainingRepository {
+export class InMemoryTrainingRepo implements TrainingRepository {
+  private _listeners: TrainingListener[] = []
   private _trainSessions: { [key: string]: TrainEntry } = {}
-  private _logger: Logger
 
-  constructor(logger: Logger) {
-    this._logger = logger.sub('training-repo')
+  private _janitorIntervalId: NodeJS.Timeout | undefined
+
+  constructor(private _logger: Logger) {}
+
+  public addListener(listener: TrainingListener) {
+    this._listeners.push(listener)
   }
 
-  private _janitorIntervalId: NodeJS.Timeout | undefined
+  public removeListener(listenerToRemove: TrainingListener) {
+    _.remove(this._listeners, (listener) => listener === listenerToRemove)
+  }
 
   public async initialize(): Promise<void> {
     this._janitorIntervalId = setInterval(this._janitor.bind(this), JANITOR_MS_INTERVAL)
@@ -65,10 +63,16 @@ class WrittableTrainingRepo implements WrittableTrainingRepository {
   }
 
   public async set(training: Training): Promise<void> {
+    this._onTrainingEvent(training)
     const key = this._makeTrainingKey(training)
     this._trainSessions[key] = { ...training, updatedOn: new Date() }
   }
 
+  public has = async (trainId: TrainingId): Promise<boolean> => {
+    const result = !!(await this.get(trainId))
+    return result
+  }
+
   public async query(query: Partial<TrainingState>): Promise<Training[]> {
     const allTrainings = this._getAllTrainings()
     return this._filter(allTrainings, query)
@@ -97,7 +101,7 @@ class WrittableTrainingRepo implements WrittableTrainingRepository {
       .value()
   }
 
-  async delete(id: TrainingId): Promise<void> {
+  public async delete(id: TrainingId): Promise<void> {
     const key = this._makeTrainingKey(id)
     delete this._trainSessions[key]
   }
@@ -113,46 +117,13 @@ class WrittableTrainingRepo implements WrittableTrainingRepository {
     const modelId = NLUEngine.modelIdService.fromString(stringId)
     return { modelId, appId }
   }
-}
-
-export default class InMemoryTrainingRepo implements TrainingRepository {
-  private _trxQueue: LockedTransactionQueue
-  private _writtableRepo: WrittableTrainingRepo
-
-  constructor(logger: Logger) {
-    const logCb = (msg: string) => logger.sub('trx-queue').debug(msg)
-    this._trxQueue = makeInMemoryTrxQueue(logCb)
-    this._writtableRepo = new WrittableTrainingRepo(logger)
-  }
-
-  public async initialize(): Promise<void> {
-    return this._writtableRepo.initialize()
-  }
-
-  public async get(id: TrainingId): Promise<Training | undefined> {
-    return this._writtableRepo.get(id)
-  }
-
-  public async query(query: Partial<TrainingState>): Promise<Training[]> {
-    return this._writtableRepo.query(query)
-  }
-
-  public async queryOlderThan(query: Partial<TrainingState>, threshold: Date): Promise<Training[]> {
-    return this._writtableRepo.queryOlderThan(query, threshold)
-  }
-
-  public async delete(id: TrainingId): Promise<void> {
-    return this._writtableRepo.delete(id)
-  }
-
-  public async teardown() {
-    return this._writtableRepo.teardown()
-  }
 
-  public async inTransaction(trx: TrainingTrx, name: string): Promise<void> {
-    return this._trxQueue.runInLock({
-      name,
-      cb: () => trx(this._writtableRepo)
+  private _onTrainingEvent(training: Training) {
+    this._listeners.forEach((listener) => {
+      // The promise is deliberately not awaited so that a listener cannot block the training repo
+      listener(training).catch((e) =>
+        this._logger.attachError(e).error('an error occurred in the training repository listener')
+      )
     })
   }
 }
diff --git a/packages/nlu-server/src/infrastructure/training-repo/index.ts b/packages/nlu-server/src/infrastructure/training-repo/index.ts
new file mode 100644
index 00000000..f975aee6
--- /dev/null
+++ b/packages/nlu-server/src/infrastructure/training-repo/index.ts
@@ -0,0 +1,9 @@
+export { DbTrainingRepository } from './db-training-repo'
+export { InMemoryTrainingRepo } from './in-memory-training-repo'
+export {
+  TrainingRepository as TrainingRepository,
+  Training,
+  TrainingId,
+  TrainingState,
+  TrainingListener
+} from './typings'
diff --git a/packages/nlu-server/src/infrastructure/training-repo/typings.ts b/packages/nlu-server/src/infrastructure/training-repo/typings.ts
index 01d6c55a..0907d1d7 100644
--- a/packages/nlu-server/src/infrastructure/training-repo/typings.ts
+++ b/packages/nlu-server/src/infrastructure/training-repo/typings.ts
@@ -1,34 +1,32 @@
-import { TrainingState as TrainingStateDto, TrainInput } from '@botpress/nlu-client'
+import { TrainingState as ClientTrainingState, TrainInput } from '@botpress/nlu-client'
 import { ModelId } from '@botpress/nlu-engine'
 
-export type TrainingTrx = (repo: WrittableTrainingRepository) => Promise<void>
+export type TrainingListener = (training: Training) => Promise<void>
 
-export interface ReadonlyTrainingRepository {
+export type TrainingRepository = {
+  addListener: (listener: TrainingListener) => void
+  removeListener: (listener: TrainingListener) => void
   initialize: () => Promise<void>
   teardown: () => Promise<void>
   get: (id: TrainingId) => Promise<Training | undefined>
+  has: (id: TrainingId) => Promise<boolean>
   query: (query: Partial<TrainingState>) => Promise<Training[]>
   queryOlderThan: (query: Partial<TrainingState>, threshold: Date) => Promise<Training[]>
  delete: (id: TrainingId) => Promise<void>
-}
-
-export interface WrittableTrainingRepository extends ReadonlyTrainingRepository {
   set: (training: Training) => Promise<void>
 }
 
-export interface TrainingRepository extends ReadonlyTrainingRepository {
-  inTransaction: (trx: TrainingTrx, name: string) => Promise<void> // Promise resolves once transaction is over
-}
-
-export interface TrainingId {
+export type TrainingId = {
   modelId: ModelId
   appId: string
 }
 
-export type TrainingState = TrainingStateDto & {
+export type TrainingState = ClientTrainingState & {
   cluster: string
+  trainingTime?: number
 }
 
-export interface Training extends TrainingId, TrainingState {
-  dataset: TrainInput
-}
+export type Training = TrainingId &
+  TrainingState & {
+    dataset: TrainInput
+  }
diff --git a/packages/nlu-server/src/project-location.ts b/packages/nlu-server/src/project-location.ts
deleted file mode 100644
index b966950b..00000000
--- a/packages/nlu-server/src/project-location.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-import path from 'path'
-
-export const getProjectLocation = () => {
-  return process.pkg
-    ? path.dirname(process.execPath) // We point at the binary path
-    : __dirname // e.g. /dist/..
-}
diff --git a/packages/nlu-server/src/telemetry/metric/index.ts b/packages/nlu-server/src/telemetry/metric/index.ts
new file mode 100644
index 00000000..60c67de9
--- /dev/null
+++ b/packages/nlu-server/src/telemetry/metric/index.ts
@@ -0,0 +1 @@
+export * from './meter'
diff --git a/packages/nlu-server/src/telemetry/metric/meter.ts b/packages/nlu-server/src/telemetry/metric/meter.ts
new file mode 100644
index 00000000..cdf9916e
--- /dev/null
+++ b/packages/nlu-server/src/telemetry/metric/meter.ts
@@ -0,0 +1,25 @@
+import client from 'prom-client'
+
+export const trainingDuration = new client.Histogram({
+  name: 'training_duration_seconds',
+  help: 'Histogram of training duration in seconds.',
+  labelNames: ['status'],
+  buckets: [0.1, 0.5, 1, 5, 15, 30, 60]
+})
+
+export const modelStorageReadDuration = new client.Histogram({
+  name: 'model_storage_read_duration',
+  help: 'Histogram of the duration required to read a model from storage in ms.',
+  buckets: [64, 128, 256, 512, 1024, 2048, 4096, 8192]
+})
+
+export const modelMemoryLoadDuration = new client.Histogram({
+  name: 'model_memory_load_duration',
+  help: 'Histogram of the duration required to load a model in memory in ms.',
+  buckets: [64, 128, 256, 512, 1024, 2048, 4096, 8192]
+})
+
+export const trainingCount = new client.Gauge({
+  name: 'training_count',
+  help: 'Gauge of all trainings.'
+})
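A short sketch of how these meters might be fed at a call site (the call site itself is hypothetical; the only APIs assumed are prom-client's `observe`, `inc`, and `dec`):

import { modelStorageReadDuration, trainingCount } from './meter'

// Hypothetical call site: time a storage read in ms, matching the bucket scale above.
const start = Date.now()
// ... read the model from storage ...
modelStorageReadDuration.observe(Date.now() - start)

// Track trainings in flight with the gauge.
trainingCount.inc() // a training starts
trainingCount.dec() // a training ends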
diff --git a/packages/nlu-server/src/telemetry/usage-client/client.ts b/packages/nlu-server/src/telemetry/usage-client/client.ts
new file mode 100644
index 00000000..c88525e2
--- /dev/null
+++ b/packages/nlu-server/src/telemetry/usage-client/client.ts
@@ -0,0 +1,44 @@
+import axios from 'axios'
+import _ from 'lodash'
+import { UsagePayload, UsageData, UsageSender, UsageType } from './typings'
+
+export class UsageClient {
+  constructor(private usageURL: string) {}
+
+  public async sendUsage<S extends UsageSender, T extends UsageType>(sender: S, type: T, records: UsageData<S, T>[]) {
+    const timestamp = new Date().toISOString()
+    const usage: UsagePayload<S, T> = {
+      meta: {
+        timestamp,
+        schema_version: '1.0.0',
+        sender,
+        type
+      },
+      schema_version: '1.0.0',
+      records
+    }
+
+    try {
+      await axios.post(this.usageURL, usage)
+    } catch (err) {
+      if (axios.isAxiosError(err) && err.response?.data) {
+        const { data } = err.response
+        const message = this._serialize(data)
+        err.message += `: ${message}`
+      }
+      throw err
+    }
+  }
+
+  private _serialize = (data: any): string => {
+    if (_.isString(data)) {
+      return data
+    }
+    try {
+      const str = JSON.stringify(data)
+      return str
+    } catch (err) {
+      return `${data}`
+    }
+  }
+}
diff --git a/packages/nlu-server/src/telemetry/usage-client/index.ts b/packages/nlu-server/src/telemetry/usage-client/index.ts
new file mode 100644
index 00000000..1dfe5704
--- /dev/null
+++ b/packages/nlu-server/src/telemetry/usage-client/index.ts
@@ -0,0 +1,2 @@
+export * from './typings'
+export * from './client'
diff --git a/packages/nlu-server/src/telemetry/usage-client/typings.ts b/packages/nlu-server/src/telemetry/usage-client/typings.ts
new file mode 100644
index 00000000..439a6f10
--- /dev/null
+++ b/packages/nlu-server/src/telemetry/usage-client/typings.ts
@@ -0,0 +1,30 @@
+type Is<X, Y> = X extends Y ? true : false
+type And<X, Y> = X extends false ? false : Y extends false ? false : true
+
+export type UsageSender = 'nlu' // other services might also send usage
+export type UsageType = 'training_time' // other services might also send other usage types
+
+export type UsageMetadata<S extends UsageSender, T extends UsageType> = {
+  timestamp: string
+  sender: S
+  type: T
+  schema_version: string
+}
+
+export type UsageData<S extends UsageSender, T extends UsageType> = And<
+  Is<S, 'nlu'>,
+  Is<T, 'training_time'>
+> extends true
+  ?
{ + app_id: string + model_id: string + training_time: number + timestamp: string + } + : never // other combination of sender + type might have other payload + +export type UsagePayload = { + meta: UsageMetadata + schema_version: string + records: UsageData[] +} diff --git a/packages/nlu-server/src/typings.d.ts b/packages/nlu-server/src/typings.d.ts deleted file mode 100644 index 559ce515..00000000 --- a/packages/nlu-server/src/typings.d.ts +++ /dev/null @@ -1,28 +0,0 @@ -interface BuildInfo { - date: number - branch: string -} - -interface Options { - host: string - port: number - limitWindow: string - limit: number - bodySize: string - batchSize: number - modelCacheSize: string - dbURL?: string - modelDir?: string - verbose: number - doc: boolean - logFilter?: string[] - languageURL: string - languageAuthToken?: string - ducklingURL: string - ducklingEnabled: boolean - config?: string - maxTraining: number -} - -export const version: string -export const run: (argv: Options) => Promise diff --git a/packages/nlu-server/src/typings.ts b/packages/nlu-server/src/typings.ts new file mode 100644 index 00000000..4b10dff8 --- /dev/null +++ b/packages/nlu-server/src/typings.ts @@ -0,0 +1,38 @@ +import { LogLevel } from '@bpinternal/log4bot' + +export type BuildInfo = { + date: number + branch: string +} + +export type LogFormat = 'text' | 'json' +export type NLUServerOptions = { + host: string + port: number + limitWindow: string + limit: number + bodySize: string + modelSize: string + batchSize: number + modelCacheSize: string + dbURL?: string + modelDir: string + doc: boolean + logLevel: LogLevel + logFormat: LogFormat + debugFilter?: string + prometheusEnabled: boolean + apmEnabled: boolean + apmSampleRate?: number + maxTraining: number + maxLinting: number + languageURL: string + languageAuthToken?: string + ducklingURL: string + ducklingEnabled: boolean + usageURL?: string + modelTransferEnabled: boolean + reverseProxy?: string +} + +export type CommandLineOptions = Partial diff --git a/packages/nlu-server/src/uncaught-errors.ts b/packages/nlu-server/src/uncaught-errors.ts new file mode 100644 index 00000000..8792635b --- /dev/null +++ b/packages/nlu-server/src/uncaught-errors.ts @@ -0,0 +1,13 @@ +import { Logger } from '@bpinternal/log4bot' + +export const listenForUncaughtErrors = (logger: Logger) => { + process.on('unhandledRejection', (thrown: any) => { + const err = thrown instanceof Error ? thrown : new Error(`${thrown}`) + logger.critical(`Unhandled rejection: "${err.message}"`) + }) + + process.on('uncaughtException', (thrown: Error) => { + const err = thrown instanceof Error ? 
thrown : new Error(`${thrown}`)
+    logger.critical(`Uncaught exception: "${err.message}"`)
+  })
+}
diff --git a/packages/nlu-server/src/utils/broadcast.ts b/packages/nlu-server/src/utils/broadcast.ts
deleted file mode 100644
index 8eedcdd2..00000000
--- a/packages/nlu-server/src/utils/broadcast.ts
+++ /dev/null
@@ -1,20 +0,0 @@
-import PGPubSub from 'pg-pubsub'
-
-type Func<X extends any[], Y> = (...x: X) => Y
-
-interface Task<X extends any[]> {
-  name: string
-  run: Func<X, Promise<void>>
-}
-
-export class Broadcaster {
-  public constructor(private _pubsub: PGPubSub) {}
-
-  public async broadcast<X extends any[]>(t: Task<X>): Promise<Func<X, Promise<void>>> {
-    await this._pubsub.addChannel(t.name, (x) => t.run(...x))
-
-    return (...x: X) => {
-      return this._pubsub.publish(t.name, x)
-    }
-  }
-}
diff --git a/packages/nlu-server/src/utils/error-utils.ts b/packages/nlu-server/src/utils/error-utils.ts
deleted file mode 100644
index ea1d3a0c..00000000
--- a/packages/nlu-server/src/utils/error-utils.ts
+++ /dev/null
@@ -1,29 +0,0 @@
-import _ from 'lodash'
-
-export interface ErrorMessage {
-  message: string
-  stackTrace?: string
-}
-
-export function serializeError(err: any): ErrorMessage {
-  if (err instanceof Error) {
-    const { message, stack } = err
-    return { message, stackTrace: stack }
-  }
-
-  if (_.isString(err)) {
-    return { message: err }
-  }
-
-  if (_.isObject(err)) {
-    return { message: JSON.stringify(err, null, 2) }
-  }
-
-  return { message: '' }
-}
-
-export function deserializeError(err: ErrorMessage): Error {
-  const newErr = new Error(err.message)
-  newErr.stack = err.stackTrace
-  return newErr
-}
diff --git a/packages/nlu-server/src/utils/guards.ts b/packages/nlu-server/src/utils/guards.ts
deleted file mode 100644
index a14274a8..00000000
--- a/packages/nlu-server/src/utils/guards.ts
+++ /dev/null
@@ -1,9 +0,0 @@
-import { PatternEntityDefinition, ListEntityDefinition } from '@botpress/nlu-client'
-
-export const isListEntity = (e: ListEntityDefinition | PatternEntityDefinition): e is ListEntityDefinition => {
-  return e.type === 'list'
-}
-
-export const isPatternEntity = (e: ListEntityDefinition | PatternEntityDefinition): e is PatternEntityDefinition => {
-  return e.type === 'pattern'
-}
diff --git a/packages/nlu-server/tsconfig.json b/packages/nlu-server/tsconfig.json
index 03d738c9..ff01d527 100644
--- a/packages/nlu-server/tsconfig.json
+++ b/packages/nlu-server/tsconfig.json
@@ -1,18 +1,34 @@
 {
   "extends": "../../tsconfig.packages.json",
   "references": [
-    { "path": "../nlu-client" },
-    { "path": "../nlu-engine" },
-    { "path": "../logger" },
-    { "path": "../locks" }
+    {
+      "path": "../telemetry"
+    },
+    {
+      "path": "../distributed"
+    },
+    {
+      "path": "../nlu-client"
+    },
+    {
+      "path": "../nlu-engine"
+    }
   ],
   "compilerOptions": {
     "outDir": "./dist" /* Redirect output structure to the directory. */,
     "rootDir": "./src" /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */,
     "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file.
*/, - "types": ["jest", "node"], + "types": [ + "jest", + "node" + ], "composite": true }, - "include": ["src/**/*.ts"], - "typeRoots": ["./node_modules/@types", "../../node_modules/@types"] + "include": [ + "src/**/*.ts" + ], + "typeRoots": [ + "./node_modules/@types", + "../../node_modules/@types" + ] } diff --git a/packages/node-crfsuite/package.json b/packages/node-crfsuite/package.json index fa94b438..5344e20d 100644 --- a/packages/node-crfsuite/package.json +++ b/packages/node-crfsuite/package.json @@ -11,7 +11,7 @@ "install": ":", "build:native": "node-gyp install && node-gyp rebuild", "build": "tsc --build", - "test": "echo \"no tests\"" + "clean": "rimraf ./dist && rimraf ./node_modules" }, "keywords": [ "crf", @@ -40,7 +40,7 @@ "devDependencies": { "node-addon-api": "^3.0.0", "node-gyp": "^5.0.0", - "@types/node": "^12.13.0", - "typescript": "^3.9.10" + "@types/node": "^16.11.10", + "typescript": "^5.0.4" } } diff --git a/packages/node-crfsuite/src/index.ts b/packages/node-crfsuite/src/index.ts index 440cbbf0..93398b4d 100644 --- a/packages/node-crfsuite/src/index.ts +++ b/packages/node-crfsuite/src/index.ts @@ -4,7 +4,7 @@ import { Tagger, Trainer, TrainerOptions } from './typings' type TaggerCtor = new () => Tagger type TrainerCtor = new (opt?: TrainerOptions) => Trainer -interface BindingType { +type BindingType = { Tagger: TaggerCtor Trainer: TrainerCtor } diff --git a/packages/node-crfsuite/src/initialize.ts b/packages/node-crfsuite/src/initialize.ts index 81fec8d7..cd54d4a8 100644 --- a/packages/node-crfsuite/src/initialize.ts +++ b/packages/node-crfsuite/src/initialize.ts @@ -39,7 +39,7 @@ import { binName } from './constants' */ type ExtensionDir = Record<'dirName' | 'dist' | 'version', string> -interface Mutex { +type Mutex = { release: () => void } diff --git a/packages/node-crfsuite/src/typings.d.ts b/packages/node-crfsuite/src/typings.d.ts index 29bcd1e5..4eaf6fce 100644 --- a/packages/node-crfsuite/src/typings.d.ts +++ b/packages/node-crfsuite/src/typings.d.ts @@ -2,25 +2,25 @@ export const makeTrainer: (args?: TrainerOptions) => Promise export const makeTagger: () => Promise export declare class Tagger { - tag(xseq: Array): { probability: number; result: string[] } - open(model_filename: string): boolean - marginal(xseq: Array): { [key: string]: number }[] + public tag(xseq: Array): { probability: number; result: string[] } + public open(model_filename: string): boolean + public marginal(xseq: Array): { [key: string]: number }[] } -export interface Options { +export type Options = { [key: string]: string } -export interface TrainerOptions { +export type TrainerOptions = { [key: string]: any debug?: boolean } export declare class Trainer { constructor(opts?: TrainerOptions) - append(xseq: Array, yseq: string[]): void - train(model_filename: string, cb?: (iteration: number) => number | undefined): number - train_async(model_filename: string, cb?: (iteration: number) => number | undefined): Promise - get_params(options: Options): any - set_params(options: Options): void + public append(xseq: Array, yseq: string[]): void + public train(model_filename: string, cb?: (iteration: number) => number | undefined): number + public train_async(model_filename: string, cb?: (iteration: number) => number | undefined): Promise + public get_params(options: Options): any + public set_params(options: Options): void } diff --git a/packages/node-fasttext/package.json b/packages/node-fasttext/package.json index 543eae56..15d5cbf8 100644 --- a/packages/node-fasttext/package.json +++ 
b/packages/node-fasttext/package.json @@ -11,7 +11,7 @@ "install": ":", "build:native": "node-gyp install && node-gyp rebuild", "build": "tsc --build", - "test": "echo \"no tests\"" + "clean": "rimraf ./dist && rimraf ./node_modules" }, "repository": { "type": "git", @@ -33,10 +33,10 @@ "yn": "^4.0.0" }, "devDependencies": { - "@types/node": "^12.13.0", + "@types/node": "^16.11.10", "node-addon-api": "^3.0.0", "node-gyp": "^7.0.0", - "typescript": "^3.9.10" + "typescript": "^5.0.4" }, "files": [ "cppsrc", diff --git a/packages/node-fasttext/src/index.ts b/packages/node-fasttext/src/index.ts index 3f652f27..3f5b8dc8 100644 --- a/packages/node-fasttext/src/index.ts +++ b/packages/node-fasttext/src/index.ts @@ -4,7 +4,7 @@ import { Classifier, Query } from './typings' type ClassifierCtor = new (modelFilename?: string) => Classifier type QueryCtor = new (modelFilename: string) => Query -interface BindingType { +type BindingType = { Classifier: ClassifierCtor Query: QueryCtor } diff --git a/packages/node-fasttext/src/initialize.ts b/packages/node-fasttext/src/initialize.ts index 81fec8d7..cd54d4a8 100644 --- a/packages/node-fasttext/src/initialize.ts +++ b/packages/node-fasttext/src/initialize.ts @@ -39,7 +39,7 @@ import { binName } from './constants' */ type ExtensionDir = Record<'dirName' | 'dist' | 'version', string> -interface Mutex { +type Mutex = { release: () => void } diff --git a/packages/node-fasttext/src/typings.d.ts b/packages/node-fasttext/src/typings.d.ts index 830712e5..3deed57f 100644 --- a/packages/node-fasttext/src/typings.d.ts +++ b/packages/node-fasttext/src/typings.d.ts @@ -1,7 +1,7 @@ export const makeClassifier: (modelFilename?: string) => Promise export const makeQuery: (modelFilename: string) => Promise -export interface Classifier { +export type Classifier = { loadModel(modelFilename: string): Promise predict(sentence: string, k: number, callback?: DoneCallback): Promise> train( @@ -12,12 +12,12 @@ export interface Classifier { quantize(options: Options, callback?: DoneCallback) } -export interface Query { +export type Query = { nn(word: string, neighbors: number): Promise> getWordVector(word: string): Promise } -export interface Options { +export type Options = { [key: string]: any // The following arguments are mandatory input: string // training file path @@ -56,6 +56,6 @@ export interface Options { dsub: number // size of each sub-vector [2] } -export interface DoneCallback { +export type DoneCallback = { (error: any, result: any): void } diff --git a/packages/node-sentencepiece/package.json b/packages/node-sentencepiece/package.json index 74988115..41d59bf1 100644 --- a/packages/node-sentencepiece/package.json +++ b/packages/node-sentencepiece/package.json @@ -10,12 +10,12 @@ "scripts": { "build": "tsc --build", "install": ":", - "clean": "node-gyp clean", + "clean:native": "node-gyp clean", "build:native": "run-script-os", "build:native:linux": "./scripts/linux_build_sentencepiece.sh && node-gyp rebuild", "build:native:darwin": "./scripts/macos_build_sentencepiece.sh && node-gyp rebuild", "build:native:win32": "(.\\scripts\\win_build_sentencepiece.bat || true) && node-gyp rebuild", - "test": "echo \"no tests\"" + "clean": "rimraf ./dist && rimraf ./node_modules" }, "dependencies": { "getos": "^3.2.1", @@ -26,7 +26,7 @@ "node-gyp": "^4.0.0", "node-addon-api": "^1.6.3", "run-script-os": "^1.0.5", - "@types/node": "^12.13.0", - "typescript": "^3.9.10" + "@types/node": "^16.11.10", + "typescript": "^5.0.4" } } diff --git a/packages/node-sentencepiece/src/index.ts 
b/packages/node-sentencepiece/src/index.ts index 308381ba..3795ba52 100644 --- a/packages/node-sentencepiece/src/index.ts +++ b/packages/node-sentencepiece/src/index.ts @@ -3,7 +3,7 @@ import { Processor } from './typings' type TaggerCtor = new () => Processor -interface BindingType { +type BindingType = { Processor: TaggerCtor } diff --git a/packages/node-sentencepiece/src/initialize.ts b/packages/node-sentencepiece/src/initialize.ts index 81fec8d7..cd54d4a8 100644 --- a/packages/node-sentencepiece/src/initialize.ts +++ b/packages/node-sentencepiece/src/initialize.ts @@ -39,7 +39,7 @@ import { binName } from './constants' */ type ExtensionDir = Record<'dirName' | 'dist' | 'version', string> -interface Mutex { +type Mutex = { release: () => void } diff --git a/packages/node-sentencepiece/src/typings.d.ts b/packages/node-sentencepiece/src/typings.d.ts index 993fdc2e..b73afd58 100644 --- a/packages/node-sentencepiece/src/typings.d.ts +++ b/packages/node-sentencepiece/src/typings.d.ts @@ -1,4 +1,4 @@ -export interface Processor { +export type Processor = { loadModel: (modelPath: string) => void encode: (inputText: string) => string[] decode: (pieces: string[]) => string diff --git a/packages/node-svm/jest.config.js b/packages/node-svm/jest.config.js deleted file mode 100644 index cf777dfc..00000000 --- a/packages/node-svm/jest.config.js +++ /dev/null @@ -1,6 +0,0 @@ -module.exports = { - preset: 'ts-jest', - testEnvironment: 'node', - testPathIgnorePatterns: ['dist', 'node_modules'], - rootDir: '.' -} diff --git a/packages/node-svm/package.json b/packages/node-svm/package.json index a9b6ad5a..bb6f2c57 100644 --- a/packages/node-svm/package.json +++ b/packages/node-svm/package.json @@ -10,9 +10,10 @@ "scripts": { "install": ":", "build:native": "node-gyp install && node-gyp rebuild", - "clean": "node-gyp clean", + "clean:native": "node-gyp clean", "build": "tsc --build", - "test": "jest -i --detectOpenHandles -c ./jest.config.js" + "test": "jest --roots ./dist", + "clean": "rimraf ./dist && rimraf ./node_modules" }, "gypfile": true, "dependencies": { @@ -24,11 +25,10 @@ "@babel/preset-typescript": "^7.10.1", "@types/bluebird": "^3.5.32", "@types/getos": "^3.0.0", - "@types/node": "^12.13.0", + "@types/node": "^16.11.10", "jest": "^24.9.0", "node-addon-api": "^2.0.0", "node-gyp": "^6.1.0", - "ts-jest": "^26.5.5", - "typescript": "^3.9.10" + "typescript": "^5.0.4" } } diff --git a/packages/node-svm/src/index.ts b/packages/node-svm/src/index.ts index df907704..780ad8fd 100644 --- a/packages/node-svm/src/index.ts +++ b/packages/node-svm/src/index.ts @@ -3,7 +3,7 @@ import { NSVM } from './typings' type SvmCtor = new (args?: { random_seed: number }) => NSVM type HelloWorld = () => string -interface BindingType { +type BindingType = { NSVM: SvmCtor hello: HelloWorld } diff --git a/packages/node-svm/src/initialize.ts b/packages/node-svm/src/initialize.ts index 81fec8d7..cd54d4a8 100644 --- a/packages/node-svm/src/initialize.ts +++ b/packages/node-svm/src/initialize.ts @@ -39,7 +39,7 @@ import { binName } from './constants' */ type ExtensionDir = Record<'dirName' | 'dist' | 'version', string> -interface Mutex { +type Mutex = { release: () => void } diff --git a/packages/node-svm/src/typings.d.ts b/packages/node-svm/src/typings.d.ts index fbfe6330..a04400a9 100644 --- a/packages/node-svm/src/typings.d.ts +++ b/packages/node-svm/src/typings.d.ts @@ -1,6 +1,6 @@ export const makeSvm: (args?: { random_seed: number }) => Promise -export interface NSVM { +export type NSVM = { train(params: 
AugmentedParameters, x: number[][], y: number[]): void train_async(params: AugmentedParameters, x: number[][], y: number[], cb: (e: null | string) => void): void predict(x: number[]): number @@ -13,12 +13,12 @@ export interface NSVM { is_trained(): boolean } -interface ProbabilityResult { +type ProbabilityResult = { prediction: number probabilities: number[] } -export interface Model { +export type Model = { param: Parameters nr_class: number l: number @@ -33,11 +33,11 @@ export interface Model { free_sv: number } -interface AugmentedParameters extends Parameters { +type AugmentedParameters = { mute: number -} +} & Parameters -export interface Parameters { +export type Parameters = { svm_type: number kernel_type: number degree: number diff --git a/packages/telemetry/package.json b/packages/telemetry/package.json new file mode 100644 index 00000000..884ed0dd --- /dev/null +++ b/packages/telemetry/package.json @@ -0,0 +1,23 @@ +{ + "name": "@botpress/telemetry", + "version": "1.0.0", + "description": "Botpress Telemetry Package", + "author": "Botpress, Inc.", + "license": "AGPL-3.0", + "main": "./dist/index.js", + "types": "./src/typings.d.ts", + "bin": "./dist/index.js", + "dependencies": { + "@promster/express": "5.0.3", + "@promster/metrics": "6.0.2", + "@promster/server": "6.0.2", + "express": "^4.16.4" + }, + "devDependencies": { + "typescript": "^5.0.4" + }, + "scripts": { + "build": "tsc --build", + "clean": "rimraf ./dist && rimraf ./node_modules" + } +} diff --git a/packages/telemetry/src/index.ts b/packages/telemetry/src/index.ts new file mode 100644 index 00000000..e6355665 --- /dev/null +++ b/packages/telemetry/src/index.ts @@ -0,0 +1 @@ +export * as prometheus from './prometheus' diff --git a/packages/telemetry/src/prometheus/index.ts b/packages/telemetry/src/prometheus/index.ts new file mode 100644 index 00000000..8a665446 --- /dev/null +++ b/packages/telemetry/src/prometheus/index.ts @@ -0,0 +1,118 @@ +import { createMiddleware, defaultNormalizers, signalIsUp } from '@promster/express' +import { getSummary, getContentType } from '@promster/metrics' +import { Express, Request } from 'express' +import * as http from 'http' + +type Route = { + prefix?: RegExp + subroutes?: Route[] + methods?: { [key: string]: boolean } + regexp?: RegExp + path: string +} + +const NOT_FOUND = 'not_found' + +const trimPrefix = (value: string, prefix: string) => (value.startsWith(prefix) ? 
value.slice(prefix.length) : value) + +const getMiddlewareRoutes = (middleware: any) => { + const routes: Route[] = [] + + if (middleware.route) { + routes.push({ + path: middleware.route.path, + regexp: middleware.regexp, + methods: middleware.route?.methods + }) + } + + if (middleware.name === 'router' && middleware.handle.stack) { + const subroutes: Route[] = [] + + for (const subMiddleware of middleware.handle.stack) { + subroutes.push(...getMiddlewareRoutes(subMiddleware)) + } + + if (subroutes.length) { + routes.push({ + prefix: middleware.regexp, + path: middleware.path || '', + subroutes + }) + } + } + + return routes +} + +const getRoutes = (app: Express) => { + const routes: Route[] = [] + + for (const middleware of app._router.stack) { + routes.push(...getMiddlewareRoutes(middleware)) + } + + return routes +} + +const getRoutesPath = (path: string, method: string, routes: Route[], prefix = '') => { + for (const route of routes) { + if (route.prefix && route.subroutes) { + if (route.prefix.test(path)) { + return getRoutesPath(trimPrefix(path, route.path), method, route.subroutes, route.path) + } + } else if (route.regexp) { + if (route.regexp.test(path) && route.methods?.[method]) { + return `${prefix}${route.path}` + } + } + } + + return NOT_FOUND +} + +const normalizePath = (app: Express) => { + const routes: Route[] = [] + + return (path: string, { req }: { req: Request }) => { + if (!routes.length) { + routes.push(...getRoutes(app)) + } + + return getRoutesPath(path, req.method.toLowerCase(), routes) + } +} + +const createServer = (onRequest?: () => Promise) => + new Promise((resolve, reject) => { + const server = http.createServer(async (_req, res) => { + if (onRequest) { + await onRequest() + } + + res.writeHead(200, 'OK', { 'content-type': getContentType() }) + res.end(await getSummary()) + }) + + server.listen(9090, '0.0.0.0', () => { + server.on('error', reject) + resolve(server) + }) + }) + +export const init = async (app: Express, onRequest?: () => Promise) => { + app.use( + createMiddleware({ + app, + options: { + ...defaultNormalizers, + normalizePath: normalizePath(app), + buckets: [0.05, 0.1, 0.5, 1, 3] + } + }) + ) + + await createServer(onRequest) + + signalIsUp() +} diff --git a/packages/logger/tsconfig.json b/packages/telemetry/tsconfig.json similarity index 96% rename from packages/logger/tsconfig.json rename to packages/telemetry/tsconfig.json index df76eb78..f1c6a2db 100644 --- a/packages/logger/tsconfig.json +++ b/packages/telemetry/tsconfig.json @@ -1,6 +1,5 @@ { "extends": "../../tsconfig.packages.json", - "references": [], "compilerOptions": { "outDir": "./dist" /* Redirect output structure to the directory. */, "rootDir": "./src" /* Specify the root directory of input files. Use to control the output directory structure with --outDir. 
*/, diff --git a/packages/worker/package.json b/packages/worker/package.json index a52a6099..e6aa3beb 100644 --- a/packages/worker/package.json +++ b/packages/worker/package.json @@ -5,7 +5,7 @@ "license": "AGPL-3.0", "scripts": { "build": "tsc --build", - "test": "echo \"no tests\"" + "clean": "rimraf ./dist && rimraf ./node_modules" }, "dependencies": { "lodash": "^4.17.19", @@ -13,9 +13,8 @@ }, "devDependencies": { "@types/lodash": "^4.14.116", - "@types/yn": "^3.1.0", - "@types/node": "^12.13.0", - "typescript": "^3.9.10" + "@types/node": "^16.11.10", + "typescript": "^5.0.4" }, "types": "./src/typings.d.ts", "main": "./dist/index.js" diff --git a/packages/worker/src/error-handler.ts b/packages/worker/src/error-handler.ts new file mode 100644 index 00000000..9f6d0117 --- /dev/null +++ b/packages/worker/src/error-handler.ts @@ -0,0 +1,15 @@ +import _ from 'lodash' +import { ErrorSerializer, ErrorDeserializer, SerializedError } from './typings' + +export class ErrorHandler implements ErrorSerializer, ErrorDeserializer { + public deserializeError(err: SerializedError): Error { + const newErr = new Error(err.message) + newErr.stack = err.stack + return newErr + } + + public serializeError(err: Error): SerializedError { + const { message, stack } = err + return { message, stack, data: {} } + } +} diff --git a/packages/worker/src/error-utils.ts b/packages/worker/src/error-utils.ts deleted file mode 100644 index ea1d3a0c..00000000 --- a/packages/worker/src/error-utils.ts +++ /dev/null @@ -1,29 +0,0 @@ -import _ from 'lodash' - -export interface ErrorMessage { - message: string - stackTrace?: string -} - -export function serializeError(err: any): ErrorMessage { - if (err instanceof Error) { - const { message, stack } = err - return { message, stackTrace: stack } - } - - if (_.isString(err)) { - return { message: err } - } - - if (_.isObject(err)) { - return { message: JSON.stringify(err, null, 2) } - } - - return { message: '' } -} - -export function deserializeError(err: ErrorMessage): Error { - const newErr = new Error(err.message) - newErr.stack = err.stackTrace - return newErr -} diff --git a/packages/worker/src/errors.ts b/packages/worker/src/errors.ts index ad9689fa..f99e4edf 100644 --- a/packages/worker/src/errors.ts +++ b/packages/worker/src/errors.ts @@ -1,28 +1,21 @@ -import { Worker } from './worker-pool/worker' +import { errors } from './typings' -export class TaskCanceledError extends Error {} -export function isTaskCanceled(err: Error): err is TaskCanceledError { - return err instanceof TaskCanceledError -} +export class TaskCanceledError extends Error implements errors.TaskCanceledError {} -export class TaskAlreadyStartedError extends Error {} -export function isTaskAlreadyStarted(err: Error): err is TaskAlreadyStartedError { - return err instanceof TaskAlreadyStartedError -} +export class TaskAlreadyStartedError extends Error implements errors.TaskAlreadyStartedError {} -export class TaskExitedUnexpectedlyError extends Error { - public wid: number - public info: { exitCode: number; signal: string } +export class TaskExitedUnexpectedlyError extends Error implements errors.TaskExitedUnexpectedlyError { + public wid: number | undefined + public exitCode: number + public signal: string - constructor(worker: Worker, info: { exitCode: number; signal: string }) { - const { exitCode, signal } = info - const { type } = worker.innerWorker - const workerType = type === 'process' ? 
'Process' : 'Thread'
-    const message = `${workerType} ${worker.wid} exited with exit code ${exitCode} and signal ${signal}.`
+  constructor(args: errors.TaskExitedUnexpectedlyErrorArgs) {
+    const { wType, wid, exitCode, signal } = args
+    const workerType = wType === 'process' ? 'Process' : 'Thread'
+    const message = `${workerType} ${wid} exited with exit code ${exitCode} and signal ${signal}.`
     super(message)
-    ;(this.wid = worker.wid), (this.info = info)
+    this.wid = args.wid
+    this.exitCode = exitCode
+    this.signal = signal
   }
 }
-export function isTaskExitedUnexpectedly(err: Error): err is TaskExitedUnexpectedlyError {
-  return err instanceof TaskExitedUnexpectedlyError
-}
diff --git a/packages/worker/src/index.ts b/packages/worker/src/index.ts
index 770eff13..1418050c 100644
--- a/packages/worker/src/index.ts
+++ b/packages/worker/src/index.ts
@@ -1,16 +1,20 @@
-import { isTaskAlreadyStarted, isTaskCanceled, isTaskExitedUnexpectedly } from './errors'
-
+import { TaskCanceledError, TaskAlreadyStartedError, TaskExitedUnexpectedlyError } from './errors'
 import { ProcessEntyPoint, ProcessPool } from './process-pool'
 import { ThreadEntyPoint, ThreadPool } from './thread-pool'
-import { Logger, PoolOptions } from './typings'
-
-export const errors = {
-  isTaskAlreadyStarted,
-  isTaskCanceled,
-  isTaskExitedUnexpectedly
+import * as types from './typings'
+
+export const errors: typeof types.errors = {
+  TaskCanceledError,
+  TaskAlreadyStartedError,
+  TaskExitedUnexpectedlyError
 }
 
-export const makeProcessPool = (logger: Logger, config: PoolOptions) => new ProcessPool(logger, config)
-export const makeProcessEntryPoint = () => new ProcessEntyPoint()
-export const makeThreadPool = (logger: Logger, config: PoolOptions) => new ThreadPool(logger, config)
-export const makeThreadEntryPoint = () => new ThreadEntyPoint()
+export const makeProcessPool: typeof types.makeProcessPool = (logger: types.Logger, config: types.PoolOptions) =>
+  new ProcessPool(logger, config)
+export const makeProcessEntryPoint: typeof types.makeProcessEntryPoint = (config?: types.EntryPointOptions) =>
+  new ProcessEntyPoint(config)
+export const makeThreadPool: typeof types.makeThreadPool = (logger: types.Logger, config: types.PoolOptions) =>
+  new ThreadPool(logger, config)
+export const makeThreadEntryPoint: typeof types.makeThreadEntryPoint = (config?: types.EntryPointOptions) =>
+  new ThreadEntyPoint(config)
diff --git a/packages/worker/src/process-pool.ts b/packages/worker/src/process-pool.ts
index f6c393ab..8093203d 100644
--- a/packages/worker/src/process-pool.ts
+++ b/packages/worker/src/process-pool.ts
@@ -1,11 +1,11 @@
 import child_process, { ForkOptions } from 'child_process'
 import yn from 'yn'
-import { Logger, PoolOptions } from './typings'
+import { Logger, PoolOptions, EntryPointOptions } from './typings'
 import { WorkerPool } from './worker-pool'
 import { Worker } from './worker-pool/worker'
 import { WorkerEntryPoint } from './worker-pool/worker-entry-point'
 
-export class ProcessPool<I, O> extends WorkerPool<I, O> {
+export class ProcessPool<I, O, P> extends WorkerPool<I, O, P> {
   constructor(logger: Logger, config: PoolOptions) {
     super(logger, config)
   }
@@ -27,7 +27,11 @@ export class ProcessPool<I, O> extends WorkerPool<I, O> {
   }
 }
 
-export class ProcessEntyPoint<I, O> extends WorkerEntryPoint<I, O> {
+export class ProcessEntyPoint<I, O, P> extends WorkerEntryPoint<I, O, P> {
+  constructor(config?: EntryPointOptions) {
+    super(config)
+  }
+
   messageMain = (msg: any) => {
     process.send?.(msg)
   }
diff --git a/packages/worker/src/thread-pool.ts b/packages/worker/src/thread-pool.ts
index bb8722d3..2494245d 100644
--- a/packages/worker/src/thread-pool.ts
+++ b/packages/worker/src/thread-pool.ts
@@ -1,10 +1,10 @@
 import { Worker as Thread, isMainThread, parentPort } from 'worker_threads'
-import { Logger, PoolOptions } from './typings'
+import { Logger, PoolOptions, EntryPointOptions } from './typings'
 import { WorkerPool } from './worker-pool'
 import { Worker } from './worker-pool/worker'
 import { WorkerEntryPoint } from './worker-pool/worker-entry-point'
 
-export class ThreadPool<I, O> extends WorkerPool<I, O> {
+export class ThreadPool<I, O, P> extends WorkerPool<I, O, P> {
   constructor(logger: Logger, config: PoolOptions) {
     super(logger, config)
   }
@@ -19,7 +19,11 @@ export class ThreadPool<I, O> extends WorkerPool<I, O> {
   }
 }
 
-export class ThreadEntyPoint<I, O> extends WorkerEntryPoint<I, O> {
+export class ThreadEntyPoint<I, O, P> extends WorkerEntryPoint<I, O, P> {
+  constructor(config?: EntryPointOptions) {
+    super(config)
+  }
+
   messageMain = (msg: any) => {
     parentPort?.postMessage(msg)
   }
diff --git a/packages/worker/src/typings.d.ts b/packages/worker/src/typings.d.ts
index 11656cd6..48a96a5c 100644
--- a/packages/worker/src/typings.d.ts
+++ b/packages/worker/src/typings.d.ts
@@ -1,4 +1,4 @@
-export interface Logger {
+export type Logger = {
   debug: (msg: string) => void
   info: (msg: string) => void
   warning: (msg: string, err?: Error) => void
@@ -6,45 +6,80 @@
   sub: (namespace: string) => Logger
 }
 
-export interface TaskDefinition<I> {
+export type SerializedError = {
+  message: string
+  stack?: string
+  data: any
+}
+
+export type ErrorSerializer = {
+  serializeError(err: Error): SerializedError
+}
+
+export type ErrorDeserializer = {
+  deserializeError(err: SerializedError): Error
+}
+
+export type TaskProgress<P> = P extends void ? (p: number) => void : (p: number, data: P) => void
+export type TaskDefinition<I, P> = {
   input: I
-  logger: Logger // TODO use the actual logger implementation with a custom LogTransporter
-  progress: (p: number) => void
+  logger: Logger
+  progress: TaskProgress<P>
 }
 
-export type TaskHandler<I, O> = (def: TaskDefinition<I>) => Promise<O>
+export type TaskHandler<I, O, P> = (def: TaskDefinition<I, P>) => Promise<O>
 
-export const errors: {
-  isTaskAlreadyStarted: (err: Error) => boolean
-  isTaskCanceled: (err: Error) => boolean
-  isTaskExitedUnexpectedly: (err: Error) => boolean
+export namespace errors {
+  export class TaskAlreadyStartedError extends Error {}
+  export class TaskCanceledError extends Error {}
+
+  export type TaskExitedUnexpectedlyErrorArgs = {
+    wType: 'thread' | 'process'
+    wid: number | undefined
+    exitCode: number
+    signal: string
+  }
+
+  export class TaskExitedUnexpectedlyError extends Error {
+    public wid: number | undefined
+    public exitCode: number
+    public signal: string
+    constructor(worker: TaskExitedUnexpectedlyErrorArgs)
+  }
 }
 
-export interface PoolOptions {
+export type PoolOptions = {
   entryPoint: string
   maxWorkers: number
   env: NodeJS.ProcessEnv
+  errorHandler?: ErrorDeserializer
+}
+
+export type WorkerPool<I, O, P> = {
+  run(taskId: string, input: I, progress: TaskProgress<P>): Promise<O>
 }
 
-export interface WorkerPool<I, O> {
-  run(taskId: string, input: I, progress: (x: number) => void): Promise<O>
+export type EntryPointOptions = {
+  errorHandler?: ErrorSerializer
 }
 
-export interface WorkerEntryPoint<I, O> {
+export type WorkerEntryPoint<I, O, P> = {
   initialize(): Promise<void>
-  listenForTask(handler: TaskHandler<I, O>): void
+  listenForTask(handler: TaskHandler<I, O, P>): void
   isMainWorker: () => boolean
   logger: Logger
 }
 
-export interface ProcessPool<I, O> extends WorkerPool<I, O> {
+
+export type ProcessPool<I, O, P> = WorkerPool<I, O, P> & {
   cancel(id: string)
 }
-export interface ProcessEntyPoint<I, O> extends WorkerEntryPoint<I, O> {}
-export interface ThreadPool<I, O> extends WorkerPool<I, O> {}
-export interface ThreadEntyPoint<I, O> extends WorkerEntryPoint<I, O> {}
+export type ProcessEntyPoint<I, O, P> = {} & WorkerEntryPoint<I, O, P>
+
+export type ThreadPool<I, O, P> = {} & WorkerPool<I, O, P>
+export type ThreadEntyPoint<I, O, P> = {} & WorkerEntryPoint<I, O, P>
 
-export const makeProcessPool: <I, O>(logger: Logger, config: PoolOptions) => ProcessPool<I, O>
-export const makeProcessEntryPoint: <I, O>() => ProcessEntyPoint<I, O>
-export const makeThreadPool: <I, O>(logger: Logger, config: PoolOptions) => ThreadPool<I, O>
-export const makeThreadEntryPoint: <I, O>() => ThreadEntyPoint<I, O>
+export const makeProcessPool: <I, O, P>(logger: Logger, config: PoolOptions) => ProcessPool<I, O, P>
+export const makeProcessEntryPoint: <I, O, P>(config?: EntryPointOptions) => ProcessEntyPoint<I, O, P>
+export const makeThreadPool: <I, O, P>(logger: Logger, config: PoolOptions) => ThreadPool<I, O, P>
+export const makeThreadEntryPoint: <I, O, P>(config?: EntryPointOptions) => ThreadEntyPoint<I, O, P>
diff --git a/packages/worker/src/worker-pool/communication.ts b/packages/worker/src/worker-pool/communication.ts
index 377198f0..8f5d1421 100644
--- a/packages/worker/src/worker-pool/communication.ts
+++ b/packages/worker/src/worker-pool/communication.ts
@@ -1,4 +1,4 @@
-import { ErrorMessage } from '../error-utils'
+import { SerializedError } from 'src/typings'
 
 export type OutgoingPayload<T extends OutgoingMessageType, I> = T extends 'start_task'
   ? {
@@ -7,41 +7,46 @@
   : {}
 
 export type OutgoingMessageType = 'start_task'
-export interface OutgoingMessage<T extends OutgoingMessageType, I> {
+export type OutgoingMessage<T extends OutgoingMessageType, I> = {
   type: T
   payload: OutgoingPayload<T, I>
 }
 
 export type Log = Partial<{ info: string; warning: string; error: string; debug: string }>
 
-export type IncomingPayload<T extends IncomingMessageType, P> = T extends 'log'
+export type IncomingPayload<T extends IncomingMessageType, O, P> = T extends 'log'
   ? { log: Log }
   : T extends 'task_progress'
-  ? { progress: number }
+  ? { progress: number; data: P }
   : T extends 'task_error'
-  ? { error: ErrorMessage }
+  ? { error: SerializedError }
   : T extends 'task_done'
-  ? { output: P }
+  ? { output: O }
   : {}
 
 export type IncomingMessageType = 'log' | 'worker_ready' | 'task_done' | 'task_progress' | 'task_error'
-export interface IncomingMessage<T extends IncomingMessageType, O> {
+export type IncomingMessage<T extends IncomingMessageType, O, P> = {
   type: T
-  payload: IncomingPayload<T, O>
+  payload: IncomingPayload<T, O, P>
 }
 
 export type AllOutgoingMessages<I> = OutgoingMessage<OutgoingMessageType, I>
-export type AllIncomingMessages<O> = IncomingMessage<IncomingMessageType, O>
+export type AllIncomingMessages<O, P> = IncomingMessage<IncomingMessageType, O, P>
 
 export const isStartTask = <I>(msg: AllOutgoingMessages<I>): msg is OutgoingMessage<'start_task', I> =>
   msg.type === 'start_task'
-export const isLog = <O>(msg: AllIncomingMessages<O>): msg is IncomingMessage<'log', O> => msg.type === 'log'
-export const isWorkerReady = <O>(msg: AllIncomingMessages<O>): msg is IncomingMessage<'worker_ready', O> =>
-  msg.type === 'worker_ready'
-export const isTrainingDone = <O>(msg: AllIncomingMessages<O>): msg is IncomingMessage<'task_done', O> =>
-  msg.type === 'task_done'
-export const isTrainingProgress = <O>(msg: AllIncomingMessages<O>): msg is IncomingMessage<'task_progress', O> =>
-  msg.type === 'task_progress'
-export const isTrainingError = <O>(msg: AllIncomingMessages<O>): msg is IncomingMessage<'task_error', O> =>
-  msg.type === 'task_error'
+export const isLog = <O, P>(msg: AllIncomingMessages<O, P>): msg is IncomingMessage<'log', O, P> =>
+  msg.type === 'log'
+export const isWorkerReady = <O, P>(
+  msg: AllIncomingMessages<O, P>
+): msg is IncomingMessage<'worker_ready', O, P> => msg.type === 'worker_ready'
+export const isTrainingDone = <O, P>(
+  msg: AllIncomingMessages<O, P>
+): msg is IncomingMessage<'task_done', O, P> => msg.type === 'task_done'
+export const isTrainingProgress = <O, P>(
+  msg: AllIncomingMessages<O, P>
+): msg is IncomingMessage<'task_progress', O, P> => msg.type === 'task_progress'
+export const isTrainingError = <O, P>(
+  msg: AllIncomingMessages<O, P>
+): msg is IncomingMessage<'task_error', O, P> => msg.type === 'task_error'
diff --git a/packages/worker/src/worker-pool/index.ts b/packages/worker/src/worker-pool/index.ts
index 76ff51a5..1c40d9d5 100644
--- a/packages/worker/src/worker-pool/index.ts
+++ b/packages/worker/src/worker-pool/index.ts
@@ -1,9 +1,9 @@
 import _ from 'lodash'
 
-import { deserializeError } from '../error-utils'
+import { ErrorHandler } from '../error-handler'
 import { TaskAlreadyStartedError, TaskCanceledError, TaskExitedUnexpectedlyError } from '../errors'
 import { SIG_KILL } from '../signals'
-import { Logger, PoolOptions, WorkerPool as IWorkerPool } from '../typings'
+import { Logger, PoolOptions, WorkerPool as IWorkerPool, ErrorDeserializer, errors, TaskProgress } from '../typings'
 
 import {
   AllIncomingMessages,
@@ -18,17 +18,20 @@
 import { Scheduler } from './scheduler'
 import { Worker } from './worker'
 
-export abstract class WorkerPool<I, O> implements IWorkerPool<I, O> {
+export abstract class WorkerPool<I, O, P> implements IWorkerPool<I, O, P> {
   protected _scheduler: Scheduler
+  private errorHandler: ErrorDeserializer
+
   constructor(protected logger: Logger, private config: PoolOptions) {
+    this.errorHandler = config.errorHandler ?? new ErrorHandler()
     this._scheduler = new Scheduler(() => this._createNewWorker(), this.logger, {
       maxItems: this.config.maxWorkers
    })
   }
 
   abstract createWorker: (entryPoint: string, env: NodeJS.ProcessEnv) => Promise<Worker>
   abstract isMainWorker: () => boolean
 
-  public async run(taskId: string, input: I, progress: (x: number) => void): Promise<O> {
+  public async run(taskId: string, input: I, progress: TaskProgress<P>): Promise<O> {
     if (!this.isMainWorker()) {
       throw new Error("Can't create a worker pool inside a child worker.")
     }
@@ -58,7 +61,7 @@ export abstract class WorkerPool<I, O> implements IWorkerPool<I, O> {
     return this._scheduler.cancel(id)
   }
 
-  private async _startTask(worker: Worker, input: I, progress: (x: number) => void): Promise<O> {
+  private async _startTask(worker: Worker, input: I, progress: TaskProgress<P>): Promise<O> {
     const msg: OutgoingMessage<'start_task', I> = {
       type: 'start_task',
       payload: { input }
@@ -77,17 +80,19 @@
       worker.on('exit', exitHandler)
     }
 
-    const messageHandler = (msg: AllIncomingMessages<O>) => {
+    const messageHandler = (msg: AllIncomingMessages<O, P>) => {
       if (isTrainingDone(msg)) {
         removeHandlers()
         resolve(msg.payload.output)
       }
       if (isTrainingError(msg)) {
         removeHandlers()
-        reject(deserializeError(msg.payload.error))
+
+        const deserializedError = this.errorHandler.deserializeError(msg.payload.error)
+        reject(deserializedError)
       }
       if (isTrainingProgress(msg)) {
-        progress(msg.payload.progress)
+        progress(msg.payload.progress, msg.payload.data)
       }
       if (isLog(msg)) {
         this._logMessage(msg)
@@ -101,7 +106,7 @@
         reject(new TaskCanceledError())
         return
       }
-      reject(new TaskExitedUnexpectedlyError(worker, { exitCode, signal }))
+      reject(this._taskExitedUnexpectedlyError(worker, exitCode, signal))
       return
     }
@@ -131,7 +136,7 @@
       worker.on('exit', exitHandler)
     }
 
-    const messageHandler = (msg: AllIncomingMessages<O>) => {
+    const messageHandler = (msg: AllIncomingMessages<O, P>) => {
       if (isLog(msg)) {
         this._logMessage(msg)
       }
@@ -149,14 +154,18 @@
     const exitHandler = (exitCode: number, signal: string) => {
       removeHandlers()
-      reject(new TaskExitedUnexpectedlyError(worker, { exitCode, signal }))
+      reject(this._taskExitedUnexpectedlyError(worker, exitCode, signal))
     }
 
     addHandlers()
   })
 }
 
-  private _logMessage(msg: IncomingMessage<'log', O>) {
+  private _taskExitedUnexpectedlyError(worker: Worker, exitCode: number, signal: string): TaskExitedUnexpectedlyError {
+    return new TaskExitedUnexpectedlyError({ wType: worker.innerWorker.type, wid: worker.wid, exitCode, signal })
+  }
+
+  private _logMessage(msg: IncomingMessage<'log', O, P>) {
     const { log } = msg.payload
     log.debug && this.logger.debug(log.debug)
     log.info && this.logger.info(log.info)
diff --git a/packages/worker/src/worker-pool/scheduler.ts b/packages/worker/src/worker-pool/scheduler.ts
index fa99c279..96e0fe7f 100644
--- a/packages/worker/src/worker-pool/scheduler.ts
+++ b/packages/worker/src/worker-pool/scheduler.ts
@@ -2,7 +2,7 @@ import _ from 'lodash'
 import { Logger } from '../typings'
 import { Worker } from './worker'
 
-interface Options {
+type Options = {
   maxItems: number
 }
 
@@ -33,6 +33,7 @@ export class Scheduler {
     const isPlaceLeft = this._options.maxItems < 0 || this._options.maxItems > totalCount
     if (!readyCount && isPlaceLeft) {
      const newItem = await this._generator()
+      newItem.isAlive()
       this.active[id] = newItem
       return newItem
     }
   }
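Taken together, the hunks above let a caller plug a matching error serializer/deserializer pair into both sides of the pool boundary. The sketch below shows the intended wiring under stated assumptions: the `@botpress/worker` import path and the `entry-point.js` script are hypothetical (the diff does not show the published package name), and the logger is a bare stand-in for the `Logger` type.

```ts
import path from 'path'

// Package name is assumed; the diff only shows the source layout under packages/worker.
import { makeProcessPool, makeProcessEntryPoint, errors } from '@botpress/worker'
import { Logger, SerializedError } from '@botpress/worker'

// A domain error that should survive the process boundary.
class TrainingError extends Error {
  constructor(message: string, public step: string) {
    super(message)
  }
}

// Bare-bones implementation of the Logger type from typings.d.ts.
const logger: Logger = {
  debug: (msg) => console.debug(msg),
  info: (msg) => console.info(msg),
  warning: (msg, err) => console.warn(msg, err?.message),
  error: (msg, err) => console.error(msg, err?.message),
  sub: () => logger
}

// Worker side (compiled to the hypothetical entry-point.js): serialize domain
// errors so the main process can rebuild them.
export const startWorker = () => {
  const entryPoint = makeProcessEntryPoint<string, number, void>({
    errorHandler: {
      serializeError: (err: Error): SerializedError => ({
        message: err.message,
        stack: err.stack,
        data: err instanceof TrainingError ? { step: err.step } : {}
      })
    }
  })
  entryPoint.listenForTask(async ({ input, progress }) => {
    progress(0.5) // P is void here, so progress takes only a number
    return input.length
  })
  return entryPoint.initialize()
}

// Main side: the deserializer is the counterpart of the serializer above.
export const startMain = async () => {
  const pool = makeProcessPool<string, number, void>(logger, {
    entryPoint: path.join(__dirname, 'entry-point.js'),
    maxWorkers: 2,
    env: process.env,
    errorHandler: {
      deserializeError: (err: SerializedError): Error =>
        err.data?.step ? new TrainingError(err.message, err.data.step) : new Error(err.message)
    }
  })

  try {
    const output = await pool.run('task-1', 'some input', (p) => logger.info(`progress: ${p}`))
    logger.info(`task output: ${output}`)
  } catch (err) {
    if (err instanceof errors.TaskExitedUnexpectedlyError) {
      logger.error(`worker ${err.wid} exited with code ${err.exitCode} and signal ${err.signal}`)
    }
    throw err
  }
}
```

When the worker throws `TrainingError`, the serializer ships `{ step }` through the `task_error` payload and the pool's deserializer rebuilds the class on the main side — the same path `_startTask` now takes through `this.errorHandler.deserializeError`.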
diff --git a/packages/worker/src/worker-pool/worker-entry-point.ts b/packages/worker/src/worker-pool/worker-entry-point.ts
index 857c8f48..1c0df3a5 100644
--- a/packages/worker/src/worker-pool/worker-entry-point.ts
+++ b/packages/worker/src/worker-pool/worker-entry-point.ts
@@ -1,9 +1,22 @@
-import { serializeError } from '../error-utils'
-import { Logger, TaskHandler, WorkerEntryPoint as IWorkerEntryPoint } from '../typings'
+import { ErrorHandler } from '../error-handler'
+import {
+  Logger,
+  TaskHandler,
+  WorkerEntryPoint as IWorkerEntryPoint,
+  ErrorSerializer,
+  EntryPointOptions,
+  TaskProgress
+} from '../typings'
 import { AllOutgoingMessages, IncomingMessage, isStartTask } from './communication'
 
-export abstract class WorkerEntryPoint<I, O> implements IWorkerEntryPoint<I, O> {
-  private _handlers: TaskHandler<I, O>[] = []
+export abstract class WorkerEntryPoint<I, O, P> implements IWorkerEntryPoint<I, O, P> {
+  private _handlers: TaskHandler<I, O, P>[] = []
+
+  private errorHandler: ErrorSerializer
+
+  constructor(config?: EntryPointOptions) {
+    this.errorHandler = config?.errorHandler ?? new ErrorHandler()
+  }
 
   abstract isMainWorker: () => boolean
   abstract messageMain: (msg: any) => void
@@ -14,7 +27,7 @@ export abstract class WorkerEntryPoint<I, O> implements IWorkerEntryPoint<I, O>
       throw new Error("Can't create a worker entry point inside the main worker.")
     }
 
-    const readyResponse: IncomingMessage<'worker_ready', O> = {
+    const readyResponse: IncomingMessage<'worker_ready', O, P> = {
       type: 'worker_ready',
       payload: {}
     }
@@ -30,19 +43,19 @@
     this.listenMain('message', messageHandler)
   }
 
-  public listenForTask(handler: TaskHandler<I, O>) {
+  public listenForTask(handler: TaskHandler<I, O, P>) {
     this._handlers.push(handler)
   }
 
-  private _runHandler = async (handler: TaskHandler<I, O>, input: I) => {
+  private _runHandler = async (handler: TaskHandler<I, O, P>, input: I) => {
     try {
-      const progress = (p: number) => {
-        const progressResponse: IncomingMessage<'task_progress', O> = {
+      const progress = ((p: number, data: P) => {
+        const progressResponse: IncomingMessage<'task_progress', O, P> = {
           type: 'task_progress',
-          payload: { progress: p }
+          payload: { progress: p, data }
         }
         this.messageMain(progressResponse)
-      }
+      }) as TaskProgress<P>
 
       const output: O = await handler({
         input,
@@ -50,18 +63,19 @@
         progress
       })
 
-      const doneResponse: IncomingMessage<'task_done', O> = {
+      const doneResponse: IncomingMessage<'task_done', O, P> = {
         type: 'task_done',
         payload: { output }
       }
       this.messageMain(doneResponse)
-    } catch (err) {
-      const errorResponse: IncomingMessage<'task_error', O> = {
+    } catch (thrown) {
+      const err = thrown instanceof Error ? thrown : new Error(`${thrown}`)
+      const errorResponse: IncomingMessage<'task_error', O, P> = {
         type: 'task_error',
         payload: {
-          error: serializeError(err)
+          error: this.errorHandler.serializeError(err)
         }
       }
       this.messageMain(errorResponse)
@@ -70,30 +84,30 @@
 
   public logger: Logger = {
     debug: (msg: string) => {
-      const response: IncomingMessage<'log', O> = {
+      const response: IncomingMessage<'log', O, P> = {
         type: 'log',
         payload: { log: { debug: msg } }
       }
       this.messageMain(response)
     },
     info: (msg: string) => {
-      const response: IncomingMessage<'log', O> = {
+      const response: IncomingMessage<'log', O, P> = {
         type: 'log',
         payload: { log: { info: msg } }
       }
       this.messageMain(response)
     },
     warning: (msg: string, err?: Error) => {
-      const warning = `${msg} ${serializeError(err)}`
-      const response: IncomingMessage<'log', O> = {
+      const warning = err ? `${msg} ${err.message}` : msg
+      const response: IncomingMessage<'log', O, P> = {
         type: 'log',
         payload: { log: { warning } }
       }
       this.messageMain(response)
     },
     error: (msg: string, err?: Error) => {
-      const error = `${msg} ${serializeError(err)}`
-      const response: IncomingMessage<'log', O> = { type: 'log', payload: { log: { error } } }
+      const error = err ? `${msg} ${err.message}` : msg
+      const response: IncomingMessage<'log', O, P> = { type: 'log', payload: { log: { error } } }
       this.messageMain(response)
     },
     sub: (namespace: string) => {
diff --git a/readme.md b/readme.md
index def62246..f385412a 100644
--- a/readme.md
+++ b/readme.md
@@ -10,13 +10,13 @@ The source code is structured in a mono-repo fashion using yarn workspaces. The
 - [nlu-server](./packages/nlu-server/readme.md): Contains the Botpress Standalone NLU Server
 - [lang-server](./packages/lang-server/readme.md): Contains the Botpress Language Server
-- [nlu-cli](./packages/nlu-cli/readme.md): Small CLI to use as an entry point for both `nlu-server` and `lang-server`
+- [nlu-bin](./packages/nlu-bin/readme.md): Small CLI to use as an entry point for both `nlu-server` and `lang-server`
 
 Check out each individual package for more details.
 
 ## Running from source
 
-**Prerequisites**: Node 12.13 (you can use [nvm](https://github.com/creationix/nvm)) and Yarn.
+**Prerequisites**: Node 16.13 (you can use [nvm](https://github.com/creationix/nvm)) and Yarn.
 
 1. Run `yarn` to fetch node packages.
 1. Run `yarn build && yarn start` to build and start the Standalone NLU server.
 
@@ -26,6 +26,27 @@ Check out each individual package for more details.
 
 New executable binary files are packaged at every release. You can download those directly on the release page located [here](https://github.com/botpress/nlu/releases).
 
+## Telemetry
+
+### Metrics (Prometheus)
+
+A Prometheus endpoint can be configured to expose NLU-specific metrics. By setting the `PROMETHEUS_ENABLED` environment variable to `true`, port `9090` will expose Prometheus metrics.
+
+### Tracing (Jaeger)
+
+A [Jaeger](https://www.jaegertracing.io/) client can be configured using a subset of the standard OpenTelemetry [environment variables](https://opentelemetry.io/docs/reference/specification/sdk-environment-variables).
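As a quick illustration of how these settings combine, a launch of the standalone server might look like the sketch below. The collector endpoint, service name, and environment values are illustrative assumptions rather than documented defaults, and the `nlu` subcommand mirrors how this repo's CI workflows start the server; each variable is described in the list that follows.

```bash
# Illustrative only: endpoint and service names are examples, not shipped defaults.
PROMETHEUS_ENABLED=true \
TRACING_ENABLED=true \
OTEL_EXPORTER_JAEGER_ENDPOINT=http://localhost:14268/api/traces \
OTEL_SERVICE_NAME=nlu-server \
OTEL_DEPLOYMENT_ENVIRONMENT=staging \
./nlu nlu
```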
+
+The important environment variables for configuring tracing are:
+
+- `TRACING_ENABLED` bool Enables the tracer
+- `TRACING_DEBUG` bool Adds debug information about the tracing configuration
+- `OTEL_EXPORTER_JAEGER_ENDPOINT` url Sets the Jaeger collector endpoint
+- `OTEL_SERVICE_NAME` string Sets the service name given to a trace
+- `OTEL_SERVICE_VERSION` string Sets the current running version of the service
+- `OTEL_SERVICE_VERSION_INSTANCE_ID` string Sets the node instance id on which the service is running
+- `OTEL_SERVICE_NAMESPACE` string Sets the namespace of the service
+- `OTEL_DEPLOYMENT_ENVIRONMENT` string Sets the environment of the service
+
 ## ⚠️⚠️ Disclaimer ⚠️⚠️
 
 The NLU Server does **not** enforce authentication in any way. This means it is completely exposed to attacks. If you plan on using the nlu-server in your local Botpress setup, make sure it is not publicly exposed. If you plan on exposing the NLU server, make sure it is hidden behind a reverse proxy that enforces proper authentication. This reverse proxy should:
diff --git a/scripts/gulp.config.js b/scripts/gulp.config.js
deleted file mode 100644
index 479f8591..00000000
--- a/scripts/gulp.config.js
+++ /dev/null
@@ -1,26 +0,0 @@
-const fs = require('fs')
-const path = require('path')
-
-const rootPath = path.join(__dirname, '..')
-const filePath = path.join(rootPath, 'config.json')
-
-function createEmptyConfigFile() {
-  const rawContent = '{}'
-  fs.writeFileSync(filePath, rawContent)
-}
-
-function upsertConfigFile(cb) {
-  try {
-    const exists = fs.existsSync(filePath)
-    if (!exists) {
-      createEmptyConfigFile()
-    }
-    cb()
-  } catch (err) {
-    cb(err)
-  }
-}
-
-module.exports = {
-  upsertConfigFile
-}
diff --git a/scripts/gulp.package.js b/scripts/gulp.package.js
index 08f8e089..d7cb5d2e 100644
--- a/scripts/gulp.package.js
+++ b/scripts/gulp.package.js
@@ -6,9 +6,9 @@ const chalk = require('chalk')
 const _ = require('lodash')
 
 const targets = {
-  win: 'node12-win32-x64',
-  linux: 'node12-linux-x64',
-  darwin: 'node12-macos-x64'
+  win: 'node16-win32-x64',
+  linux: 'node16-linux-x64',
+  darwin: 'node16-macos-x64'
 }
 
 const projectRoot = path.join(__dirname, '..')
diff --git a/scripts/gulp.release.js b/scripts/gulp.release.js
index 12f8b7eb..4ad70e6a 100644
--- a/scripts/gulp.release.js
+++ b/scripts/gulp.release.js
@@ -81,7 +81,7 @@ const bumpVersion = (cb) => {
   })
   await spawn(yarnCmd, ['version', '--new-version', newVersion, '--no-git-tag-version'], {
     stdio: 'inherit',
-    cwd: path.join(packagesDir, 'nlu-cli')
+    cwd: path.join(packagesDir, 'nlu-bin')
   })
 
   const changeLog = await getChangeLog()
diff --git a/tsconfig.json b/tsconfig.json
index 9b2b12e3..edf3956c 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -1,16 +1,22 @@
 {
   "references": [
     {
-      "path": "./packages/nlu-cli"
+      "path": "./packages/nlu-bin"
     },
     {
-      "path": "./packages/e2e"
+      "path": "./packages/nlu-bench"
+    },
+    {
+      "path": "./packages/nlu-e2e"
     }
   ],
   "files": [],
   "include": [],
-  "exclude": ["**/node_modules"],
+  "exclude": [
+    "**/node_modules"
+  ],
   "compilerOptions": {
-    "esModuleInterop": true
+    "esModuleInterop": true,
+    "composite": true
   }
-}
+}
\ No newline at end of file
diff --git a/tsconfig.packages.json b/tsconfig.packages.json
index 71b27219..743e557c 100644
--- a/tsconfig.packages.json
+++ b/tsconfig.packages.json
@@ -1,28 +1,11 @@
 {
   "compilerOptions": {
-    /* Visit https://aka.ms/tsconfig.json to read more about this file */
-
-    /* Basic Options */
     "incremental": true /* Enable incremental compilation */,
    "target": "es2019" /* Specify ECMAScript
target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */, "module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */, - // "lib": [], /* Specify library files to be included in the compilation. */ - // "allowJs": true, /* Allow javascript files to be compiled. */ - // "checkJs": true, /* Report errors in .js files. */ - // "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */ "declaration": true /* Generates corresponding '.d.ts' file. */, - // "declarationMap": true, /* Generates a sourcemap for each corresponding '.d.ts' file. */ "sourceMap": true /* Generates corresponding '.map' file. */, - // "outFile": "./", /* Concatenate and emit output to single file. */ "composite": true /* Enable project compilation */, - // "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */ - // "removeComments": true, /* Do not emit comments to output. */ - // "noEmit": true, /* Do not emit outputs. */ - // "importHelpers": true, /* Import emit helpers from 'tslib'. */ - // "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */ - // "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */ - - /* Strict Type-Checking Options */ "strict": true /* Enable all strict type-checking options. */, "noImplicitAny": false /* Raise error on expressions and declarations with an implied 'any' type. */, "strictNullChecks": true /* Enable strict null checks. */, @@ -38,29 +21,10 @@ // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ - /* Module Resolution Options */ "moduleResolution": "node" /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */, - // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */ - // "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */ - // "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */ - // "typeRoots": [], /* List of folders to include type definitions from. */ - // "types": [], /* Type declaration files to be included in compilation. */ - // "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */ "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */, - // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */ - // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ - - /* Source Map Options */ - // "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */ - // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ - // "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. 
*/ - // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */ - - /* Experimental Options */ "experimentalDecorators": true /* Enables experimental support for ES7 decorators. */, "emitDecoratorMetadata": true /* Enables experimental support for emitting type metadata for decorators. */, - - /* Advanced Options */ "skipLibCheck": true /* Skip type checking of declaration files. */, "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */, "typeRoots": ["./node_modules/@types"], diff --git a/yarn.lock b/yarn.lock index 20bc52f1..2ebfc606 100644 --- a/yarn.lock +++ b/yarn.lock @@ -279,6 +279,11 @@ resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.13.12.tgz#ba320059420774394d3b0c0233ba40e4250b81d1" integrity sha512-4T7Pb244rxH24yR116LAuJ+adxXXnHhZaLJjegJVKSdoNCe4x1eDBaud5YIcQFcqzsaD5BHvJw5BQ0AZapdCRw== +"@babel/parser@7.13.13": + version "7.13.13" + resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.13.13.tgz#42f03862f4aed50461e543270916b47dd501f0df" + integrity sha512-OhsyMrqygfk5v8HmWwOzlYjJrtLaFhF34MrfG/Z73DgYCI6ojNUTUp2TYbtnjo8PegeJp12eamsNettCQjKjVw== + "@babel/parser@^7.1.0", "@babel/parser@^7.12.13", "@babel/parser@^7.13.16", "@babel/parser@^7.4.3": version "7.13.16" resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.13.16.tgz#0f18179b0448e6939b1f3f5c4c355a3a9bcdfd37" @@ -889,6 +894,15 @@ debug "^4.1.0" globals "^11.1.0" +"@babel/types@7.13.12": + version "7.13.12" + resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.13.12.tgz#edbf99208ef48852acdff1c8a681a1e4ade580cd" + integrity sha512-K4nY2xFN4QMvQwkQ+zmBDp6ANMbVNw6BbxWmYA4qNjhR9W+Lj/8ky5MEY2Me5r+B2c6/v6F53oMndG+f9s3IiA== + dependencies: + "@babel/helper-validator-identifier" "^7.12.11" + lodash "^4.17.19" + to-fast-properties "^2.0.0" + "@babel/types@^7.0.0", "@babel/types@^7.12.1", "@babel/types@^7.12.13", "@babel/types@^7.13.0", "@babel/types@^7.13.12", "@babel/types@^7.13.14", "@babel/types@^7.13.16", "@babel/types@^7.13.17", "@babel/types@^7.3.0", "@babel/types@^7.4.0", "@babel/types@^7.4.4": version "7.13.17" resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.13.17.tgz#48010a115c9fba7588b4437dd68c9469012b38b4" @@ -897,6 +911,50 @@ "@babel/helper-validator-identifier" "^7.12.11" to-fast-properties "^2.0.0" +"@bpinternal/log4bot@^0.0.4": + version "0.0.4" + resolved "https://registry.yarnpkg.com/@bpinternal/log4bot/-/log4bot-0.0.4.tgz#87f9867b5ddf83e179eeb1ba7afe0bcf2938c07f" + integrity sha512-psjT4wxlMcKMnWu+acnxoMeVQaAjD5eRfTdpk1L2uymhy2hnukzxg53Z2cThbFDJsdEufFOneEGwSi2kfWXRwA== + dependencies: + lodash "^4.17.21" + moment "^2.24.0" + regex-parser "^2.2.11" + +"@bpinternal/ptb-schema@^0.0.2": + version "0.0.2" + resolved "https://registry.yarnpkg.com/@bpinternal/ptb-schema/-/ptb-schema-0.0.2.tgz#a3d6072ab94d6e7e8c8556e38bc78a8cc13f00e9" + integrity sha512-eSCZprNdylj8JOSilGWRf2+95izwaVf2dBbwKifv9BUSjvIZLmcsADWnvaQSIcOp/4BcUS4CLAMIBd413ncidw== + dependencies: + protobufjs "^6.11.2" + +"@bpinternal/trail@^0.1.0": + version "0.1.0" + resolved "https://registry.yarnpkg.com/@bpinternal/trail/-/trail-0.1.0.tgz#665689884acaedeb495c0408643da6594deac76f" + integrity sha512-tKIpYLi3KzZQv2+U77/EhKa+4YM+cynPRkoeEGP2iW5RYgpkh3pJHFuI8vCzYvTRCLEKjVd08vEbsQ5oIrE3KA== + dependencies: + "@opentelemetry/api" "1.1.0" + "@opentelemetry/auto-instrumentations-node" "0.27.0" + "@opentelemetry/exporter-jaeger" "1.1.1" + 
"@opentelemetry/instrumentation" "0.27.0" + "@opentelemetry/propagator-jaeger" "1.1.1" + "@opentelemetry/resources" "1.1.1" + "@opentelemetry/sdk-trace-base" "1.1.1" + "@opentelemetry/sdk-trace-node" "1.1.1" + "@opentelemetry/semantic-conventions" "1.1.1" + yn "^4.0.0" + +"@bpinternal/yargs-extra@^0.0.2": + version "0.0.2" + resolved "https://registry.yarnpkg.com/@bpinternal/yargs-extra/-/yargs-extra-0.0.2.tgz#6307dd8ee3be593eef81f73c6aefeef0b9039cc9" + integrity sha512-6LycMPwXftFryX2omQt0Fm+fL8cV5dKt5pIg6rIcAKRz4rDqiN9Fbu5uq2Pnm01yHfof755RYlxGGpUiKloMDA== + dependencies: + "@types/yargs" "^17.0.10" + decamelize "5.0.1" + json-schema "^0.4.0" + lodash "^4.17.21" + yargs "^17.5.1" + yn "^4.0.0" + "@cnakazawa/watch@^1.0.3": version "1.0.4" resolved "https://registry.yarnpkg.com/@cnakazawa/watch/-/watch-1.0.4.tgz#f864ae85004d0fcab6f50be9141c4da368d1656a" @@ -1196,6 +1254,333 @@ "@nodelib/fs.scandir" "2.1.3" fastq "^1.6.0" +"@opentelemetry/api-metrics@0.27.0": + version "0.27.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/api-metrics/-/api-metrics-0.27.0.tgz#d8eca344ed1155f3ea8a8133ade827b4bb90efbf" + integrity sha512-tB79288bwjkdhPNpw4UdOEy3bacVwtol6Que7cAu8KEJ9ULjRfSiwpYEwJY/oER3xZ7zNFz0uiJ7N1jSiotpVA== + +"@opentelemetry/api@1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/api/-/api-1.1.0.tgz#563539048255bbe1a5f4f586a4a10a1bb737f44a" + integrity sha512-hf+3bwuBwtXsugA2ULBc95qxrOqP2pOekLz34BJhcAKawt94vfeNyUKpYc0lZQ/3sCP6LqRa7UAdHA7i5UODzQ== + +"@opentelemetry/auto-instrumentations-node@0.27.0": + version "0.27.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/auto-instrumentations-node/-/auto-instrumentations-node-0.27.0.tgz#c2b56a1d487d9ec0a10217862a95a40cb2355ea6" + integrity sha512-DlY6f00wTM6H6THcGvI/jaM9qIGiA11VPIGgvpU1SKJgQLauKD6f6plvtRQWDpqGsXYSCQdwExE9XlJIkovTkQ== + dependencies: + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/instrumentation-dns" "^0.27.0" + "@opentelemetry/instrumentation-express" "^0.27.0" + "@opentelemetry/instrumentation-graphql" "^0.27.0" + "@opentelemetry/instrumentation-grpc" "^0.27.0" + "@opentelemetry/instrumentation-http" "^0.27.0" + "@opentelemetry/instrumentation-ioredis" "^0.27.0" + "@opentelemetry/instrumentation-koa" "^0.28.0" + "@opentelemetry/instrumentation-mongodb" "^0.27.0" + "@opentelemetry/instrumentation-mysql" "^0.27.0" + "@opentelemetry/instrumentation-pg" "^0.27.0" + "@opentelemetry/instrumentation-redis" "^0.27.0" + +"@opentelemetry/context-async-hooks@1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/context-async-hooks/-/context-async-hooks-1.1.1.tgz#0ee32d05800f5479154259acbb5700134aa4d3fb" + integrity sha512-17wlKOwcWzo1Eo2T1OJqWTnrUZ6vTdmHs9XhcqChvyx6N8DRIP096qQxfebk/zDzVgvjryv+K2pYjONPH404hQ== + +"@opentelemetry/core@1.0.1", "@opentelemetry/core@^1.0.0": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/core/-/core-1.0.1.tgz#5e08cef21946fdea7952f544e8f667f6d2a0ded8" + integrity sha512-90nQ2X6b/8X+xjcLDBYKooAcOsIlwLRYm+1VsxcX5cHl6V4CSVmDpBreQSDH/A21SqROzapk6813008SatmPpQ== + dependencies: + "@opentelemetry/semantic-conventions" "1.0.1" + +"@opentelemetry/core@1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/core/-/core-1.1.1.tgz#3d52eeb8ceeee3ca1870319b3e6e609fb1e9eadf" + integrity sha512-rNYVBLzO+gXeYmNVcm4NfKw9x+nTy08Qp8SMpkmM5cqfdEwEtKw83vpSrFKzafy2aOIpmUkKGpi2k/m5kiDP9w== + dependencies: + "@opentelemetry/semantic-conventions" "1.1.1" + 
+"@opentelemetry/exporter-jaeger@1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/exporter-jaeger/-/exporter-jaeger-1.1.1.tgz#a974f6b9a41646f2483d88118167de2bb8deff8a" + integrity sha512-N6p/naMZf1scc0FbbZblVENb/f/4EV4xCqO5vHqqe/oPR4e0rPZ5ha10JQFo8aZAUMX3nIyESqhA1CNA0waLOA== + dependencies: + "@opentelemetry/core" "1.1.1" + "@opentelemetry/sdk-trace-base" "1.1.1" + "@opentelemetry/semantic-conventions" "1.1.1" + jaeger-client "^3.15.0" + +"@opentelemetry/instrumentation-dns@^0.27.0": + version "0.27.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-dns/-/instrumentation-dns-0.27.1.tgz#5b341f18d959f15cb88095070a2c931d57d3652a" + integrity sha512-NwLMHthT7/YncTZcfYbb24n8z/mFYSWPlZkWOhG6XVRAE0KsDLlMsrwvha5Yg/4vhv4nj2qLM9bk5+zA5a/ZfA== + dependencies: + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/semantic-conventions" "^1.0.0" + semver "^7.3.2" + +"@opentelemetry/instrumentation-express@0.27.0": + version "0.27.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-express/-/instrumentation-express-0.27.0.tgz#37968ea21cb6094ba62452037cd5ea4b6f788b2f" + integrity sha512-8C7jGqrhTmAP2lZNzH7SSxSClij3wlYTB7t71/wsFooV01yo/cH2Go0kDo13kPmcGhoR07iMZDabeHWwdxre6w== + dependencies: + "@opentelemetry/core" "^1.0.0" + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/semantic-conventions" "^1.0.0" + "@types/express" "4.17.13" + +"@opentelemetry/instrumentation-express@^0.27.0": + version "0.27.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-express/-/instrumentation-express-0.27.1.tgz#5ca70e85825c05ac595baacd4a37e08f9bcb03b7" + integrity sha512-EbpmTyem70Rg56+A0w8ndSmeyVYj7gFV9yk2IGgVVc9n/hs2mTgsBI2evaUtWbpB51pb1u/h+DhICKxgg/8Hew== + dependencies: + "@opentelemetry/core" "^1.0.0" + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/semantic-conventions" "^1.0.0" + "@types/express" "4.17.13" + +"@opentelemetry/instrumentation-graphql@^0.27.0": + version "0.27.4" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-graphql/-/instrumentation-graphql-0.27.4.tgz#bc76a07ff3a88a170c56b3c448de6e2924ea3f13" + integrity sha512-fgb0mmS/XOPIlfe8b3heibeLfTrGIwsr2HOR4srf8jdCYA/2KbbGioWHoZDOTsu7n54Yj9MtMXDxa+OouJqcEg== + dependencies: + "@opentelemetry/instrumentation" "^0.27.0" + graphql "^15.5.1" + +"@opentelemetry/instrumentation-grpc@^0.27.0": + version "0.27.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-grpc/-/instrumentation-grpc-0.27.0.tgz#198af374de5a8a995f87fe97c9ae8c205d0448b2" + integrity sha512-aFHcAeeLfqoH8PMjmdqEwZwXDJtFSkWmGDBZeH2yrx3KzFMVBB/UJEr1n/ZC6AqfqahL/qqB1N8EnoCoOcs5ig== + dependencies: + "@opentelemetry/api-metrics" "0.27.0" + "@opentelemetry/instrumentation" "0.27.0" + "@opentelemetry/semantic-conventions" "1.0.1" + +"@opentelemetry/instrumentation-http@0.27.0", "@opentelemetry/instrumentation-http@^0.27.0": + version "0.27.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-http/-/instrumentation-http-0.27.0.tgz#4abeeeae69f2fb959343b8a96f75d8d8c7679689" + integrity sha512-Q1dxUt+5d70rbY6jJAC8nwpIQJontmJW94eIS5CsGngvCRYw6tgjLZp2fpVL1o7Lj7uiLpGigeE4EN5Lr2YDFA== + dependencies: + "@opentelemetry/core" "1.0.1" + "@opentelemetry/instrumentation" "0.27.0" + "@opentelemetry/semantic-conventions" "1.0.1" + semver "^7.3.5" + +"@opentelemetry/instrumentation-ioredis@^0.27.0": + version "0.27.1" + resolved 
"https://registry.yarnpkg.com/@opentelemetry/instrumentation-ioredis/-/instrumentation-ioredis-0.27.1.tgz#dad31a7a0a92e88dacf6453921fa273f6546a4fd" + integrity sha512-379UrP7dfWOtW0zcmr1uE2sSBSPkrb4P7mfMnBNasGjrttedLc7EN/yRlLiTHtzPFn7pX3WPFHT0QMsrQdC3YQ== + dependencies: + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/semantic-conventions" "^1.0.0" + "@types/ioredis" "4.26.6" + +"@opentelemetry/instrumentation-knex@0.27.0": + version "0.27.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-knex/-/instrumentation-knex-0.27.0.tgz#5e05f501299b7236608f569710d057f5fbe86a9c" + integrity sha512-VkhJBHk90pKAOaBgV0/PpZ52mPon56RbTvhDyYmq/UHV3pl180srHGMgOvs354eRaTgKoGpcCj+w11jlC5NpRA== + dependencies: + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/semantic-conventions" "^1.0.0" + +"@opentelemetry/instrumentation-koa@^0.28.0": + version "0.28.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-koa/-/instrumentation-koa-0.28.1.tgz#a73fec2723c67ad139f912b4052334f2a84661f1" + integrity sha512-q3vFaOooVJzzHsmML/4noWqyD81jgGWBK7Dt1sYkQISEKfKvAWpmLEOQbfNSccziq+BNHnwd/2WJSmQsj8m7Pw== + dependencies: + "@opentelemetry/core" "^1.0.0" + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/semantic-conventions" "^1.0.0" + "@types/koa" "2.13.4" + "@types/koa__router" "8.0.7" + +"@opentelemetry/instrumentation-mongodb@^0.27.0": + version "0.27.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-mongodb/-/instrumentation-mongodb-0.27.0.tgz#5043be7213c9d4f3baf925ac873de960cea90242" + integrity sha512-Ae9bNTHg+rt7kx3o4j0sizXZVx4S82yIahsmZ2cDqV3BE2RV8+My/+CUx4jCbSa0c8VGyK4Loyyn6IINVs3Yxg== + dependencies: + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/semantic-conventions" "^1.0.0" + "@types/mongodb" "3.6.20" + +"@opentelemetry/instrumentation-mysql@^0.27.0": + version "0.27.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-mysql/-/instrumentation-mysql-0.27.1.tgz#004cdd0f0cfdb3bfb114e4c8af321a277b630dab" + integrity sha512-JxRgJKTL3DtBFGzwwu/n2iVedDbToCbf12554JOUd6d4NRIJo80wEf710RuMNmJvmDPNukD5gVIEQdkGIpXPKA== + dependencies: + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/semantic-conventions" "^1.0.0" + "@types/mysql" "2.15.19" + +"@opentelemetry/instrumentation-pg@0.27.0", "@opentelemetry/instrumentation-pg@^0.27.0": + version "0.27.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-pg/-/instrumentation-pg-0.27.0.tgz#196c68a3b95dcb34d962c5484e17d329b8c0f3a2" + integrity sha512-8G3YwQ/9K1B2IfYAipvTHyTqN79pz4xtNdi2HvvPnspBsrUeF71LqA3S04z1AeU81QhEOgX6D2+FZKdx8N/KTg== + dependencies: + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/semantic-conventions" "^1.0.0" + "@types/pg" "8.6.1" + "@types/pg-pool" "2.0.3" + +"@opentelemetry/instrumentation-redis@^0.27.0": + version "0.27.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation-redis/-/instrumentation-redis-0.27.0.tgz#f5d312a1f15912d1671851ba96a8ab43fc25612b" + integrity sha512-A54NWDuqnTk0XImM64eDhNuvn139scUBxPbkea+Y5QqLKac83XGpVsGI2RCSN4dR2KLurdDI2B3qBVkJ5mxAzA== + dependencies: + "@opentelemetry/instrumentation" "^0.27.0" + "@opentelemetry/semantic-conventions" "^1.0.0" + "@types/redis" "2.8.31" + +"@opentelemetry/instrumentation@0.27.0", "@opentelemetry/instrumentation@^0.27.0": + version "0.27.0" + resolved "https://registry.yarnpkg.com/@opentelemetry/instrumentation/-/instrumentation-0.27.0.tgz#8e9864aed24e5b265008ffb14466bdc27ce31897" + 
integrity sha512-dUwY/VoDptdK8AYigwS3IKblG+unV5xIdV4VQKy+nX5aT3f7vd5PMYs4arCQSYLbLRe0s7GxK6S9dtjai/TsHQ== + dependencies: + "@opentelemetry/api-metrics" "0.27.0" + require-in-the-middle "^5.0.3" + semver "^7.3.2" + shimmer "^1.2.1" + +"@opentelemetry/propagator-b3@1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/propagator-b3/-/propagator-b3-1.1.1.tgz#17d61f3f83dd6e0b86708606cd0a3c94eeb7f018" + integrity sha512-FzrImysl3cVrPUm9mTTCN4Z/A6lYEyuKe6cE/SV9Avek6EKY8Ibgxqsg76T0KN27gm/i3YEbd/NL/+HZit0Wgw== + dependencies: + "@opentelemetry/core" "1.1.1" + +"@opentelemetry/propagator-jaeger@1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/propagator-jaeger/-/propagator-jaeger-1.1.1.tgz#65c49ee98897b601abbb1a5501b1ae7339192ab1" + integrity sha512-l1uuJN4phlsZgqGJLEJRo+QDnXizIwV9oC1N2+8KWpA+cKbAG0Wa4+JGjgio8vnF0kccJDQ02CG7cBbkcleBgA== + dependencies: + "@opentelemetry/core" "1.1.1" + +"@opentelemetry/resources@1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/resources/-/resources-1.1.1.tgz#3abe9957e9e670164aaac9051768d95f4538fe8a" + integrity sha512-w0X65ufTaRevIumjylWzYhRquRNoM5T6e0ARNcE0o2YkYPkAxTr3PYkcXG8hUdWRAglqliZKG4IlMv03Q0wOXA== + dependencies: + "@opentelemetry/core" "1.1.1" + "@opentelemetry/semantic-conventions" "1.1.1" + +"@opentelemetry/sdk-trace-base@1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/sdk-trace-base/-/sdk-trace-base-1.1.1.tgz#2277e44a8b90815bb3c23515cae9de57ce902595" + integrity sha512-nj5kFly/d6V2UXZNi3jCaRBw44/7Z91xH0PcemXJTO3B6gyMx8zIHXdnECxrTVR1pglDWYCGs84uXPavu5SULw== + dependencies: + "@opentelemetry/core" "1.1.1" + "@opentelemetry/resources" "1.1.1" + "@opentelemetry/semantic-conventions" "1.1.1" + +"@opentelemetry/sdk-trace-node@1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/sdk-trace-node/-/sdk-trace-node-1.1.1.tgz#02093c1fcf579ad03e7d19d5bf3906b744b5e64c" + integrity sha512-wb/BONBrBs/EDN24AqB1KAAygVUiD8WdufaprLdv1LGTNat2ETCVVX+jKoi3K8W6y1KVLeEM5GjBV3Ww0E40nA== + dependencies: + "@opentelemetry/context-async-hooks" "1.1.1" + "@opentelemetry/core" "1.1.1" + "@opentelemetry/propagator-b3" "1.1.1" + "@opentelemetry/propagator-jaeger" "1.1.1" + "@opentelemetry/sdk-trace-base" "1.1.1" + semver "^7.3.5" + +"@opentelemetry/semantic-conventions@1.0.1", "@opentelemetry/semantic-conventions@^1.0.0": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/semantic-conventions/-/semantic-conventions-1.0.1.tgz#9349c3860a53468fa2108b5df09aa843f22dbf94" + integrity sha512-7XU1sfQ8uCVcXLxtAHA8r3qaLJ2oq7sKtEwzZhzuEXqYmjW+n+J4yM3kNo0HQo3Xp1eUe47UM6Wy6yuAvIyllg== + +"@opentelemetry/semantic-conventions@1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@opentelemetry/semantic-conventions/-/semantic-conventions-1.1.1.tgz#8f09355039cf03ecd7a16b4e309c4a47cb8a8d30" + integrity sha512-GdTwDHSaZ6iP5LUdvS/SLUjn3067xn1HcBsLZCh8YOsf22d/YWTBcnFl3buieBP4KiajwHLho4I8HSMDKACBSg== + +"@promster/express@5.0.3": + version "5.0.3" + resolved "https://registry.yarnpkg.com/@promster/express/-/express-5.0.3.tgz#754acd2ab909002d81cab88d350da1f2ab9e9e3f" + integrity sha512-Em4K7LM5wKSq14ZqDRvQscp+3cPJxRnoSa0wMKZL+emSuDhFhyxysfB4muUoEimgiXgv+uVy4wFGNzuReFVF6w== + dependencies: + "@promster/metrics" "^6.0.2" + merge-options "3.0.4" + tslib "2.2.0" + +"@promster/metrics@6.0.2", "@promster/metrics@^6.0.2": + version "6.0.2" + resolved 
"https://registry.yarnpkg.com/@promster/metrics/-/metrics-6.0.2.tgz#2609881242ee14247bd79ff4c29671d4c96d06bb" + integrity sha512-VtuSn/OeI0W3rYwRYrg0IlSHxujGZaZTBg+u2dbCw/grazoYiBpLJLZkZjNHlzeWuioN8u3otcGrWfJe2bFQAA== + dependencies: + lodash.memoize "4.1.2" + lodash.once "4.1.1" + merge-options "3.0.4" + optional "0.1.4" + ts-essentials "7.0.1" + tslib "2.2.0" + url "0.11.0" + url-value-parser "2.0.3" + optionalDependencies: + gc-stats "1.4.0" + +"@promster/server@6.0.2": + version "6.0.2" + resolved "https://registry.yarnpkg.com/@promster/server/-/server-6.0.2.tgz#97d4a032360e244fface27ffd815db0e115bdb40" + integrity sha512-HLsVQinSToJS3PFCy19cv0x7DQNfK/3P5U5As+mXg8FGD6jpYQUvX/GW9HFlmVDGD/zm9pK0ykFgR9FLT5mLNQ== + dependencies: + "@promster/metrics" "^6.0.2" + tslib "2.2.0" + +"@protobufjs/aspromise@^1.1.1", "@protobufjs/aspromise@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@protobufjs/aspromise/-/aspromise-1.1.2.tgz#9b8b0cc663d669a7d8f6f5d0893a14d348f30fbf" + integrity sha1-m4sMxmPWaafY9vXQiToU00jzD78= + +"@protobufjs/base64@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@protobufjs/base64/-/base64-1.1.2.tgz#4c85730e59b9a1f1f349047dbf24296034bb2735" + integrity sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg== + +"@protobufjs/codegen@^2.0.4": + version "2.0.4" + resolved "https://registry.yarnpkg.com/@protobufjs/codegen/-/codegen-2.0.4.tgz#7ef37f0d010fb028ad1ad59722e506d9262815cb" + integrity sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg== + +"@protobufjs/eventemitter@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz#355cbc98bafad5978f9ed095f397621f1d066b70" + integrity sha1-NVy8mLr61ZePntCV85diHx0Ga3A= + +"@protobufjs/fetch@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/fetch/-/fetch-1.1.0.tgz#ba99fb598614af65700c1619ff06d454b0d84c45" + integrity sha1-upn7WYYUr2VwDBYZ/wbUVLDYTEU= + dependencies: + "@protobufjs/aspromise" "^1.1.1" + "@protobufjs/inquire" "^1.1.0" + +"@protobufjs/float@^1.0.2": + version "1.0.2" + resolved "https://registry.yarnpkg.com/@protobufjs/float/-/float-1.0.2.tgz#5e9e1abdcb73fc0a7cb8b291df78c8cbd97b87d1" + integrity sha1-Xp4avctz/Ap8uLKR33jIy9l7h9E= + +"@protobufjs/inquire@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/inquire/-/inquire-1.1.0.tgz#ff200e3e7cf2429e2dcafc1140828e8cc638f089" + integrity sha1-/yAOPnzyQp4tyvwRQIKOjMY48Ik= + +"@protobufjs/path@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@protobufjs/path/-/path-1.1.2.tgz#6cc2b20c5c9ad6ad0dccfd21ca7673d8d7fbf68d" + integrity sha1-bMKyDFya1q0NzP0hynZz2Nf79o0= + +"@protobufjs/pool@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/pool/-/pool-1.1.0.tgz#09fd15f2d6d3abfa9b65bc366506d6ad7846ff54" + integrity sha1-Cf0V8tbTq/qbZbw2ZQbWrXhG/1Q= + +"@protobufjs/utf8@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/utf8/-/utf8-1.1.0.tgz#a777360b5b39a1a2e5106f8e858f2fd2d060c570" + integrity sha1-p3c2C1s5oaLlEG+OhY8v0tBgxXA= + "@sentry/core@6.9.0": version "6.9.0" resolved "https://registry.yarnpkg.com/@sentry/core/-/core-6.9.0.tgz#43b25290f3b1eb2c23f665e1c0fcbadd06d06012" @@ -1301,6 +1686,13 @@ resolved "https://registry.yarnpkg.com/@tsconfig/node16/-/node16-1.0.2.tgz#423c77877d0569db20e1fc80885ac4118314010e" integrity 
sha512-eZxlbI8GZscaGS7kkc/trHTT5xgrjH3/1n2JDwusC9iahPKWMRvRjJSAN5mCXviuTGQ/lHnhvv8Q1YTpnfz9gA== +"@types/accepts@*": + version "1.3.5" + resolved "https://registry.yarnpkg.com/@types/accepts/-/accepts-1.3.5.tgz#c34bec115cfc746e04fe5a059df4ce7e7b391575" + integrity sha512-jOdnI/3qTpHABjM5cx1Hc0sKsPoYCp+DP/GJRGtDlPd7fiV9oXGGIcjW/ZOxLIvjGz8MA+uMZI9metHlgqbgwQ== + dependencies: + "@types/node" "*" + "@types/babel__core@^7.1.0": version "7.1.14" resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.14.tgz#faaeefc4185ec71c389f4501ee5ec84b170cc402" @@ -1359,11 +1751,23 @@ "@types/connect" "*" "@types/node" "*" +"@types/bson@*": + version "4.2.0" + resolved "https://registry.yarnpkg.com/@types/bson/-/bson-4.2.0.tgz#a2f71e933ff54b2c3bf267b67fa221e295a33337" + integrity sha512-ELCPqAdroMdcuxqwMgUpifQyRoTpyYCNr1V9xKyF40VsBobsj+BbWNRvwGchMgBPGqkw655ypkjj2MEF5ywVwg== + dependencies: + bson "*" + "@types/bytes@^3.1.0": version "3.1.0" resolved "https://registry.yarnpkg.com/@types/bytes/-/bytes-3.1.0.tgz#835a3e4aea3b4d7604aca216a78de372bff3ecc3" integrity sha512-5YG1AiIC8HPPXRvYAIa7ehK3YMAwd0DWiPCtpuL9sgKceWLyWsVtLRA+lT4NkoanDNF9slwQ66lPizWDpgRlWA== +"@types/chai@4.3.0": + version "4.3.0" + resolved "https://registry.yarnpkg.com/@types/chai/-/chai-4.3.0.tgz#23509ebc1fa32f1b4d50d6a66c4032d5b8eaabdc" + integrity sha512-/ceqdqeRraGolFTcfoXNiqjyQhZzbINDngeoAq9GoHa8PPK1yNzTaxWjA6BFWp5Ua9JpXEMSS4s5i9tS0hOJtw== + "@types/cli-progress@^3.8.0": version "3.9.1" resolved "https://registry.yarnpkg.com/@types/cli-progress/-/cli-progress-3.9.1.tgz#285e7fbdad6e7baf072d163ae1c3b23b7b219130" @@ -1383,16 +1787,43 @@ dependencies: "@types/node" "*" +"@types/content-disposition@*": + version "0.5.5" + resolved "https://registry.yarnpkg.com/@types/content-disposition/-/content-disposition-0.5.5.tgz#650820e95de346e1f84e30667d168c8fd25aa6e3" + integrity sha512-v6LCdKfK6BwcqMo+wYW05rLS12S0ZO0Fl4w1h4aaZMD7bqT3gVUns6FvLJKGZHQmYn3SX55JWGpziwJRwVgutA== + "@types/cookiejar@*": version "2.1.2" resolved "https://registry.yarnpkg.com/@types/cookiejar/-/cookiejar-2.1.2.tgz#66ad9331f63fe8a3d3d9d8c6e3906dd10f6446e8" integrity sha512-t73xJJrvdTjXrn4jLS9VSGRbz0nUY3cl2DMGDU48lKl+HR9dbbjW2A9r3g40VA++mQpy6uuHg33gy7du2BKpog== +"@types/cookies@*": + version "0.7.7" + resolved "https://registry.yarnpkg.com/@types/cookies/-/cookies-0.7.7.tgz#7a92453d1d16389c05a5301eef566f34946cfd81" + integrity sha512-h7BcvPUogWbKCzBR2lY4oqaZbO3jXZksexYJVFvkrFeLgbZjQkU4x8pRq6eg2MHXQhY0McQdqmmsxRWlVAHooA== + dependencies: + "@types/connect" "*" + "@types/express" "*" + "@types/keygrip" "*" + "@types/node" "*" + +"@types/cors@^2.8.12": + version "2.8.12" + resolved "https://registry.yarnpkg.com/@types/cors/-/cors-2.8.12.tgz#6b2c510a7ad7039e98e7b8d3d6598f4359e5c080" + integrity sha512-vt+kDhq/M2ayberEtJcIN/hxXy1Pk+59g2FV/ZQceeaTyCtCucjL2Q7FXlFjtWn4n15KCr1NE2lNNFhp0lEThw== + "@types/diff@^5.0.0": version "5.0.0" resolved "https://registry.yarnpkg.com/@types/diff/-/diff-5.0.0.tgz#eb71e94feae62548282c4889308a3dfb57e36020" integrity sha512-jrm2K65CokCCX4NmowtA+MfXyuprZC13jbRuwprs6/04z/EcFg/MCwYdsHn+zgV4CQBiATiI7AEq7y1sZCtWKA== +"@types/express-rate-limit@^5.1.3": + version "5.1.3" + resolved "https://registry.yarnpkg.com/@types/express-rate-limit/-/express-rate-limit-5.1.3.tgz#79f2ca40d90455a5798da6f8e06d8a3d35f4a1d6" + integrity sha512-H+TYy3K53uPU2TqPGFYaiWc2xJV6+bIFkDd/Ma2/h67Pa6ARk9kWE0p/K9OH1Okm0et9Sfm66fmXoAxsH2PHXg== + dependencies: + "@types/express" "*" + "@types/express-serve-static-core@*": version "4.16.2" resolved 
"https://registry.yarnpkg.com/@types/express-serve-static-core/-/express-serve-static-core-4.16.2.tgz#5ee8a22e602005be6767df6b2cba9879df3f75aa" @@ -1410,7 +1841,7 @@ "@types/qs" "*" "@types/range-parser" "*" -"@types/express@^4.16.0", "@types/express@^4.17.13": +"@types/express@*", "@types/express@4.17.13", "@types/express@^4.16.0", "@types/express@^4.17.13": version "4.17.13" resolved "https://registry.yarnpkg.com/@types/express/-/express-4.17.13.tgz#a76e2995728999bab51a33fabce1d705a3709034" integrity sha512-6bSZTPaTIACxn48l50SR+axgrqm6qXFIxrdAKaG6PaJk3+zuUr35hBlgT7vOmJcum+OEaIBLtHV/qloEAFITeA== @@ -1447,6 +1878,23 @@ "@types/minimatch" "*" "@types/node" "*" +"@types/http-assert@*": + version "1.5.3" + resolved "https://registry.yarnpkg.com/@types/http-assert/-/http-assert-1.5.3.tgz#ef8e3d1a8d46c387f04ab0f2e8ab8cb0c5078661" + integrity sha512-FyAOrDuQmBi8/or3ns4rwPno7/9tJTijVW6aQQjK02+kOQ8zmoNg2XJtAuQhvQcy1ASJq38wirX5//9J1EqoUA== + +"@types/http-errors@*": + version "1.8.2" + resolved "https://registry.yarnpkg.com/@types/http-errors/-/http-errors-1.8.2.tgz#7315b4c4c54f82d13fa61c228ec5c2ea5cc9e0e1" + integrity sha512-EqX+YQxINb+MeXaIqYDASb6U6FCHbWjkj4a1CKDBks3d/QiB2+PqBLyO72vLDgAO1wUI4O+9gweRcQK11bTL/w== + +"@types/ioredis@4.26.6": + version "4.26.6" + resolved "https://registry.yarnpkg.com/@types/ioredis/-/ioredis-4.26.6.tgz#7e332d6d24f12d79a1099834ccfa0c169ef667ed" + integrity sha512-Q9ydXL/5Mot751i7WLCm9OGTj5jlW3XBdkdEW21SkXZ8Y03srbkluFGbM3q8c+vzPW30JOLJ+NsZWHoly0+13A== + dependencies: + "@types/node" "*" + "@types/istanbul-lib-coverage@*", "@types/istanbul-lib-coverage@^2.0.0": version "2.0.3" resolved "https://registry.yarnpkg.com/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.3.tgz#4ba8ddb720221f432e443bd5f9117fd22cfd4762" @@ -1506,6 +1954,11 @@ resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.7.tgz#98a993516c859eb0d5c4c8f098317a9ea68db9ad" integrity sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA== +"@types/json-schema@^7.0.9": + version "7.0.9" + resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.9.tgz#97edc9037ea0c38585320b28964dde3b39e4660d" + integrity sha512-qcUXuemtEu+E5wZSJHNxUXeCZhAfXKQ41D+duX+VYPde7xyEVZci+/oXKJL13tnRs9lR2pr4fod59GT6/X1/yQ== + "@types/json5@^0.0.29": version "0.0.29" resolved "https://registry.yarnpkg.com/@types/json5/-/json5-0.0.29.tgz#ee28707ae94e11d2b827bcbe5270bcea7f3e71ee" @@ -1516,6 +1969,53 @@ resolved "https://registry.yarnpkg.com/@types/jsonpack/-/jsonpack-1.1.1.tgz#d5d3e057c409db0d411dfa65183e0785806fd14b" integrity sha512-phk55nFPFXddwxExpnshV2a40Gre+YbhDtAMBc0twoYVDqOOems5cZk1sOdWam+SAUuLbhBP39wuG0PkrAJjPg== +"@types/keygrip@*": + version "1.0.2" + resolved "https://registry.yarnpkg.com/@types/keygrip/-/keygrip-1.0.2.tgz#513abfd256d7ad0bf1ee1873606317b33b1b2a72" + integrity sha512-GJhpTepz2udxGexqos8wgaBx4I/zWIDPh/KOGEwAqtuGDkOUJu5eFvwmdBX4AmB8Odsr+9pHCQqiAqDL/yKMKw== + +"@types/koa-compose@*": + version "3.2.5" + resolved "https://registry.yarnpkg.com/@types/koa-compose/-/koa-compose-3.2.5.tgz#85eb2e80ac50be95f37ccf8c407c09bbe3468e9d" + integrity sha512-B8nG/OoE1ORZqCkBVsup/AKcvjdgoHnfi4pZMn5UwAPCbhk/96xyv284eBYW8JlQbQ7zDmnpFr68I/40mFoIBQ== + dependencies: + "@types/koa" "*" + +"@types/koa@*": + version "2.13.5" + resolved "https://registry.yarnpkg.com/@types/koa/-/koa-2.13.5.tgz#64b3ca4d54e08c0062e89ec666c9f45443b21a61" + integrity sha512-HSUOdzKz3by4fnqagwthW/1w/yJspTgppyyalPVbgZf8jQWvdIXcVW5h2DGtw4zYntOaeRGx49r1hxoPWrD4aA== + 
dependencies: + "@types/accepts" "*" + "@types/content-disposition" "*" + "@types/cookies" "*" + "@types/http-assert" "*" + "@types/http-errors" "*" + "@types/keygrip" "*" + "@types/koa-compose" "*" + "@types/node" "*" + +"@types/koa@2.13.4": + version "2.13.4" + resolved "https://registry.yarnpkg.com/@types/koa/-/koa-2.13.4.tgz#10620b3f24a8027ef5cbae88b393d1b31205726b" + integrity sha512-dfHYMfU+z/vKtQB7NUrthdAEiSvnLebvBjwHtfFmpZmB7em2N3WVQdHgnFq+xvyVgxW5jKDmjWfLD3lw4g4uTw== + dependencies: + "@types/accepts" "*" + "@types/content-disposition" "*" + "@types/cookies" "*" + "@types/http-assert" "*" + "@types/http-errors" "*" + "@types/keygrip" "*" + "@types/koa-compose" "*" + "@types/node" "*" + +"@types/koa__router@8.0.7": + version "8.0.7" + resolved "https://registry.yarnpkg.com/@types/koa__router/-/koa__router-8.0.7.tgz#663d69d5ddebff5aaca27c0594430b3ba6ea20be" + integrity sha512-OB3Ax75nmTP+WR9AgdzA42DI7YmBtiNKN2g1Wxl+d5Dyek9SWt740t+ukwXSmv/jMBCUPyV3YEI93vZHgdP7UQ== + dependencies: + "@types/koa" "*" + "@types/lodash@^4.14.116": version "4.14.123" resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.123.tgz#39be5d211478c8dd3bdae98ee75bb7efe4abfe4d" @@ -1526,6 +2026,11 @@ resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.168.tgz#fe24632e79b7ade3f132891afff86caa5e5ce008" integrity sha512-oVfRvqHV/V6D1yifJbVRU3TMp8OT6o6BG+U9MkwuJ3U8/CsDHvalRpsxBqivn71ztOFZBTfJMvETbqHiaNSj7Q== +"@types/long@^4.0.1": + version "4.0.1" + resolved "https://registry.yarnpkg.com/@types/long/-/long-4.0.1.tgz#459c65fa1867dafe6a8f322c4c51695663cc55e9" + integrity sha512-5tXH6Bx/kNGd3MgffdmP4dy2Z+G4eaXw0SE81Tq3BNadtnMR5/ySMzX4SLEzHJzSmPNn4HIdpQsBvXMUykr58w== + "@types/lru-cache@^5.1.0": version "5.1.0" resolved "https://registry.yarnpkg.com/@types/lru-cache/-/lru-cache-5.1.0.tgz#57f228f2b80c046b4a1bd5cac031f81f207f4f03" @@ -1560,11 +2065,26 @@ dependencies: "@types/node" "*" +"@types/mongodb@3.6.20": + version "3.6.20" + resolved "https://registry.yarnpkg.com/@types/mongodb/-/mongodb-3.6.20.tgz#b7c5c580644f6364002b649af1c06c3c0454e1d2" + integrity sha512-WcdpPJCakFzcWWD9juKoZbRtQxKIMYF/JIAM4JrNHrMcnJL6/a2NWjXxW7fo9hxboxxkg+icff8d7+WIEvKgYQ== + dependencies: + "@types/bson" "*" + "@types/node" "*" + "@types/ms@^0.7.30": version "0.7.30" resolved "https://registry.yarnpkg.com/@types/ms/-/ms-0.7.30.tgz#f6c38b7ecbbf698b0bbd138315a0f0f18954f85f" integrity sha512-OftRLCgAzJP7vmKn9by/GVjnf4hloz/pXNOwPo0vKGAfXI7GqWXJi9N2kRar4cP5s1dGwuwcagWqO6iHBTq1Mg== +"@types/mysql@2.15.19": + version "2.15.19" + resolved "https://registry.yarnpkg.com/@types/mysql/-/mysql-2.15.19.tgz#d158927bb7c1a78f77e56de861a3b15cae0e7aed" + integrity sha512-wSRg2QZv14CWcZXkgdvHbbV2ACufNy5EgI8mBBxnJIptchv7DBy/h53VMa2jDhyo0C9MO4iowE6z9vF8Ja1DkQ== + dependencies: + "@types/node" "*" + "@types/nanoid@^3.0.0": version "3.0.0" resolved "https://registry.yarnpkg.com/@types/nanoid/-/nanoid-3.0.0.tgz#c757b20f343f3a1dd76e80a9a431b6290fc20f35" @@ -1577,10 +2097,15 @@ resolved "https://registry.yarnpkg.com/@types/node/-/node-11.12.0.tgz#ec5594728811dc2797e42396cfcdf786f2052c12" integrity sha512-Lg00egj78gM+4aE0Erw05cuDbvX9sLJbaaPwwRtdCdAMnIudqrQZ0oZX98Ek0yiSK/A2nubHgJfvII/rTT2Dwg== -"@types/node@^12.13.0": - version "12.20.13" - resolved "https://registry.yarnpkg.com/@types/node/-/node-12.20.13.tgz#e743bae112bd779ac9650f907197dd2caa7f0364" - integrity sha512-1x8W5OpxPq+T85OUsHRP6BqXeosKmeXRtjoF39STcdf/UWLqUsoehstZKOi0CunhVqHG17AyZgpj20eRVooK6A== +"@types/node@>=13.7.0": + version "17.0.14" + resolved 
"https://registry.yarnpkg.com/@types/node/-/node-17.0.14.tgz#33b9b94f789a8fedd30a68efdbca4dbb06b61f20" + integrity sha512-SbjLmERksKOGzWzPNuW7fJM7fk3YXVTFiZWB/Hs99gwhk+/dnrQRPBQjPW9aO+fi1tAffi9PrwFvsmOKmDTyng== + +"@types/node@^16.11.10": + version "16.11.10" + resolved "https://registry.yarnpkg.com/@types/node/-/node-16.11.10.tgz#2e3ad0a680d96367103d3e670d41c2fed3da61ae" + integrity sha512-3aRnHa1KlOEEhJ6+CvyHKK5vE9BcLGjtUpwvqYLRvYNQKMfabu3BwfJaA/SLW8dxe28LsNDjtHwePTuzn3gmOA== "@types/normalize-package-data@^2.4.0": version "2.4.0" @@ -1592,7 +2117,21 @@ resolved "https://registry.yarnpkg.com/@types/numeric/-/numeric-1.2.1.tgz#6bce5d0c4f1b20f2cbd4a3d47922b8fe6e36ad56" integrity sha512-30gQPisgZW5+ErkDVTZkoVKmwIWdjf2O6HmgKr3E1FJBdMYFldOPSJlQYP2VMafHuhOKvbLFA4Hf+ohvArz1+w== -"@types/pg@^8.6.1": +"@types/on-headers@^1.0.0": + version "1.0.0" + resolved "https://registry.yarnpkg.com/@types/on-headers/-/on-headers-1.0.0.tgz#12e80879ff22257036d3f18310c39de1881d141e" + integrity sha512-m5LRgPZzWPHGoEQDbuVuduRMctCW5tDarxi9f7+rYZoJDy7nMHMMrODtqbwPpNjYc6Ilg4vL2NeYAwimJfku3w== + dependencies: + "@types/node" "*" + +"@types/pg-pool@2.0.3": + version "2.0.3" + resolved "https://registry.yarnpkg.com/@types/pg-pool/-/pg-pool-2.0.3.tgz#3eb8df2933f617f219a53091ad4080c94ba1c959" + integrity sha512-fwK5WtG42Yb5RxAwxm3Cc2dJ39FlgcaNiXKvtTLAwtCn642X7dgel+w1+cLWwpSOFImR3YjsZtbkfjxbHtFAeg== + dependencies: + "@types/pg" "*" + +"@types/pg@*", "@types/pg@8.6.1": version "8.6.1" resolved "https://registry.yarnpkg.com/@types/pg/-/pg-8.6.1.tgz#099450b8dc977e8197a44f5229cedef95c8747f9" integrity sha512-1Kc4oAGzAl7uqUStZCDvaLFqZrW9qWSjXOmBfdgyBP5La7Us6Mg4GBvRlSoaZMhQF/zSj1C8CtKMBkoiT8eL8w== @@ -1618,11 +2157,23 @@ dependencies: "@types/node" "*" +"@types/redis@2.8.31": + version "2.8.31" + resolved "https://registry.yarnpkg.com/@types/redis/-/redis-2.8.31.tgz#c11c1b269fec132ac2ec9eb891edf72fc549149e" + integrity sha512-daWrrTDYaa5iSDFbgzZ9gOOzyp2AJmYK59OlG/2KGBgYWF3lfs8GDKm1c//tik5Uc93hDD36O+qLPvzDolChbA== + dependencies: + "@types/node" "*" + "@types/seedrandom@^2.4.28": version "2.4.28" resolved "https://registry.yarnpkg.com/@types/seedrandom/-/seedrandom-2.4.28.tgz#9ce8fa048c1e8c85cb71d7fe4d704e000226036f" integrity sha512-SMA+fUwULwK7sd/ZJicUztiPs8F1yCPwF3O23Z9uQ32ME5Ha0NmDK9+QTsYE4O2tHXChzXomSWWeIhCnoN1LqA== +"@types/semver@7.3.9": + version "7.3.9" + resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.3.9.tgz#152c6c20a7688c30b967ec1841d31ace569863fc" + integrity sha512-L/TMpyURfBkf+o/526Zb6kd/tchUP3iBDEPjqjb+U2MAJhVRxxrmr2fwpe08E7QsV7YLcpq0tUaQ9O9x97ZIxQ== + "@types/serve-static@*": version "1.13.2" resolved "https://registry.yarnpkg.com/@types/serve-static/-/serve-static-1.13.2.tgz#f5ac4d7a6420a99a6a45af4719f4dcd8cd907a48" @@ -1703,20 +2254,13 @@ dependencies: "@types/yargs-parser" "*" -"@types/yargs@^17.0.4": - version "17.0.4" - resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-17.0.4.tgz#d7ad5c311aaca3d7daebba169e1ecf35be97ceee" - integrity sha512-D/wihO9WFYqwsmJI0e0qS+U09wIQtYRSBJlXWjTFGjouEuOCy0BU4N/ZK5utb00S5lW/9LO7vOpvGDd8M06NvQ== +"@types/yargs@^17.0.10": + version "17.0.10" + resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-17.0.10.tgz#591522fce85d8739bca7b8bb90d048e4478d186a" + integrity sha512-gmEaFwpj/7f/ROdtIlci1R1VYU1J4j95m8T+Tj3iBgiBFKg1foE/PSl93bBd5T9LDXNPo8UlNN6W0qwD8O5OaA== dependencies: "@types/yargs-parser" "*" -"@types/yn@^3.1.0": - version "3.1.0" - resolved "https://registry.yarnpkg.com/@types/yn/-/yn-3.1.0.tgz#02e522edc6f869db59326b12caaaa10db97dcc5e" - 
integrity sha512-Qs2tU/syFYlALjR3EoT+NcvpMwAd6voSiDxW+c8bhAN1WbzQUnRfWTmttORf4R1WqDUT+dvHKj+llupSxs0O/w== - dependencies: - yn "*" - "@typescript-eslint/eslint-plugin@^4.22.0": version "4.22.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.22.0.tgz#3d5f29bb59e61a9dba1513d491b059e536e16dbc" @@ -1910,6 +2454,11 @@ ajv@^8.0.1: require-from-string "^2.0.2" uri-js "^4.2.2" +ansi-color@^0.2.1: + version "0.2.1" + resolved "https://registry.yarnpkg.com/ansi-color/-/ansi-color-0.2.1.tgz#3e75c037475217544ed763a8db5709fa9ae5bf9a" + integrity sha512-bF6xLaZBLpOQzgYUtYEhJx090nPSZk1BQ/q2oyBK9aMMcJHzx9uXGCjI2Y+LebsN4Jwoykr0V9whbPiogdyHoQ== + ansi-colors@^1.0.1: version "1.1.0" resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-1.1.0.tgz#6374b4dd5d4718ff3ce27a671a3b1cad077132a9" @@ -1954,6 +2503,11 @@ ansi-regex@^5.0.0: resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.0.tgz#388539f55179bf39339c81af30a654d69f87cb75" integrity sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg== +ansi-regex@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304" + integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ== + ansi-styles@^3.2.0, ansi-styles@^3.2.1: version "3.2.1" resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-3.2.1.tgz#41fbb20243e50b12be0f04b8dedbf07520ce841d" @@ -2157,6 +2711,11 @@ assert-plus@1.0.0, assert-plus@^1.0.0: resolved "https://registry.yarnpkg.com/assert-plus/-/assert-plus-1.0.0.tgz#f12e0f3c5d77b0b1cdd9146942e4e96c1e4dd525" integrity sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU= +assertion-error@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/assertion-error/-/assertion-error-1.1.0.tgz#e60b6b0e8f301bd97e5375215bda406c85118c0b" + integrity sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw== + assign-symbols@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/assign-symbols/-/assign-symbols-1.0.0.tgz#59667f41fadd4f20ccbc2bb96b8d4f7f78ec0367" @@ -2383,6 +2942,11 @@ binary@~0.3.0: buffers "~0.1.1" chainsaw "~0.1.0" +bintrees@1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/bintrees/-/bintrees-1.0.1.tgz#0e655c9b9c2435eaab68bf4027226d2b55a34524" + integrity sha1-DmVcm5wkNeqraL9AJyJtK1WjRSQ= + bl@^4.0.3: version "4.1.0" resolved "https://registry.yarnpkg.com/bl/-/bl-4.1.0.tgz#451535264182bec2fbbc83a62ab98cf11d9f7b3a" @@ -2477,13 +3041,6 @@ browserslist@^4.14.5, browserslist@^4.16.4: escalade "^3.1.1" node-releases "^1.1.71" -bs-logger@0.x: - version "0.2.6" - resolved "https://registry.yarnpkg.com/bs-logger/-/bs-logger-0.2.6.tgz#eb7d365307a72cf974cc6cda76b68354ad336bd8" - integrity sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog== - dependencies: - fast-json-stable-stringify "2.x" - bser@2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/bser/-/bser-2.1.1.tgz#e6787da20ece9d07998533cfd9de6f5c38f4bc05" @@ -2491,12 +3048,19 @@ bser@2.1.1: dependencies: node-int64 "^0.4.0" +bson@*: + version "4.6.5" + resolved "https://registry.yarnpkg.com/bson/-/bson-4.6.5.tgz#1a410148c20eef4e40d484878a037a7036e840fb" + integrity sha512-uqrgcjyOaZsHfz7ea8zLRCLe1u+QGUSzMZmvXqO24CDW7DWoW1qiN9folSwa7hSneTSgM2ykDIzF5kcQQ8cwNw== + dependencies: + buffer "^5.6.0" + buffer-equal@^1.0.0: version "1.0.0" resolved 
"https://registry.yarnpkg.com/buffer-equal/-/buffer-equal-1.0.0.tgz#59616b498304d556abd466966b22eeda3eca5fbe" integrity sha1-WWFrSYME1Var1GaWayLu2j7KX74= -buffer-from@1.x, buffer-from@^1.0.0: +buffer-from@^1.0.0: version "1.1.1" resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.1.tgz#32713bc028f75c02fdb710d7c7bcec1f2c6070ef" integrity sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A== @@ -2532,6 +3096,16 @@ buffers@~0.1.1: resolved "https://registry.yarnpkg.com/buffers/-/buffers-0.1.1.tgz#b24579c3bed4d6d396aeee6d9a8ae7f5482ab7bb" integrity sha1-skV5w77U1tOWru5tmorn9Ugqt7s= +bufrw@^1.3.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/bufrw/-/bufrw-1.3.0.tgz#28d6cfdaf34300376836310f5c31d57eeb40c8fa" + integrity sha512-jzQnSbdJqhIltU9O5KUiTtljP9ccw2u5ix59McQy4pV2xGhVLhRZIndY8GIrgh5HjXa6+QJ9AQhOd2QWQizJFQ== + dependencies: + ansi-color "^0.2.1" + error "^7.0.0" + hexer "^1.5.0" + xtend "^4.0.0" + byline@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/byline/-/byline-5.0.0.tgz#741c5216468eadc457b03410118ad77de8c1ddb1" @@ -2602,11 +3176,6 @@ camelcase@^3.0.0: resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-3.0.0.tgz#32fc4b9fcdaf845fcdf7e73bb97cac2261f0ab0a" integrity sha1-MvxLn82vhF/N9+c7uXysImHwqwo= -camelcase@^4.1.0: - version "4.1.0" - resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-4.1.0.tgz#d545635be1e33c542649c69173e5de6acfae34dd" - integrity sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0= - camelcase@^5.0.0: version "5.2.0" resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-5.2.0.tgz#e7522abda5ed94cc0489e1b8466610e88404cf45" @@ -2634,6 +3203,18 @@ caseless@~0.12.0: resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc" integrity sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw= +chai@4.3.4: + version "4.3.4" + resolved "https://registry.yarnpkg.com/chai/-/chai-4.3.4.tgz#b55e655b31e1eac7099be4c08c21964fce2e6c49" + integrity sha512-yS5H68VYOCtN1cjfwumDSuzn/9c+yza4f3reKXlE5rUg7SFcCEy90gJvydNgOYtblyf4Zi6jIWRnXOgErta0KA== + dependencies: + assertion-error "^1.1.0" + check-error "^1.0.2" + deep-eql "^3.0.1" + get-func-name "^2.0.0" + pathval "^1.1.1" + type-detect "^4.0.5" + chainsaw@~0.1.0: version "0.1.0" resolved "https://registry.yarnpkg.com/chainsaw/-/chainsaw-0.1.0.tgz#5eab50b28afe58074d0d58291388828b5e5fbc98" @@ -2666,6 +3247,11 @@ chalk@^4.0.0, chalk@^4.1.0: ansi-styles "^4.1.0" supports-color "^7.1.0" +check-error@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/check-error/-/check-error-1.0.2.tgz#574d312edd88bb5dd8912e9286dd6c0aed4aac82" + integrity sha1-V00xLt2Iu13YkS6Sht1sCu1KrII= + chokidar@^2.0.0, chokidar@^2.1.5: version "2.1.8" resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-2.1.8.tgz#804b3a7b6a99358c3c5c61e71d8728f041cff917" @@ -2779,11 +3365,6 @@ clone-stats@^1.0.0: resolved "https://registry.yarnpkg.com/clone-stats/-/clone-stats-1.0.0.tgz#b3782dff8bb5474e18b9b6bf0fdfe782f8777680" integrity sha1-s3gt/4u1R04Yuba/D9/ngvh3doA= -clone@^1.0.2: - version "1.0.4" - resolved "https://registry.yarnpkg.com/clone/-/clone-1.0.4.tgz#da309cc263df15994c688ca902179ca3c7cd7c7e" - integrity sha1-2jCcwmPfFZlMaIypAheco8fNfH4= - clone@^2.1.1: version "2.1.2" resolved "https://registry.yarnpkg.com/clone/-/clone-2.1.2.tgz#1b7f4b9f591f1e8f83670401600345a02887435f" @@ -2854,10 +3435,10 @@ color-support@^1.1.3: resolved 
"https://registry.yarnpkg.com/color-support/-/color-support-1.1.3.tgz#93834379a1cc9a0c61f82f52f0d04322251bd5a2" integrity sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg== -colorette@1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/colorette/-/colorette-1.1.0.tgz#1f943e5a357fac10b4e0f5aaef3b14cdc1af6ec7" - integrity sha512-6S062WDQUXi6hOfkO/sBPVwE5ASXY4G2+b4atvhJfSsuUUhIaUKlkjLe9692Ipyt5/a+IPF5aVTu3V5gvXq5cg== +colorette@2.0.16: + version "2.0.16" + resolved "https://registry.yarnpkg.com/colorette/-/colorette-2.0.16.tgz#713b9af84fdb000139f04546bd4a93f62a5085da" + integrity sha512-hUewv7oMjCp+wkBv5Rm0v87eJhq4woh5rSR+42YSQJKecCqgIqNkZ6lAlQms/BwHPJA5NKMRlpxPRv0n8HQW6g== colorette@^1.2.2: version "1.2.2" @@ -2876,10 +3457,10 @@ combined-stream@^1.0.6, combined-stream@^1.0.8, combined-stream@~1.0.6: dependencies: delayed-stream "~1.0.0" -commander@^4.1.1: - version "4.1.1" - resolved "https://registry.yarnpkg.com/commander/-/commander-4.1.1.tgz#9fd602bd936294e9e9ef46a3f4d6964044b18068" - integrity sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA== +commander@^7.1.0: + version "7.2.0" + resolved "https://registry.yarnpkg.com/commander/-/commander-7.2.0.tgz#a36cb57d0b501ce108e4d20559a150a391d97ab7" + integrity sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw== comment-parser@1.1.5: version "1.1.5" @@ -3240,20 +3821,13 @@ debug@2.6.9, debug@^2.1.2, debug@^2.2.0, debug@^2.3.3, debug@^2.6.9: dependencies: ms "2.0.0" -debug@4: +debug@4, debug@4.3.2: version "4.3.2" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.2.tgz#f0a49c18ac8779e31d4a0c6029dfb76873c7428b" integrity sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw== dependencies: ms "2.1.2" -debug@4.1.1, debug@^4.1.0, debug@^4.1.1: - version "4.1.1" - resolved "https://registry.yarnpkg.com/debug/-/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791" - integrity sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw== - dependencies: - ms "^2.1.1" - debug@^3.1.0: version "3.2.6" resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.6.tgz#e83d17de16d8a7efb7717edbe5fb10135eee629b" @@ -3268,6 +3842,13 @@ debug@^4.0.1, debug@^4.3.1: dependencies: ms "2.1.2" +debug@^4.1.0, debug@^4.1.1: + version "4.1.1" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791" + integrity sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw== + dependencies: + ms "^2.1.1" + decamelize-keys@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/decamelize-keys/-/decamelize-keys-1.1.0.tgz#d171a87933252807eb3cb61dc1c1445d078df2d9" @@ -3276,6 +3857,11 @@ decamelize-keys@^1.1.0: decamelize "^1.1.0" map-obj "^1.0.0" +decamelize@5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-5.0.1.tgz#db11a92e58c741ef339fb0a2868d8a06a9a7b1e9" + integrity sha512-VfxadyCECXgQlkoEAjeghAr5gY3Hf+IKjKb+X8tGVDtveCjN+USwprd2q3QXBR9T1+x2DG0XZF5/w+7HAtSaXA== + decamelize@^1.1.0, decamelize@^1.1.1, decamelize@^1.1.2, decamelize@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290" @@ -3298,6 +3884,13 @@ decompress-response@^4.2.0: dependencies: mimic-response "^2.0.0" +deep-eql@^3.0.1: + version "3.0.1" + resolved 
"https://registry.yarnpkg.com/deep-eql/-/deep-eql-3.0.1.tgz#dfc9404400ad1c8fe023e7da1df1c147c4b444df" + integrity sha512-+QeIQyN5ZuO+3Uk5DYh6/1eKO0m0YmJFGNmFHGACpf1ClL1nmlV/p4gNgbl2pJGxgXb4faqo6UE+M5ACEMyVcw== + dependencies: + type-detect "^4.0.0" + deep-extend@^0.6.0: version "0.6.0" resolved "https://registry.yarnpkg.com/deep-extend/-/deep-extend-0.6.0.tgz#c4fa7c95404a17a9c3e8ca7e1537312b736330ac" @@ -3320,13 +3913,6 @@ default-resolution@^2.0.0: resolved "https://registry.yarnpkg.com/default-resolution/-/default-resolution-2.0.0.tgz#bcb82baa72ad79b426a76732f1a81ad6df26d684" integrity sha1-vLgrqnKtebQmp2cy8aga1t8m1oQ= -defaults@^1.0.3: - version "1.0.3" - resolved "https://registry.yarnpkg.com/defaults/-/defaults-1.0.3.tgz#c656051e9817d9ff08ed881477f3fe4019f3ef7d" - integrity sha1-xlYFHpgX2f8I7YgUd/P+QBnz730= - dependencies: - clone "^1.0.2" - define-properties@^1.1.3: version "1.1.3" resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.1.3.tgz#cf88da6cbee26fe6db7094f61d870cbd84cee9f1" @@ -3540,6 +4126,21 @@ error-ex@^1.2.0, error-ex@^1.3.1: dependencies: is-arrayish "^0.2.1" +error@7.0.2: + version "7.0.2" + resolved "https://registry.yarnpkg.com/error/-/error-7.0.2.tgz#a5f75fff4d9926126ddac0ea5dc38e689153cb02" + integrity sha512-UtVv4l5MhijsYUxPJo4390gzfZvAnTHreNnDjnTZaKIiZ/SemXxAhBkYSKtWa5RtBXbLP8tMgn/n0RUa/H7jXw== + dependencies: + string-template "~0.2.1" + xtend "~4.0.0" + +error@^7.0.0: + version "7.2.1" + resolved "https://registry.yarnpkg.com/error/-/error-7.2.1.tgz#eab21a4689b5f684fc83da84a0e390de82d94894" + integrity sha512-fo9HBvWnx3NGUKMvMwB/CBCMMrfEJgbDTVDEkPygA3Bdd3lM1OyCd+rbQ8BwnpF6GdVeOLDNmyL4N5Bg80ZvdA== + dependencies: + string-template "~0.2.1" + es-abstract@^1.18.0-next.1, es-abstract@^1.18.0-next.2: version "1.18.0" resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.18.0.tgz#ab80b359eecb7ede4c298000390bc5ac3ec7b5a4" @@ -3661,6 +4262,18 @@ escodegen@^1.14.1, escodegen@^1.9.1: optionalDependencies: source-map "~0.6.1" +escodegen@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-2.0.0.tgz#5e32b12833e8aa8fa35e1bf0befa89380484c7dd" + integrity sha512-mmHKys/C8BFUGI+MAWNcSYoORYLMdPzjrknd2Vc+bUsjN5bXcr8EhrNB+UTqfL1y3I9c4fw2ihgtMPQLBRiQxw== + dependencies: + esprima "^4.0.1" + estraverse "^5.2.0" + esutils "^2.0.2" + optionator "^0.8.1" + optionalDependencies: + source-map "~0.6.1" + eslint-config-prettier@^8.3.0: version "8.3.0" resolved "https://registry.yarnpkg.com/eslint-config-prettier/-/eslint-config-prettier-8.3.0.tgz#f7471b20b6fe8a9a9254cc684454202886a2dd7a" @@ -3900,12 +4513,10 @@ expect@^24.9.0: jest-message-util "^24.9.0" jest-regex-util "^24.9.0" -express-rate-limit@^3.5.1: - version "3.5.1" - resolved "https://registry.yarnpkg.com/express-rate-limit/-/express-rate-limit-3.5.1.tgz#159e3bb2b92b8d55949a416cb1fa8ad431b8044d" - integrity sha512-aoxJLcqOAs2nEDwrQKrwCRoWdYxS7Qu+W1lSe4revazBxT/mTgEQrltJxt4z/AnAy/Qcm42M4ND+q3vI7AHL5Q== - dependencies: - defaults "^1.0.3" +express-rate-limit@^5.5.0: + version "5.5.0" + resolved "https://registry.yarnpkg.com/express-rate-limit/-/express-rate-limit-5.5.0.tgz#27dc48b5cc325448df47d02d5f4a2183b723781d" + integrity sha512-/1mrKggjXMxd1/ghPub5N3d36u5VlK8KjbQFQLxYub09BWSSgSXMQbXgFiIW0BYxjM49YCj8bkihONZR2U4+mQ== express@^4.16.4: version "4.16.4" @@ -4026,7 +4637,7 @@ fast-glob@^3.1.1: micromatch "^4.0.2" picomatch "^2.2.1" -fast-json-stable-stringify@2.x, fast-json-stable-stringify@^2.0.0: +fast-json-stable-stringify@^2.0.0: version "2.1.0" resolved 
"https://registry.yarnpkg.com/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz#874bf69c6f404c2b5d99c481341399fd55892633" integrity sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw== @@ -4369,6 +4980,14 @@ gauge@~2.7.3: strip-ansi "^3.0.1" wide-align "^1.1.0" +gc-stats@1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/gc-stats/-/gc-stats-1.4.0.tgz#66cd194c5a8eae1138407300bc6cb42c2f6f3cd6" + integrity sha512-4FcCj9e8j8rCjvLkqRpGZBLgTC/xr9XEf5By3x77cDucWWB3pJK6FEwXZCTCbb4z8xdaOoi4owBNrvn3ciDdxA== + dependencies: + nan "^2.13.2" + node-pre-gyp "^0.13.0" + gensync@^1.0.0-beta.2: version "1.0.0-beta.2" resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.2.tgz#32a6ee76c3d7f52d46b2b1ae5d93fea8580a25e0" @@ -4384,6 +5003,11 @@ get-caller-file@^2.0.1, get-caller-file@^2.0.5: resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e" integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg== +get-func-name@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/get-func-name/-/get-func-name-2.0.0.tgz#ead774abee72e20409433a066366023dd6887a41" + integrity sha1-6td0q+5y4gQJQzoGY2YCPdaIekE= + get-intrinsic@^1.0.2, get-intrinsic@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.1.1.tgz#15f59f376f855c446963948f0d24cd3637b4abc6" @@ -4629,6 +5253,18 @@ globby@^11.0.1: merge2 "^1.3.0" slash "^3.0.0" +globby@^11.0.3: + version "11.0.4" + resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.4.tgz#2cbaff77c2f2a62e71e9b2813a67b97a3a3001a5" + integrity sha512-9O4MVG9ioZJ08ffbcyVYyLOJLk5JQ688pJ4eMGLpdWLHq/Wr1D9BlriLQyL0E+jbkuePVZXYFj47QM/v093wHg== + dependencies: + array-union "^2.1.0" + dir-glob "^3.0.1" + fast-glob "^3.1.1" + ignore "^5.1.4" + merge2 "^1.3.0" + slash "^3.0.0" + globrex@^0.1.2: version "0.1.2" resolved "https://registry.yarnpkg.com/globrex/-/globrex-0.1.2.tgz#dd5d9ec826232730cd6793a5e33a9302985e6098" @@ -4641,7 +5277,7 @@ glogg@^1.0.0: dependencies: sparkles "^1.0.0" -graceful-fs@^4.0.0, graceful-fs@^4.1.15, graceful-fs@^4.2.2, graceful-fs@^4.2.3, graceful-fs@^4.2.4: +graceful-fs@^4.0.0, graceful-fs@^4.1.15, graceful-fs@^4.2.2, graceful-fs@^4.2.3: version "4.2.6" resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.6.tgz#ff040b2b0853b23c3d31027523706f1885d76bee" integrity sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ== @@ -4661,6 +5297,11 @@ grapheme-splitter@^1.0.2: resolved "https://registry.yarnpkg.com/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz#9cf3a665c6247479896834af35cf1dbb4400767e" integrity sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ== +graphql@^15.5.1: + version "15.8.0" + resolved "https://registry.yarnpkg.com/graphql/-/graphql-15.8.0.tgz#33410e96b012fa3bdb1091cc99a94769db212b38" + integrity sha512-5gghUc24tP9HRznNpV2+FIoq3xKkj5dTQqf4v0CpdPbFVwFkWoxOM+o+2OC9ZSvjEMTjfmG9QT+gcvggTwW1zw== + growly@^1.3.0: version "1.3.0" resolved "https://registry.yarnpkg.com/growly/-/growly-1.3.0.tgz#f10748cbe76af964b7c96c93c6bcc28af120c081" @@ -4818,6 +5459,16 @@ has@^1.0.3: dependencies: function-bind "^1.1.1" +hexer@^1.5.0: + version "1.5.0" + resolved "https://registry.yarnpkg.com/hexer/-/hexer-1.5.0.tgz#b86ce808598e8a9d1892c571f3cedd86fc9f0653" + integrity 
sha512-dyrPC8KzBzUJ19QTIo1gXNqIISRXQ0NwteW6OeQHRN4ZuZeHkdODfj0zHBdOlHbRY8GqbqK57C9oWSvQZizFsg== + dependencies: + ansi-color "^0.2.1" + minimist "^1.1.0" + process "^0.10.0" + xtend "^4.0.0" + hoek@5.x.x: version "5.0.4" resolved "https://registry.yarnpkg.com/hoek/-/hoek-5.0.4.tgz#0f7fa270a1cafeb364a4b2ddfaa33f864e4157da" @@ -4976,7 +5627,7 @@ inflight@^1.0.4: once "^1.3.0" wrappy "1" -inherits@2, inherits@^2.0.1, inherits@^2.0.4, inherits@~2.0.0, inherits@~2.0.3, inherits@~2.0.4: +inherits@2, inherits@^2.0.1, inherits@^2.0.4, inherits@~2.0.0, inherits@~2.0.3: version "2.0.4" resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== @@ -4996,7 +5647,7 @@ interpret@^1.0.0, interpret@^1.4.0: resolved "https://registry.yarnpkg.com/interpret/-/interpret-1.4.0.tgz#665ab8bc4da27a774a40584e812e3e0fa45b1a1e" integrity sha512-agE4QfB2Lkp9uICn7BAqoscw4SZP9kTE2hxiFI3jBPmXJfdqiahTbUuKGsMoN2GtqL9AxhYioAcVvgsb1HvRbA== -interpret@^2.0.0: +interpret@^2.2.0: version "2.2.0" resolved "https://registry.yarnpkg.com/interpret/-/interpret-2.2.0.tgz#1a78a0b5965c40a5416d007ad6f50ad27c417df9" integrity sha512-Ju0Bz/cEia55xDwUWEa8+olFpCiQoypjnQySseKtmjNrnps3P+xfpUmGr90T7yjlVJmOtybRvPXhKMbHr+fWnw== @@ -5009,6 +5660,14 @@ into-stream@^5.1.1: from2 "^2.3.0" p-is-promise "^3.0.0" +into-stream@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/into-stream/-/into-stream-6.0.0.tgz#4bfc1244c0128224e18b8870e85b2de8e66c6702" + integrity sha512-XHbaOAvP+uFKUFsOgoNPRjLkwB+I22JFPFe5OjTkQ0nwgj6+pSjb4NmB6VMxaPshLiOf+zcpOCBQuLwC1KHhZA== + dependencies: + from2 "^2.3.0" + p-is-promise "^3.0.0" + invariant@^2.2.4: version "2.2.4" resolved "https://registry.yarnpkg.com/invariant/-/invariant-2.2.4.tgz#610f3c92c9359ce1db616e538008d23ff35158e6" @@ -5120,6 +5779,13 @@ is-core-module@^2.2.0: dependencies: has "^1.0.3" +is-core-module@^2.8.0: + version "2.8.1" + resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.8.1.tgz#f59fdfca701d5879d0a6b100a40aa1560ce27211" + integrity sha512-SdNCUs284hr40hFTFP6l0IfZ/RSrMXF3qgoRHd3/79unUTvrFO/JoXwkGm+5J/Oe3E/b5GsnG330uUNgRpu1PA== + dependencies: + has "^1.0.3" + is-data-descriptor@^0.1.4: version "0.1.4" resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz#0b5ee648388e2c860282e793f1856fec3f301b56" @@ -5266,6 +5932,11 @@ is-plain-obj@^1.1.0: resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-1.1.0.tgz#71a50c8429dfca773c92a390a4a03b39fcd51d3e" integrity sha1-caUMhCnfync8kqOQpKA7OfzVHT4= +is-plain-obj@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-2.1.0.tgz#45e42e37fccf1f40da8e5f76ee21515840c09287" + integrity sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA== + is-plain-object@^2.0.1, is-plain-object@^2.0.3, is-plain-object@^2.0.4: version "2.0.4" resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-2.0.4.tgz#2c163b3fafb1b606d9d17928f05c2a1c38e07677" @@ -5440,6 +6111,17 @@ istanbul-reports@^2.2.6: dependencies: html-escaper "^2.0.0" +jaeger-client@^3.15.0: + version "3.19.0" + resolved "https://registry.yarnpkg.com/jaeger-client/-/jaeger-client-3.19.0.tgz#9b5bd818ebd24e818616ee0f5cffe1722a53ae6e" + integrity sha512-M0c7cKHmdyEUtjemnJyx/y9uX16XHocL46yQvyqDlPdvAcwPDbHrIbKjQdBqtiE4apQ/9dmr+ZLJYYPGnurgpw== + dependencies: + node-int64 "^0.4.0" + 
opentracing "^0.14.4" + thriftrw "^3.5.0" + uuid "^8.3.2" + xorshift "^1.1.1" + javascript-natural-sort@0.7.1: version "0.7.1" resolved "https://registry.yarnpkg.com/javascript-natural-sort/-/javascript-natural-sort-0.7.1.tgz#f9e2303d4507f6d74355a73664d1440fb5a0ef59" @@ -5773,18 +6455,6 @@ jest-util@^24.9.0: slash "^2.0.0" source-map "^0.6.0" -jest-util@^26.1.0: - version "26.6.2" - resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-26.6.2.tgz#907535dbe4d5a6cb4c47ac9b926f6af29576cbc1" - integrity sha512-MDW0fKfsn0OI7MS7Euz6h8HNDXVQ0gaM9uW6RjfDmd1DAFcaxX9OqIakHIqhbnmF08Cf2DLDG+ulq8YQQ0Lp0Q== - dependencies: - "@jest/types" "^26.6.2" - "@types/node" "*" - chalk "^4.0.0" - graceful-fs "^4.2.4" - is-ci "^2.0.0" - micromatch "^4.0.2" - jest-validate@^24.9.0: version "24.9.0" resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-24.9.0.tgz#0775c55360d173cd854e40180756d4ff52def8ab" @@ -5936,6 +6606,11 @@ json-schema@0.2.3: resolved "https://registry.yarnpkg.com/json-schema/-/json-schema-0.2.3.tgz#b480c892e59a2f05954ce727bd3f2a4e882f9e13" integrity sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM= +json-schema@^0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/json-schema/-/json-schema-0.4.0.tgz#f7de4cf6efab838ebaeb3236474cbba5a1930ab5" + integrity sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA== + json-stable-stringify-without-jsonify@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz#9db7b59496ad3f3cfef30a75142d2d930ad72651" @@ -5946,13 +6621,6 @@ json-stringify-safe@^5.0.1, json-stringify-safe@~5.0.1: resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb" integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus= -json5@2.x, json5@^2.1.2: - version "2.2.0" - resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.0.tgz#2dfefe720c6ba525d9ebd909950f0515316c89a3" - integrity sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA== - dependencies: - minimist "^1.2.5" - json5@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.1.tgz#779fb0018604fa854eacbf6252180d83543e3dbe" @@ -5960,6 +6628,13 @@ json5@^1.0.1: dependencies: minimist "^1.2.0" +json5@^2.1.2: + version "2.2.0" + resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.0.tgz#2dfefe720c6ba525d9ebd909950f0515316c89a3" + integrity sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA== + dependencies: + minimist "^1.2.5" + jsonfile@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-4.0.0.tgz#8771aae0799b64076b76640fca058f9c10e33ecb" @@ -6039,26 +6714,24 @@ kleur@^3.0.3: resolved "https://registry.yarnpkg.com/kleur/-/kleur-3.0.3.tgz#a79c9ecc86ee1ce3fa6206d1216c501f147fc07e" integrity sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w== -knex@^0.20.1: - version "0.20.15" - resolved "https://registry.yarnpkg.com/knex/-/knex-0.20.15.tgz#b7e9e1efd9cf35d214440d9439ed21153574679d" - integrity sha512-WHmvgfQfxA5v8pyb9zbskxCS1L1WmYgUbwBhHojlkmdouUOazvroUWlCr6KIKMQ8anXZh1NXOOtIUMnxENZG5Q== +knex@^0.95.15: + version "0.95.15" + resolved "https://registry.yarnpkg.com/knex/-/knex-0.95.15.tgz#39d7e7110a6e2ad7de5d673d2dea94143015e0e7" + integrity sha512-Loq6WgHaWlmL2bfZGWPsy4l8xw4pOE+tmLGkPG0auBppxpI0UcK+GYCycJcqz9W54f2LiGewkCVLBm3Wq4ur/w== 
dependencies: - colorette "1.1.0" - commander "^4.1.1" - debug "4.1.1" + colorette "2.0.16" + commander "^7.1.0" + debug "4.3.2" + escalade "^3.1.1" esm "^3.2.25" getopts "2.2.5" - inherits "~2.0.4" - interpret "^2.0.0" - liftoff "3.1.0" - lodash "^4.17.15" - mkdirp "^0.5.1" - pg-connection-string "2.1.0" - tarn "^2.0.0" + interpret "^2.2.0" + lodash "^4.17.21" + pg-connection-string "2.5.0" + rechoir "0.7.0" + resolve-from "^5.0.0" + tarn "^3.0.1" tildify "2.0.0" - uuid "^7.0.1" - v8flags "^3.1.3" last-run@^1.1.0: version "1.1.1" @@ -6115,7 +6788,7 @@ levn@~0.3.0: prelude-ls "~1.1.2" type-check "~0.3.2" -liftoff@3.1.0, liftoff@^3.1.0: +liftoff@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/liftoff/-/liftoff-3.1.0.tgz#c9ba6081f908670607ee79062d700df062c52ed3" integrity sha512-DlIPlJUkCV0Ips2zf2pJP0unEoT1kwYhiiPUGF3s/jtxTCjziNLoiVVh+jqWOWeFi6mmwQ5fNxvAUyPad4Dfog== @@ -6238,11 +6911,16 @@ lodash.max@^4.0.1: resolved "https://registry.yarnpkg.com/lodash.max/-/lodash.max-4.0.1.tgz#8735566c618b35a9f760520b487ae79658af136a" integrity sha1-hzVWbGGLNan3YFILSHrnllivE2o= -lodash.memoize@4.x: +lodash.memoize@4.1.2: version "4.1.2" resolved "https://registry.yarnpkg.com/lodash.memoize/-/lodash.memoize-4.1.2.tgz#bcc6c49a42a2840ed997f323eada5ecd182e0bfe" integrity sha1-vMbEmkKihA7Zl/Mj6tpezRguC/4= +lodash.once@4.1.1: + version "4.1.1" + resolved "https://registry.yarnpkg.com/lodash.once/-/lodash.once-4.1.1.tgz#0dd3971213c7c56df880977d504c88fb471a97ac" + integrity sha1-DdOXEhPHxW34gJd9UEyI+0cal6w= + lodash.sortby@^4.7.0: version "4.7.0" resolved "https://registry.yarnpkg.com/lodash.sortby/-/lodash.sortby-4.7.0.tgz#edd14c824e2cc9c1e0b0a1b42bb5210516a42438" @@ -6263,16 +6941,26 @@ lodash.truncate@^4.4.2: resolved "https://registry.yarnpkg.com/lodash.truncate/-/lodash.truncate-4.4.2.tgz#5a350da0b1113b837ecfffd5812cbe58d6eae193" integrity sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM= -lodash@4.x, lodash@^4.17.20, lodash@^4.17.21: - version "4.17.21" - resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" - integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== - lodash@^4.17.15, lodash@^4.17.19: version "4.17.19" resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.19.tgz#e48ddedbe30b3321783c5b4301fbd353bc1e4a4b" integrity sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ== +lodash@^4.17.20, lodash@^4.17.21: + version "4.17.21" + resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" + integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== + +long@^2.4.0: + version "2.4.0" + resolved "https://registry.yarnpkg.com/long/-/long-2.4.0.tgz#9fa180bb1d9500cdc29c4156766a1995e1f4524f" + integrity sha512-ijUtjmO/n2A5PaosNG9ZGDsQ3vxJg7ZW8vsY8Kp0f2yIZWhSJvjmegV7t+9RPQKxKrvj8yKGehhS+po14hPLGQ== + +long@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/long/-/long-4.0.0.tgz#9a7b71cfb7d361a194ea555241c92f7468d5bf28" + integrity sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA== + loose-envify@^1.0.0: version "1.4.0" resolved "https://registry.yarnpkg.com/loose-envify/-/loose-envify-1.4.0.tgz#71ee51fa7be4caec1a63839f7e682d8132d30caf" @@ -6322,7 +7010,7 @@ make-dir@^3.0.0: dependencies: semver "^6.0.0" -make-error@1.x, make-error@^1.1.1: +make-error@^1.1.1: version "1.3.6" resolved 
"https://registry.yarnpkg.com/make-error/-/make-error-1.3.6.tgz#2eb2e37ea9b67c4891f684a1394799af484cf7a2" integrity sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw== @@ -6430,6 +7118,13 @@ merge-descriptors@1.0.1: resolved "https://registry.yarnpkg.com/merge-descriptors/-/merge-descriptors-1.0.1.tgz#b00aaa556dd8b44568150ec9d1b953f3f90cbb61" integrity sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E= +merge-options@3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/merge-options/-/merge-options-3.0.4.tgz#84709c2aa2a4b24c1981f66c179fe5565cc6dbb7" + integrity sha512-2Sug1+knBjkaMsMgf1ctR1Ujx+Ayku4EdJN4Z+C2+JzoeF7A3OZ9KM2GY0CpQS51NR61LTurMJrRKPhSs3ZRTQ== + dependencies: + is-plain-obj "^2.1.0" + merge-stream@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60" @@ -6537,6 +7232,11 @@ minimist@0.0.8: resolved "https://registry.yarnpkg.com/minimist/-/minimist-0.0.8.tgz#857fcabfc3397d2625b8228262e86aa7a011b05d" integrity sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0= +minimist@^1.1.0: + version "1.2.6" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.6.tgz#8637a5b759ea0d6e98702cfb3a9283323c93af44" + integrity sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q== + minimist@^1.1.1, minimist@^1.2.3, minimist@^1.2.5: version "1.2.5" resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602" @@ -6605,25 +7305,25 @@ mkdirp-classic@^0.5.2, mkdirp-classic@^0.5.3: resolved "https://registry.yarnpkg.com/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz#fa10c9115cc6d8865be221ba47ee9bed78601113" integrity sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A== -mkdirp@0.x, mkdirp@^0.5.1: +"mkdirp@>=0.5 0", mkdirp@^0.5.0: + version "0.5.1" + resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.1.tgz#30057438eac6cf7f8c4767f38648d6697d75c903" + integrity sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM= + dependencies: + minimist "0.0.8" + +mkdirp@^0.5.1: version "0.5.5" resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.5.tgz#d91cefd62d1436ca0f41620e251288d420099def" integrity sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ== dependencies: minimist "^1.2.5" -mkdirp@1.x, mkdirp@^1.0.3, mkdirp@^1.0.4: +mkdirp@^1.0.3, mkdirp@^1.0.4: version "1.0.4" resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e" integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw== -"mkdirp@>=0.5 0", mkdirp@^0.5.0: - version "0.5.1" - resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.1.tgz#30057438eac6cf7f8c4767f38648d6697d75c903" - integrity sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM= - dependencies: - minimist "0.0.8" - ml-array-max@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/ml-array-max/-/ml-array-max-1.1.1.tgz#0192a646853a5cd1085133def02c1271101ddd86" @@ -6693,10 +7393,20 @@ modify-values@^1.0.0: resolved "https://registry.yarnpkg.com/modify-values/-/modify-values-1.0.1.tgz#b3939fa605546474e3e3e3c63d64bd43b4ee6022" integrity sha512-xV2bxeN6F7oYjZWTe/YPAy6MN2M+sL4u/Rlm2AHCIVGfo2p1yGmBHQ6vHehl4bRTZBdHu3TSkWdYgkwpYzAGSw== +module-details-from-path@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/module-details-from-path/-/module-details-from-path-1.0.3.tgz#114c949673e2a8a35e9d35788527aa37b679da2b" 
+ integrity sha1-EUyUlnPiqKNenTV4hSeqN7Z52is= + +moment@^2.24.0: + version "2.24.0" + resolved "https://registry.yarnpkg.com/moment/-/moment-2.24.0.tgz#0d055d53f5052aa653c9f6eb68bb5d12bf5c2b5b" + integrity sha512-bV7f+6l2QigeBBZSM/6yTNq4P2fNpSWj/0e7jQcy87A8e7o2nAfP/34/2ky5Vw4B9S446EtIhodAzkFCcR4dQg== + moment@^2.29.2: - version "2.29.2" - resolved "https://registry.yarnpkg.com/moment/-/moment-2.29.2.tgz#00910c60b20843bcba52d37d58c628b47b1f20e4" - integrity sha512-UgzG4rvxYpN15jgCmVJwac49h9ly9NurikMWGPdVxm8GZD6XjkKPxDTjQQ43gtGgnV3X0cAyWDdP2Wexoquifg== + version "2.29.3" + resolved "https://registry.yarnpkg.com/moment/-/moment-2.29.3.tgz#edd47411c322413999f7a5940d526de183c031f3" + integrity sha512-c6YRvhEo//6T2Jz/vVtYzqBzwvPT95JBQ+smCytzf7c50oMZRsR/a4w88aD34I+/QVSfnoAnSBFPJHItlOMJVw== ms@2.0.0: version "2.0.0" @@ -6721,11 +7431,24 @@ multistream@^2.1.1: inherits "^2.0.1" readable-stream "^2.0.5" +multistream@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/multistream/-/multistream-4.1.0.tgz#7bf00dfd119556fbc153cff3de4c6d477909f5a8" + integrity sha512-J1XDiAmmNpRCBfIWJv+n0ymC4ABcf/Pl+5YvC5B/D2f/2+8PtHvCNxMPKiQcZyi922Hq69J2YOpb1pTywfifyw== + dependencies: + once "^1.4.0" + readable-stream "^3.6.0" + mute-stdout@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/mute-stdout/-/mute-stdout-1.0.1.tgz#acb0300eb4de23a7ddeec014e3e96044b3472331" integrity sha512-kDcwXR4PS7caBpuRYYBUz9iVixUk3anO3f5OYFiIPwK/20vCzKCHyKoulbiDY1S53zD2bxUpxN/IJ+TnXjfvxg== +nan@^2.13.2: + version "2.15.0" + resolved "https://registry.yarnpkg.com/nan/-/nan-2.15.0.tgz#3f34a473ff18e15c1b5626b62903b5ad6e665fee" + integrity sha512-8ZtvEnA2c5aYCZYd1cvgdnU6cqwixRoYg70xPLWUws5ORTa/lnw+u4amixRS/Ac5U5mQVgp9pnlSUnbNWFaWZQ== + nan@^2.9.2: version "2.13.2" resolved "https://registry.yarnpkg.com/nan/-/nan-2.13.2.tgz#f51dc7ae66ba7d5d55e1e6d4d8092e802c9aefe7" @@ -6814,6 +7537,13 @@ node-addon-api@^3.0.0: resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-3.1.0.tgz#98b21931557466c6729e51cb77cd39c965f42239" integrity sha512-flmrDNB06LIl5lywUz7YlNGZH/5p0M7W28k8hzd9Lshtdh1wshD2Y+U4h9LD6KObOy1f+fEVdgprPrEymjM5uw== +node-fetch@^2.6.1: + version "2.6.6" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.6.tgz#1751a7c01834e8e1697758732e9efb6eeadfaf89" + integrity sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA== + dependencies: + whatwg-url "^5.0.0" + node-gyp@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/node-gyp/-/node-gyp-4.0.0.tgz#972654af4e5dd0cd2a19081b4b46fe0442ba6f45" @@ -6918,6 +7648,22 @@ node-pre-gyp@^0.10.0: semver "^5.3.0" tar "^4" +node-pre-gyp@^0.13.0: + version "0.13.0" + resolved "https://registry.yarnpkg.com/node-pre-gyp/-/node-pre-gyp-0.13.0.tgz#df9ab7b68dd6498137717838e4f92a33fc9daa42" + integrity sha512-Md1D3xnEne8b/HGVQkZZwV27WUi1ZRuZBij24TNaZwUPU3ZAFtvT6xxJGaUVillfmMKnn5oD1HoGsp2Ftik7SQ== + dependencies: + detect-libc "^1.0.2" + mkdirp "^0.5.1" + needle "^2.2.1" + nopt "^4.0.1" + npm-packlist "^1.1.6" + npmlog "^4.0.2" + rc "^1.2.7" + rimraf "^2.6.1" + semver "^5.3.0" + tar "^4" + node-releases@^1.1.71: version "1.1.71" resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-1.1.71.tgz#cb1334b179896b1c89ecfdd4b725fb7bbdfc7dbb" @@ -7163,6 +7909,16 @@ once@^1.3.0, once@^1.3.1, once@^1.3.2, once@^1.4.0: dependencies: wrappy "1" +opentracing@^0.14.4: + version "0.14.7" + resolved "https://registry.yarnpkg.com/opentracing/-/opentracing-0.14.7.tgz#25d472bd0296dc0b64d7b94cbc995219031428f5" 
+ integrity sha512-vz9iS7MJ5+Bp1URw8Khvdyw1H/hGvzHWlKQ7eRrQojSCDL1/SrWfrY9QebLw97n2deyRtzHRC3MkQfVNUCo91Q== + +optional@0.1.4: + version "0.1.4" + resolved "https://registry.yarnpkg.com/optional/-/optional-0.1.4.tgz#cdb1a9bedc737d2025f690ceeb50e049444fd5b3" + integrity sha512-gtvrrCfkE08wKcgXaVwQVgwEQ8vel2dc5DDBn9RLQZ3YtmtkBss6A2HY6BnJH4N/4Ku97Ri/SF8sNWE2225WJw== + optionator@^0.8.1: version "0.8.3" resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.3.tgz#84fa1d036fe9d3c7e21d99884b601167ec8fb495" @@ -7411,6 +8167,11 @@ path-parse@^1.0.6: resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw== +path-parse@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== + path-root-regex@^0.1.0: version "0.1.2" resolved "https://registry.yarnpkg.com/path-root-regex/-/path-root-regex-0.1.2.tgz#bfccdc8df5b12dc52c8b43ec38d18d72c04ba96d" @@ -7464,22 +8225,17 @@ path@^0.12.7: process "^0.11.1" util "^0.10.3" +pathval@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/pathval/-/pathval-1.1.1.tgz#8534e77a77ce7ac5a2512ea21e0fdb8fcf6c3d8d" + integrity sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ== + performance-now@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b" integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns= -pg-connection-string@0.1.3: - version "0.1.3" - resolved "https://registry.yarnpkg.com/pg-connection-string/-/pg-connection-string-0.1.3.tgz#da1847b20940e42ee1492beaf65d49d91b245df7" - integrity sha1-2hhHsglA5C7hSSvq9l1J2RskXfc= - -pg-connection-string@2.1.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/pg-connection-string/-/pg-connection-string-2.1.0.tgz#e07258f280476540b24818ebb5dca29e101ca502" - integrity sha512-bhlV7Eq09JrRIvo1eKngpwuqKtJnNhZdpdOlvrPrA4dxqXPjxSrbNrfnIDmTpwMyRszrcV4kU5ZA4mMsQUrjdg== - -pg-connection-string@^2.5.0: +pg-connection-string@2.5.0, pg-connection-string@^2.5.0: version "2.5.0" resolved "https://registry.yarnpkg.com/pg-connection-string/-/pg-connection-string-2.5.0.tgz#538cadd0f7e603fc09a12590f3b8a452c2c0cf34" integrity sha512-r5o/V/ORTA6TmUnyWZR9nCj1klXCO2CEKNRlVuJptZe85QuhFayC7WeMic7ndayT5IRIR0S0xFxFi2ousartlQ== @@ -7494,11 +8250,6 @@ pg-int8@1.0.1: resolved "https://registry.yarnpkg.com/pg-int8/-/pg-int8-1.0.1.tgz#943bd463bf5b71b4170115f80f8efc9a0c0eb78c" integrity sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw== -pg-pool@^2.0.4: - version "2.0.6" - resolved "https://registry.yarnpkg.com/pg-pool/-/pg-pool-2.0.6.tgz#7b561a482feb0a0e599b58b5137fd2db3ad8111c" - integrity sha512-hod2zYQxM8Gt482q+qONGTYcg/qVcV32VHVPtktbBJs0us3Dj7xibISw0BAAXVMCzt8A/jhfJvpZaxUlqtqs0g== - pg-pool@^3.4.1: version "3.4.1" resolved "https://registry.yarnpkg.com/pg-pool/-/pg-pool-3.4.1.tgz#0e71ce2c67b442a5e862a9c182172c37eda71e9c" @@ -7530,31 +8281,7 @@ pg-types@^2.1.0, pg-types@^2.2.0: postgres-date "~1.0.4" postgres-interval "^1.1.0" -pg-types@~2.0.0: - version "2.0.0" - resolved "https://registry.yarnpkg.com/pg-types/-/pg-types-2.0.0.tgz#038ddc302a0340efcdb46d0581cc7caa2303cbba" - integrity 
sha512-THUD7gQll5tys+5eQ8Rvs7DjHiIC3bLqixk3gMN9Hu8UrCBAOjf35FoI39rTGGc3lM2HU/R+Knpxvd11mCwOMA== - dependencies: - pg-int8 "1.0.1" - postgres-array "~2.0.0" - postgres-bytea "~1.0.0" - postgres-date "~1.0.0" - postgres-interval "^1.1.0" - -pg@^7.8.0: - version "7.9.0" - resolved "https://registry.yarnpkg.com/pg/-/pg-7.9.0.tgz#04f0024d810544463f47dbb5aada2486aa7dcc36" - integrity sha512-GkzteBFpsIoIBCSuomqik3IGvhqAtTr32jclR24RmUg170Jrn6ypwR97YalFHrsE1iaW8T0aAH13dmij8QUQ0g== - dependencies: - buffer-writer "2.0.0" - packet-reader "1.0.0" - pg-connection-string "0.1.3" - pg-pool "^2.0.4" - pg-types "~2.0.0" - pgpass "1.x" - semver "4.3.2" - -pg@^8.5.1, pg@^8.7.1: +pg@^8.0.3, pg@^8.5.1: version "8.7.1" resolved "https://registry.yarnpkg.com/pg/-/pg-8.7.1.tgz#9ea9d1ec225980c36f94e181d009ab9f4ce4c471" integrity sha512-7bdYcv7V6U3KAtWjpQJJBww0UEsWuh4yQ/EjNf2HeO/NnvKjpvhEIe/A/TleP6wtmSKnUnghs5A9jUoK6iDdkA== @@ -7649,7 +8376,20 @@ pkg-fetch@2.6.9: semver "^6.3.0" unique-temp-dir "^1.0.0" -pkg@^4.3.7, pkg@^4.5.1: +pkg-fetch@3.1.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/pkg-fetch/-/pkg-fetch-3.1.1.tgz#8f94115d926e71359ed96c211fe022b7a2452f8d" + integrity sha512-3GfpNwbwoTxge2TrVp6Oyz/FZJOoxF1r0+1YikOhnBXa2Di/VOJKtUObFHap76BFnyFo1fwh5vARWFR9TzLKUg== + dependencies: + chalk "^4.1.0" + fs-extra "^9.1.0" + https-proxy-agent "^5.0.0" + node-fetch "^2.6.1" + progress "^2.0.3" + semver "^7.3.5" + yargs "^16.2.0" + +pkg@^4.3.7: version "4.5.1" resolved "https://registry.yarnpkg.com/pkg/-/pkg-4.5.1.tgz#0f915110d726b17a7e66a76e5406ff9d393bccc8" integrity sha512-UXKL88jGQ+FD4//PyrFeRcqurVQ3BVIfUNaEU9cXY24EJz08JyBj85qrGh0CFGvyzNb1jpwHOnns5Sw0M5H92Q== @@ -7669,6 +8409,27 @@ pkg@^4.3.7, pkg@^4.5.1: resolve "^1.15.1" stream-meter "^1.0.4" +pkg@~5.2.0: + version "5.2.1" + resolved "https://registry.yarnpkg.com/pkg/-/pkg-5.2.1.tgz#929294d2dedbcd4427cfc00121a80c151a2a1d4c" + integrity sha512-kQ5Fla+76rdmFJNrEOgoklxAURl3uvhvX+m4LCQCRrI+q2lOgsx9pS02NhNuDVnyXsthluAuXCh5SKcMyw+cWw== + dependencies: + "@babel/parser" "7.13.13" + "@babel/types" "7.13.12" + chalk "^4.1.0" + escodegen "^2.0.0" + fs-extra "^9.1.0" + globby "^11.0.3" + into-stream "^6.0.0" + minimist "^1.2.5" + multistream "^4.1.0" + pkg-fetch "3.1.1" + prebuild-install "6.0.1" + progress "^2.0.3" + resolve "^1.20.0" + stream-meter "^1.0.4" + tslib "2.1.0" + pn@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/pn/-/pn-1.1.0.tgz#e2f4cef0e219f463c179ab37463e4e1ecdccbafb" @@ -7689,11 +8450,6 @@ postgres-bytea@~1.0.0: resolved "https://registry.yarnpkg.com/postgres-bytea/-/postgres-bytea-1.0.0.tgz#027b533c0aa890e26d172d47cf9ccecc521acd35" integrity sha1-AntTPAqokOJtFy1Hz5zOzFIazTU= -postgres-date@~1.0.0: - version "1.0.3" - resolved "https://registry.yarnpkg.com/postgres-date/-/postgres-date-1.0.3.tgz#e2d89702efdb258ff9d9cee0fe91bd06975257a8" - integrity sha1-4tiXAu/bJY/52c7g/pG9BpdSV6g= - postgres-date@~1.0.4: version "1.0.7" resolved "https://registry.yarnpkg.com/postgres-date/-/postgres-date-1.0.7.tgz#51bc086006005e5061c591cee727f2531bf641a8" @@ -7780,6 +8536,11 @@ process-nextick-args@^2.0.0, process-nextick-args@~2.0.0: resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2" integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag== +process@^0.10.0: + version "0.10.1" + resolved "https://registry.yarnpkg.com/process/-/process-0.10.1.tgz#842457cc51cfed72dc775afeeafb8c6034372725" + integrity 
sha512-dyIett8dgGIZ/TXKUzeYExt7WA6ldDzys9vTDU/cCA9L17Ypme+KzS+NjQCjpn9xsvi/shbMC+yP/BcFMBz0NA== + process@^0.11.1: version "0.11.10" resolved "https://registry.yarnpkg.com/process/-/process-0.11.10.tgz#7332300e840161bda3e69a1d1d91a7d4bc16f182" @@ -7790,6 +8551,13 @@ progress@^2.0.0, progress@^2.0.3: resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8" integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA== +prom-client@^14.0.1: + version "14.0.1" + resolved "https://registry.yarnpkg.com/prom-client/-/prom-client-14.0.1.tgz#bdd9583e02ec95429677c0e013712d42ef1f86a8" + integrity sha512-HxTArb6fkOntQHoRGvv4qd/BkorjliiuO2uSWC2KC17MUTKYttWdDoXX/vxOhQdkoECEM9BBH0pj2l8G8kev6w== + dependencies: + tdigest "^0.1.1" + promised-retry@^0.4.0: version "0.4.0" resolved "https://registry.yarnpkg.com/promised-retry/-/promised-retry-0.4.0.tgz#c6ef32eda6651950fe5b64bf5b9611072a4b5a56" @@ -7803,6 +8571,25 @@ prompts@^2.0.1: kleur "^3.0.3" sisteransi "^1.0.5" +protobufjs@^6.11.2: + version "6.11.2" + resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-6.11.2.tgz#de39fabd4ed32beaa08e9bb1e30d08544c1edf8b" + integrity sha512-4BQJoPooKJl2G9j3XftkIXjoC9C0Av2NOrWmbLWT1vH32GcSUHjM0Arra6UfTsVyfMAuFzaLucXn1sadxJydAw== + dependencies: + "@protobufjs/aspromise" "^1.1.2" + "@protobufjs/base64" "^1.1.2" + "@protobufjs/codegen" "^2.0.4" + "@protobufjs/eventemitter" "^1.1.0" + "@protobufjs/fetch" "^1.1.0" + "@protobufjs/float" "^1.0.2" + "@protobufjs/inquire" "^1.1.0" + "@protobufjs/path" "^1.1.2" + "@protobufjs/pool" "^1.1.0" + "@protobufjs/utf8" "^1.1.0" + "@types/long" "^4.0.1" + "@types/node" ">=13.7.0" + long "^4.0.0" + proxy-addr@~2.0.4: version "2.0.4" resolved "https://registry.yarnpkg.com/proxy-addr/-/proxy-addr-2.0.4.tgz#ecfc733bf22ff8c6f407fa275327b9ab67e48b93" @@ -7846,6 +8633,11 @@ pumpify@^1.3.5: inherits "^2.0.3" pump "^2.0.0" +punycode@1.3.2: + version "1.3.2" + resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.3.2.tgz#9653a036fb7c1ee42342f2325cceefea3926c48d" + integrity sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0= + punycode@2.x.x, punycode@^2.1.0, punycode@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec" @@ -7873,6 +8665,11 @@ qs@^6.9.4: dependencies: side-channel "^1.0.4" +querystring@0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.0.tgz#b209849203bb25df820da756e747005878521620" + integrity sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA= + quick-lru@^4.0.1: version "4.0.1" resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-4.0.1.tgz#5b8878f113a58217848c6482026c73e1ba57727f" @@ -8049,6 +8846,13 @@ realpath-native@^1.1.0: dependencies: util.promisify "^1.0.0" +rechoir@0.7.0: + version "0.7.0" + resolved "https://registry.yarnpkg.com/rechoir/-/rechoir-0.7.0.tgz#32650fd52c21ab252aa5d65b19310441c7e03aca" + integrity sha512-ADsDEH2bvbjltXEP+hTIAmeFekTFK0V2BTxMkok6qILyAJEXV0AFfoWcAq4yfll5VdIMd/RVXq0lR+wQi5ZU3Q== + dependencies: + resolve "^1.9.0" + rechoir@^0.6.2: version "0.6.2" resolved "https://registry.yarnpkg.com/rechoir/-/rechoir-0.6.2.tgz#85204b54dba82d5742e28c96756ef43af50e3384" @@ -8111,6 +8915,11 @@ regex-not@^1.0.0, regex-not@^1.0.2: extend-shallow "^3.0.2" safe-regex "^1.1.0" +regex-parser@^2.2.11: + version "2.2.11" + resolved "https://registry.yarnpkg.com/regex-parser/-/regex-parser-2.2.11.tgz#3b37ec9049e19479806e878cabe7c1ca83ccfe58" + integrity 
sha512-jbD/FT0+9MBU2XAZluI7w2OBs1RBi6p9M83nkoZayQXXU9e8Robt69FcZc7wU4eJD/YFTjn1JdCk3rbMJajz8Q== + regexpp@^3.0.0, regexpp@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/regexpp/-/regexpp-3.1.0.tgz#206d0ad0a5648cffbdb8ae46438f3dc51c9f78e2" @@ -8292,6 +9101,15 @@ require-from-string@^2.0.2: resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909" integrity sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw== +require-in-the-middle@^5.0.3: + version "5.1.0" + resolved "https://registry.yarnpkg.com/require-in-the-middle/-/require-in-the-middle-5.1.0.tgz#b768f800377b47526d026bbf5a7f727f16eb412f" + integrity sha512-M2rLKVupQfJ5lf9OvqFGIT+9iVLnTmjgbOmpil12hiSQNn5zJTKGPoIisETNjfK+09vP3rpm1zJajmErpr2sEQ== + dependencies: + debug "^4.1.1" + module-details-from-path "^1.0.3" + resolve "^1.12.0" + require-main-filename@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/require-main-filename/-/require-main-filename-1.0.1.tgz#97f717b69d48784f5f526a6c5aa8ffdda055a4d1" @@ -8327,6 +9145,11 @@ resolve-from@^4.0.0: resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6" integrity sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g== +resolve-from@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-5.0.0.tgz#c35225843df8f776df21c57557bc087e9dfdfc69" + integrity sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw== + resolve-options@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/resolve-options/-/resolve-options-1.1.0.tgz#32bb9e39c06d67338dc9378c0d6d6074566ad131" @@ -8344,7 +9167,7 @@ resolve@1.1.7: resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.1.7.tgz#203114d82ad2c5ed9e8e0411b3932875e889e97b" integrity sha1-IDEU2CrSxe2ejgQRs5ModeiJ6Xs= -resolve@1.x, resolve@^1.0.0, resolve@^1.13.1, resolve@^1.14.2, resolve@^1.17.0, resolve@^1.20.0, resolve@^1.4.0: +resolve@^1.0.0, resolve@^1.12.0, resolve@^1.13.1, resolve@^1.14.2, resolve@^1.17.0, resolve@^1.20.0, resolve@^1.4.0: version "1.20.0" resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.20.0.tgz#629a013fb3f70755d6f0b7935cc1c2c5378b1975" integrity sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A== @@ -8367,6 +9190,15 @@ resolve@^1.15.1: is-core-module "^2.0.0" path-parse "^1.0.6" +resolve@^1.9.0: + version "1.21.0" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.21.0.tgz#b51adc97f3472e6a5cf4444d34bc9d6b9037591f" + integrity sha512-3wCbTpk5WJlyE4mSOtDLhqQmGFi0/TD9VPwmiolnk8U0wRgMEktqCXd3vy5buTO3tljvalNvKrjHEfrd2WpEKA== + dependencies: + is-core-module "^2.8.0" + path-parse "^1.0.7" + supports-preserve-symlinks-flag "^1.0.0" + ret@~0.1.10: version "0.1.15" resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc" @@ -8477,24 +9309,19 @@ semver-greatest-satisfied-range@^1.1.0: resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.0.tgz#790a7cf6fea5459bac96110b29b60412dc8ff96b" integrity sha512-Ya52jSX2u7QKghxeoFGpLwCtGlt7j0oY9DYb5apt9nPlJ42ID+ulTXESnt/qAQcoSERyZ5sl3LDIOw0nAn/5DA== -semver@4.3.2: - version "4.3.2" - resolved "https://registry.yarnpkg.com/semver/-/semver-4.3.2.tgz#c7a07158a80bedd052355b770d82d6640f803be7" - integrity sha1-x6BxWKgL7dBSNVt3DYLWZA+AO+c= - semver@7.0.0: version "7.0.0" 
resolved "https://registry.yarnpkg.com/semver/-/semver-7.0.0.tgz#5f3ca35761e47e05b206c6daff2cf814f0316b8e" integrity sha512-+GB6zVA9LWh6zovYQLALHwv5rb2PHGlJi3lfiqIHxR0uuwCgefcOJc59v9fv1w8GbStwxuuqqAjI9NMAOOgq1A== -semver@7.x, semver@^7.2.1, semver@^7.3.4, semver@^7.3.5: +semver@7.3.5, semver@^7.2.1, semver@^7.3.4, semver@^7.3.5: version "7.3.5" resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.5.tgz#0b621c879348d8998e4b0e4be94b3f12e6018ef7" integrity sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ== dependencies: lru-cache "^6.0.0" -semver@^5.4.1, semver@^5.5, semver@^5.7.1: +semver@^5.4.1, semver@^5.7.1: version "5.7.1" resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7" integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ== @@ -8608,6 +9435,11 @@ shellwords@^0.1.1: resolved "https://registry.yarnpkg.com/shellwords/-/shellwords-0.1.1.tgz#d6b9181c1a48d397324c84871efbcfc73fc0654b" integrity sha512-vFwSUfQvqybiICwZY5+DAWIPLKsWO31Q91JSKl3UYv+K5c2QRPzn0qzec6QPu1Qc9eHYItiP3NdJqNVqetYAww== +shimmer@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/shimmer/-/shimmer-1.2.1.tgz#610859f7de327b587efebf501fb43117f9aff337" + integrity sha512-sQTKC1Re/rM6XyFM6fIAGHRPVGvyXfgzIDvzoq608vM+jeyVD0Tu1E6Np0Kc2zAIFWIj963V2800iF/9LPieQw== + side-channel@^1.0.4: version "1.0.4" resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.4.tgz#efce5c8fdc104ee751b25c58d4290011fa5ea2cf" @@ -8872,6 +9704,11 @@ string-length@^2.0.0: astral-regex "^1.0.0" strip-ansi "^4.0.0" +string-template@~0.2.1: + version "0.2.1" + resolved "https://registry.yarnpkg.com/string-template/-/string-template-0.2.1.tgz#42932e598a352d01fc22ec3367d9d84eec6c9add" + integrity sha512-Yptehjogou2xm4UJbxJ4CxgZx12HBfeystp0y3x7s4Dj32ltVVG1Gg8YhKjHZkHicuKpZX/ffilA8505VbUbpw== + string-width@^1.0.1, string-width@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/string-width/-/string-width-1.0.2.tgz#118bdf5b8cdc51a2a7e70d211e07e2b0b9b107d3" @@ -8907,6 +9744,15 @@ string-width@^4.1.0, string-width@^4.2.0: is-fullwidth-code-point "^3.0.0" strip-ansi "^6.0.0" +string-width@^4.2.3: + version "4.2.3" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + string.prototype.trimend@^1.0.4: version "1.0.4" resolved "https://registry.yarnpkg.com/string.prototype.trimend/-/string.prototype.trimend-1.0.4.tgz#e75ae90c2942c63504686c18b287b4a0b1a45f80" @@ -8965,6 +9811,13 @@ strip-ansi@^6.0.0: dependencies: ansi-regex "^5.0.0" +strip-ansi@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + strip-bom@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-2.0.0.tgz#6219a85616520491f35788bdbf1447a99c7e6b0e" @@ -9052,6 +9905,11 @@ supports-color@^7.1.0: dependencies: has-flag "^4.0.0" +supports-preserve-symlinks-flag@^1.0.0: + version "1.0.0" + resolved 
"https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" + integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== + sver-compat@^1.5.0: version "1.5.0" resolved "https://registry.yarnpkg.com/sver-compat/-/sver-compat-1.5.0.tgz#3cf87dfeb4d07b4a3f14827bc186b3fd0c645cd8" @@ -9137,10 +9995,17 @@ tar@^6.0.2: mkdirp "^1.0.3" yallist "^4.0.0" -tarn@^2.0.0: - version "2.0.0" - resolved "https://registry.yarnpkg.com/tarn/-/tarn-2.0.0.tgz#c68499f69881f99ae955b4317ca7d212d942fdee" - integrity sha512-7rNMCZd3s9bhQh47ksAQd92ADFcJUjjbyOvyFjNLwTPpGieFHMC84S+LOzw0fx1uh6hnDz/19r8CPMnIjJlMMA== +tarn@^3.0.1: + version "3.0.2" + resolved "https://registry.yarnpkg.com/tarn/-/tarn-3.0.2.tgz#73b6140fbb881b71559c4f8bfde3d9a4b3d27693" + integrity sha512-51LAVKUSZSVfI05vjPESNc5vwqqZpbXCsU+/+wxlOrUjk2SnFTt97v9ZgQrD4YmxYW1Px6w2KjaDitCfkvgxMQ== + +tdigest@^0.1.1: + version "0.1.1" + resolved "https://registry.yarnpkg.com/tdigest/-/tdigest-0.1.1.tgz#2e3cb2c39ea449e55d1e6cd91117accca4588021" + integrity sha1-Ljyyw56kSeVdHmzZEReszKRYgCE= + dependencies: + bintrees "1.0.1" temp-dir@^1.0.0: version "1.0.0" @@ -9178,6 +10043,15 @@ text-table@^0.2.0: resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4" integrity sha1-f17oI66AUgfACvLfSoTsP8+lcLQ= +thriftrw@^3.5.0: + version "3.12.0" + resolved "https://registry.yarnpkg.com/thriftrw/-/thriftrw-3.12.0.tgz#30857847755e7f036b2e0a79d11c9f55075539d9" + integrity sha512-4YZvR4DPEI41n4Opwr4jmrLGG4hndxr7387kzRFIIzxHQjarPusH4lGXrugvgb7TtPrfZVTpZCVe44/xUxowEw== + dependencies: + bufrw "^1.3.0" + error "7.0.2" + long "^2.4.0" + throat@^4.0.0: version "4.1.0" resolved "https://registry.yarnpkg.com/throat/-/throat-4.1.0.tgz#89037cbc92c56ab18926e6ba4cbb200e15672a6a" @@ -9325,6 +10199,11 @@ tr46@^1.0.1: dependencies: punycode "^2.1.0" +tr46@~0.0.3: + version "0.0.3" + resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" + integrity sha1-gYT9NH2snNwYWZLzpmIuFLnZq2o= + "traverse@>=0.3.0 <0.4": version "0.3.9" resolved "https://registry.yarnpkg.com/traverse/-/traverse-0.3.9.tgz#717b8f220cc0bb7b44e40514c22b2e8bbc70d8b9" @@ -9350,37 +10229,10 @@ trim-off-newlines@^1.0.0: resolved "https://registry.yarnpkg.com/trim-off-newlines/-/trim-off-newlines-1.0.1.tgz#9f9ba9d9efa8764c387698bcbfeb2c848f11adb3" integrity sha1-n5up2e+odkw4dpi8v+sshI8RrbM= -ts-jest@^24.3.0: - version "24.3.0" - resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-24.3.0.tgz#b97814e3eab359ea840a1ac112deae68aa440869" - integrity sha512-Hb94C/+QRIgjVZlJyiWwouYUF+siNJHJHknyspaOcZ+OQAIdFG/UrdQVXw/0B8Z3No34xkUXZJpOTy9alOWdVQ== - dependencies: - bs-logger "0.x" - buffer-from "1.x" - fast-json-stable-stringify "2.x" - json5 "2.x" - lodash.memoize "4.x" - make-error "1.x" - mkdirp "0.x" - resolve "1.x" - semver "^5.5" - yargs-parser "10.x" - -ts-jest@^26.5.5: - version "26.5.5" - resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-26.5.5.tgz#e40481b6ee4dd162626ba481a2be05fa57160ea5" - integrity sha512-7tP4m+silwt1NHqzNRAPjW1BswnAhopTdc2K3HEkRZjF0ZG2F/e/ypVH0xiZIMfItFtD3CX0XFbwPzp9fIEUVg== - dependencies: - bs-logger "0.x" - buffer-from "1.x" - fast-json-stable-stringify "2.x" - jest-util "^26.1.0" - json5 "2.x" - lodash "4.x" - make-error "1.x" - mkdirp "1.x" - semver "7.x" - yargs-parser "20.x" +ts-essentials@7.0.1: + version "7.0.1" + resolved 
"https://registry.yarnpkg.com/ts-essentials/-/ts-essentials-7.0.1.tgz#d205508cae0cdadfb73c89503140cf2228389e2d" + integrity sha512-8lwh3QJtIc1UWhkQtr9XuksXu3O0YQdEE5g79guDfhCaU1FWTDIEDZ1ZSx4HTHUmlJZ8L812j3BZQ4a0aOUkSA== ts-node-dev@^1.1.6: version "1.1.8" @@ -9448,6 +10300,16 @@ tsconfig@^7.0.0: strip-bom "^3.0.0" strip-json-comments "^2.0.0" +tslib@2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.1.0.tgz#da60860f1c2ecaa5703ab7d39bc05b6bf988b97a" + integrity sha512-hcVC3wYEziELGGmEEXue7D75zbwIIVUMWAVbHItGPx0ziyXxrOMQx4rQEVEV45Ut/1IotuEvwqPopzIOkDMf0A== + +tslib@2.2.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.2.0.tgz#fb2c475977e35e241311ede2693cee1ec6698f5c" + integrity sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w== + tslib@^1.8.1, tslib@^1.9.3: version "1.14.1" resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00" @@ -9486,6 +10348,11 @@ type-check@~0.3.2: dependencies: prelude-ls "~1.1.2" +type-detect@^4.0.0, type-detect@^4.0.5: + version "4.0.8" + resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c" + integrity sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g== + type-fest@^0.18.0: version "0.18.1" resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.18.1.tgz#db4bc151a4a2cf4eebf9add5db75508db6cc841f" @@ -9534,10 +10401,10 @@ typedarray@^0.0.6: resolved "https://registry.yarnpkg.com/typedarray/-/typedarray-0.0.6.tgz#867ac74e3864187b1d3d47d996a78ec5c8830777" integrity sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c= -typescript@^3.9.10: - version "3.9.10" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-3.9.10.tgz#70f3910ac7a51ed6bef79da7800690b19bf778b8" - integrity sha512-w6fIxVE/H1PkLKcCPsFqKE7Kv7QUwhU8qQY2MueZXWx5cPZdwFupLgKK3vntcK98BtNHZtAF4LA/yl2a7k8R6Q== +typescript@^5.0.4: + version "5.0.4" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.0.4.tgz#b217fd20119bd61a94d4011274e0ab369058da3b" + integrity sha512-cW9T5W9xY37cc+jfEnaUvX91foxtHkza3Nw3wkoF4sSlKn0MONdkdEndig/qPBWXNkmplh3NzayQzCiHM4/hqw== uglify-js@^3.1.4: version "3.13.8" @@ -9695,6 +10562,19 @@ urix@^0.1.0: resolved "https://registry.yarnpkg.com/urix/-/urix-0.1.0.tgz#da937f7a62e21fec1fd18d49b35c2935067a6c72" integrity sha1-2pN/emLiH+wf0Y1Js1wpNQZ6bHI= +url-value-parser@2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/url-value-parser/-/url-value-parser-2.0.3.tgz#cd4b8d6754e458d65e8125260c09718d926e6e21" + integrity sha512-FjIX+Q9lYmDM9uYIGdMYfQW0uLbWVwN2NrL2ayAI7BTOvEwzH+VoDdNquwB9h4dFAx+u6mb0ONLa3sHD5DvyvA== + +url@0.11.0: + version "0.11.0" + resolved "https://registry.yarnpkg.com/url/-/url-0.11.0.tgz#3838e97cfc60521eb73c525a8e55bfdd9e2e28f1" + integrity sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE= + dependencies: + punycode "1.3.2" + querystring "0.2.0" + use@^3.1.0: version "3.1.1" resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f" @@ -9733,17 +10613,17 @@ uuid@^3.3.2: resolved "https://registry.yarnpkg.com/uuid/-/uuid-3.4.0.tgz#b23e4358afa8a202fe7a100af1f5f883f02007ee" integrity sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A== -uuid@^7.0.1: - version "7.0.3" - resolved "https://registry.yarnpkg.com/uuid/-/uuid-7.0.3.tgz#c5c9f2c8cf25dc0a372c4df1441c41f5bd0c680b" - integrity 
sha512-DPSke0pXhTZgoF/d+WSt2QaKMCFSfx7QegxEWT+JOuHF5aWrKEn0G+ztjuJg/gG8/ItK+rbPCD/yNv8yyih6Cg== +uuid@^8.3.2: + version "8.3.2" + resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2" + integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg== v8-compile-cache@^2.0.3: version "2.3.0" resolved "https://registry.yarnpkg.com/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz#2de19618c66dc247dcfb6f99338035d8245a2cee" integrity sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA== -v8flags@^3.1.3, v8flags@^3.2.0: +v8flags@^3.2.0: version "3.2.0" resolved "https://registry.yarnpkg.com/v8flags/-/v8flags-3.2.0.tgz#b243e3b4dfd731fa774e7492128109a0fe66d656" integrity sha512-mH8etigqMfiGWdeXpaaqGfs6BndypxusHHcv2qSHyZkGEznCd/qAXCWWRzeowtL54147cktFOC4P5y+kl8d8Jg== @@ -9839,6 +10719,11 @@ walker@^1.0.7, walker@~1.0.5: dependencies: makeerror "1.0.x" +webidl-conversions@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871" + integrity sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE= + webidl-conversions@^4.0.2: version "4.0.2" resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-4.0.2.tgz#a855980b1f0b6b359ba1d5d9fb39ae941faa63ad" @@ -9856,6 +10741,14 @@ whatwg-mimetype@^2.1.0, whatwg-mimetype@^2.2.0: resolved "https://registry.yarnpkg.com/whatwg-mimetype/-/whatwg-mimetype-2.3.0.tgz#3d4b1e0312d2079879f826aff18dbeeca5960fbf" integrity sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g== +whatwg-url@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-5.0.0.tgz#966454e8765462e37644d3626f6742ce8b70965d" + integrity sha1-lmRU6HZUYuN2RNNib2dCzotwll0= + dependencies: + tr46 "~0.0.3" + webidl-conversions "^3.0.0" + whatwg-url@^6.4.1: version "6.5.0" resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-6.5.0.tgz#f2df02bff176fd65070df74ad5ccbb5a199965a8" @@ -9988,6 +10881,11 @@ xml-name-validator@^3.0.0: resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a" integrity sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw== +xorshift@^1.1.1: + version "1.2.0" + resolved "https://registry.yarnpkg.com/xorshift/-/xorshift-1.2.0.tgz#30a4cdd8e9f8d09d959ed2a88c42a09c660e8148" + integrity sha512-iYgNnGyeeJ4t6U11NpA/QiKy+PXn5Aa3Azg5qkwIFz1tBLllQrjjsk9yzD7IAK0naNU4JxdeDgqW9ov4u/hc4g== + xtend@^4.0.0: version "4.0.1" resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.1.tgz#a5c6d532be656e23db820efb943a1f04998d63af" @@ -10033,18 +10931,6 @@ yallist@^4.0.0: resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72" integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A== -yargs-parser@10.x: - version "10.1.0" - resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-10.1.0.tgz#7202265b89f7e9e9f2e5765e0fe735a905edbaa8" - integrity sha512-VCIyR1wJoEBZUqk5PA+oOBF6ypbwh5aNB3I50guxAL/quggdfs4TtNHQrSazFA3fYZ+tEqfs0zIGlv0c/rgjbQ== - dependencies: - camelcase "^4.1.0" - -yargs-parser@20.x, yargs-parser@^20.2.3: - version "20.2.7" - resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.7.tgz#61df85c113edfb5a7a4e36eb8aa60ef423cbc90a" - integrity 
sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw== - yargs-parser@^13.1.1: version "13.1.1" resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-13.1.1.tgz#d26058532aa06d365fe091f6a1fc06b2f7e5eca0" @@ -10071,6 +10957,16 @@ yargs-parser@^20.2.2: resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.9.tgz#2eb7dc3b0289718fc295f362753845c41a0c94ee" integrity sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w== +yargs-parser@^20.2.3: + version "20.2.7" + resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.7.tgz#61df85c113edfb5a7a4e36eb8aa60ef423cbc90a" + integrity sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw== + +yargs-parser@^21.0.0: + version "21.0.1" + resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.0.1.tgz#0267f286c877a4f0f728fceb6f8a3e4cb95c6e35" + integrity sha512-9BK1jFpLzJROCI5TzwZL/TU4gqjK5xiHV/RfWLOahrjAko/e4DJkRDZQXfvqAsiZzzYhgAzbgz6lg48jcm4GLg== + yargs-parser@^5.0.1: version "5.0.1" resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-5.0.1.tgz#7ede329c1d8cdbbe209bd25cdb990e9b1ebbb394" @@ -10124,6 +11020,19 @@ yargs@^16.0.3: y18n "^5.0.1" yargs-parser "^20.0.0" +yargs@^16.2.0: + version "16.2.0" + resolved "https://registry.yarnpkg.com/yargs/-/yargs-16.2.0.tgz#1c82bf0f6b6a66eafce7ef30e376f49a12477f66" + integrity sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw== + dependencies: + cliui "^7.0.2" + escalade "^3.1.1" + get-caller-file "^2.0.5" + require-directory "^2.1.1" + string-width "^4.2.0" + y18n "^5.0.5" + yargs-parser "^20.2.2" + yargs@^17.2.1: version "17.2.1" resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.2.1.tgz#e2c95b9796a0e1f7f3bf4427863b42e0418191ea" @@ -10137,6 +11046,19 @@ yargs@^17.2.1: y18n "^5.0.5" yargs-parser "^20.2.2" +yargs@^17.5.1: + version "17.5.1" + resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.5.1.tgz#e109900cab6fcb7fd44b1d8249166feb0b36e58e" + integrity sha512-t6YAJcxDkNX7NFYiVtKvWUz8l+PaKTLiL63mJYWR2GnHq2gjEWISzsLp9wg3aY36dY1j+gfIEL3pIF+XlJJfbA== + dependencies: + cliui "^7.0.2" + escalade "^3.1.1" + get-caller-file "^2.0.5" + require-directory "^2.1.1" + string-width "^4.2.3" + y18n "^5.0.5" + yargs-parser "^21.0.0" + yargs@^7.1.0: version "7.1.2" resolved "https://registry.yarnpkg.com/yargs/-/yargs-7.1.2.tgz#63a0a5d42143879fdbb30370741374e0641d55db" @@ -10156,11 +11078,6 @@ yargs@^7.1.0: y18n "^3.2.1" yargs-parser "^5.0.1" -yn@*, yn@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/yn/-/yn-4.0.0.tgz#611480051ea43b510da1dfdbe177ed159f00a979" - integrity sha512-huWiiCS4TxKc4SfgmTwW1K7JmXPPAmuXWYy4j9qjQo4+27Kni8mGhAAi1cloRWmBe2EqcLgt3IGqQoRL/MtPgg== - yn@3.1.1: version "3.1.1" resolved "https://registry.yarnpkg.com/yn/-/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50" @@ -10170,3 +11087,8 @@ yn@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/yn/-/yn-2.0.0.tgz#e5adabc8acf408f6385fc76495684c88e6af689a" integrity sha1-5a2ryKz0CPY4X8dklWhMiOavaJo= + +yn@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/yn/-/yn-4.0.0.tgz#611480051ea43b510da1dfdbe177ed159f00a979" + integrity sha512-huWiiCS4TxKc4SfgmTwW1K7JmXPPAmuXWYy4j9qjQo4+27Kni8mGhAAi1cloRWmBe2EqcLgt3IGqQoRL/MtPgg==