From 3da0e3d4bf8ebab910e1932e5844360e702f97c7 Mon Sep 17 00:00:00 2001
From: SleeplessOne1917
Date: Fri, 12 May 2023 05:28:33 -0400
Subject: [PATCH] feat: Add webpage translation functionality

---
 README.md     | 24 +++++++++++++++++++
 src/client.ts | 42 +++++++++++++++++++++++++-------
 src/errors.ts |  2 ++
 src/index.ts  | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 125 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 433ef8a..6094109 100644
--- a/README.md
+++ b/README.md
@@ -230,6 +230,30 @@ directly:
 - `getDocumentStatus()` (or `isDocumentTranslationComplete()`), and
 - `downloadDocument()`
 
+#### Translating webpages
+
+Webpages can be translated as well by calling `translateWebpage()`. It has the same signature as `translateDocument()` except for the
+first parameter, which is the URL for the webpage you would like translated.
+
+```javascript
+// Translate the English Wikipedia page on DeepL Translator into German:
+await translator.translateWebpage(
+    'https://en.wikipedia.org/wiki/DeepL_Translator',
+    'DeepLWiki.html',
+    'en',
+    'de'
+);
+```
+
+Like `translateDocument()`, `translateWebpage()` wraps multiple API calls: uploading, polling status until
+the translation is complete, and downloading. If your application needs to
+execute these steps individually, you can instead use the following functions
+directly:
+
+- `uploadWebpage()`,
+- `getDocumentStatus()` (or `isDocumentTranslationComplete()`), and
+- `downloadDocument()`
+
 #### Document translation options
 
 - `formality`: same as in [Text translation options](#text-translation-options).
diff --git a/src/client.ts b/src/client.ts
index 4454e71..1073dce 100644
--- a/src/client.ts
+++ b/src/client.ts
@@ -106,14 +106,16 @@ export class HttpClient {
         url: string,
         timeoutMs: number,
         responseAsStream: boolean,
+        isDeepL: boolean,
         options: SendRequestOptions,
     ): AxiosRequestConfig {
         const headers = Object.assign({}, this.headers, options.headers);
+        logDebug(`isDeepL: ${isDeepL}`);
 
         const axiosRequestConfig: AxiosRequestConfig = {
             url,
             method,
-            baseURL: this.serverUrl,
+            baseURL: isDeepL ? this.serverUrl : undefined,
             headers,
             responseType: responseAsStream ? 'stream' : 'text',
             timeout: timeoutMs,
@@ -147,19 +149,26 @@ export class HttpClient {
     /**
      * Makes API request retrying if necessary, and returns (as Promise) response.
      * @param method HTTP method, for example 'GET'
-     * @param url Path to endpoint, excluding base server URL.
+     * @param url Path relative to the DeepL server URL for API requests, or an absolute URL when fetching a webpage.
      * @param options Additional options controlling request.
      * @param responseAsStream Set to true if the return type is IncomingMessage.
-     * @return Fulfills with status code and response (as text or stream).
+     * @return Fulfills with status code, content type, and response (as text or stream).
      */
     async sendRequestWithBackoff<TContent extends string | IncomingMessage>(
         method: HttpMethod,
         url: string,
         options?: SendRequestOptions,
         responseAsStream = false,
-    ): Promise<{ statusCode: number; content: TContent }> {
+    ): Promise<{ statusCode: number; content: TContent; contentType?: string }> {
+        let isDeepLUrl: boolean;
+        try {
+            isDeepLUrl = !new URL(url); // Parses only when absolute, i.e. an external webpage.
+        } catch {
+            isDeepLUrl = true; // Relative path, i.e. a DeepL API endpoint.
+        }
+
         options = options === undefined ? {} : options;
-        logInfo(`Request to DeepL API ${method} ${url}`);
+        logInfo(`${isDeepLUrl ? 'Request to DeepL API' : 'Request to webpage'} ${method} ${url}`);
         logDebug(`Request details: ${options.data}`);
         const backoff = new BackoffTimer();
         let response, error;
@@ -170,8 +179,14 @@ export class HttpClient {
                 url,
                 timeoutMs,
                 responseAsStream,
+                isDeepLUrl,
                 options,
             );
+
+            if (!isDeepLUrl && axiosRequestConfig.headers) {
+                delete axiosRequestConfig.headers.Authorization;
+            }
+
             try {
                 response = await HttpClient.sendAxiosRequest<TContent>(axiosRequestConfig);
                 error = undefined;
@@ -199,8 +214,12 @@ export class HttpClient {
         }
 
         if (response !== undefined) {
-            const { statusCode, content } = response;
-            logInfo(`DeepL API response ${method} ${url} ${statusCode}`);
+            const { statusCode, content, contentType } = response;
+            logInfo(
+                `${
+                    isDeepLUrl ? 'DeepL API response' : 'Webpage response'
+                } ${method} ${url} ${statusCode}${!isDeepLUrl ? ` ${contentType}` : ''}`,
+            );
             if (!responseAsStream) {
                 logDebug('Response details:', { content: content });
             }
@@ -217,7 +236,7 @@ export class HttpClient {
      */
     private static async sendAxiosRequest<TContent extends string | IncomingMessage>(
         axiosRequestConfig: AxiosRequestConfig,
-    ): Promise<{ statusCode: number; content: TContent }> {
+    ): Promise<{ statusCode: number; content: TContent; contentType?: string }> {
         try {
             const response = await axios.request(axiosRequestConfig);
 
@@ -227,7 +246,12 @@ export class HttpClient {
                     response.data = JSON.stringify(response.data);
                 }
             }
-            return { statusCode: response.status, content: response.data };
+
+            return {
+                statusCode: response.status,
+                content: response.data,
+                contentType: response.headers['content-type'],
+            };
         } catch (axios_error_raw) {
             const axiosError = axios_error_raw as AxiosError;
             const message: string = axiosError.message || '';
diff --git a/src/errors.ts b/src/errors.ts
index 17f5baf..78325f0 100644
--- a/src/errors.ts
+++ b/src/errors.ts
@@ -41,3 +41,5 @@ export class DocumentTranslationError extends DeepLError {
 export class GlossaryNotFoundError extends DeepLError {}
 
 export class DocumentNotReadyError extends DeepLError {}
+
+export class WebsiteDownloadError extends DeepLError {}
diff --git a/src/index.ts b/src/index.ts
index af2e45d..6f61dfe 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -11,6 +11,7 @@ import {
     GlossaryNotFoundError,
     QuotaExceededError,
     TooManyRequestsError,
+    WebsiteDownloadError,
 } from './errors';
 import { GlossaryEntries } from './glossaryEntries';
 import {
@@ -645,6 +646,37 @@ export class Translator {
         }
     }
 
+    /**
+     * Uploads the HTML of the specified webpage to DeepL to translate into given target language, waits for
+     * translation to complete, then downloads translated webpage to specified output path.
+     * @param webpageUrl String or URL containing the URL of the webpage to be translated.
+     * @param outputFile String containing file path to create translated document, or Stream or FileHandle to write translated document content.
+     * @param sourceLang Language code of input document, or null to use auto-detection.
+     * @param targetLang Language code of language to translate into.
+     * @param options Optional DocumentTranslateOptions object containing additional options controlling translation.
+     * @return Fulfills with a DocumentStatus object for the completed translation. You can use the
+     * billedCharacters property to check how many characters were billed for the document.
+     * @throws {Error} If a file already exists at the output file path.
+     * @throws {WebsiteDownloadError} If the webpage response does not have an HTML content type.
+     * @throws {DocumentTranslationError} If any error occurs during document upload, translation or
+     * download. The `documentHandle` property of the error may be used to recover the document.
+     */
+    async translateWebpage(
+        webpageUrl: string | URL,
+        outputFile: string | fs.WriteStream | fs.promises.FileHandle,
+        sourceLang: SourceLanguageCode | null,
+        targetLang: TargetLanguageCode,
+        options?: DocumentTranslateOptions,
+    ): Promise<DocumentStatus> {
+        return this.translateDocument(
+            Buffer.from(await this.getContentFromWebpage(webpageUrl)),
+            outputFile,
+            sourceLang,
+            targetLang,
+            { filename: 'webpage.html', ...options },
+        );
+    }
+
     /**
      * Uploads specified document to DeepL to translate into target language, and returns handle associated with the document.
      * @param inputFile String containing file path, stream containing file data, or FileHandle.
@@ -709,6 +741,28 @@ export class Translator {
         }
     }
 
+    /**
+     * Uploads specified webpage HTML to DeepL to translate into target language, and returns handle associated with the document.
+     * @param webpageUrl String or URL containing the URL of the webpage to be translated.
+     * @param sourceLang Language code of input document, or null to use auto-detection.
+     * @param targetLang Language code of language to translate into.
+     * @param options Optional DocumentTranslateOptions object containing additional options controlling translation.
+     * @return Fulfills with DocumentHandle associated with the in-progress translation.
+     */
+    async uploadWebpage(
+        webpageUrl: string | URL,
+        sourceLang: SourceLanguageCode | null,
+        targetLang: TargetLanguageCode,
+        options?: DocumentTranslateOptions,
+    ): Promise<DocumentHandle> {
+        return this.uploadDocument(
+            Buffer.from(await this.getContentFromWebpage(webpageUrl)),
+            sourceLang,
+            targetLang,
+            { filename: 'webpage.html', ...options },
+        );
+    }
+
     /**
      * Retrieves the status of the document translation associated with the given document handle.
      * @param handle Document handle associated with document.
@@ -1003,6 +1057,18 @@ export class Translator {
         return libraryInfoString;
     }
 
+    /** Fetches the page at webpageUrl and returns its body; throws WebsiteDownloadError unless the content type is HTML. */
+    private async getContentFromWebpage(webpageUrl: string | URL): Promise<string> {
+        const { statusCode, content, contentType } =
+            await this.httpClient.sendRequestWithBackoff<string>('GET', webpageUrl.toString());
+        await checkStatusCode(statusCode, content);
+        // Media types are case-insensitive, so normalise before matching.
+        if (!contentType?.toLowerCase().includes('text/html')) {
+            throw new WebsiteDownloadError('URL to translate must return HTML');
+        }
+        return content;
+    }
+
     /**
      * HttpClient implements all HTTP requests and retries.
      * @private