Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add markdown support using md4c #606

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion crengine/include/bookformats.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ typedef enum {
doc_format_pdb,
doc_format_odt,
doc_format_svg,
doc_format_max = doc_format_svg
doc_format_md,
doc_format_max = doc_format_md
// don't forget to update getDocFormatName() when changing this enum
} doc_format_t;

Expand Down
32 changes: 32 additions & 0 deletions crengine/src/lvdocview.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "../include/fb3fmt.h"
#include "../include/docxfmt.h"
#include "../include/odtfmt.h"
#include "mdfmt.h"

/// to show page bounds rectangles
//#define SHOW_PAGE_RECT
Expand Down Expand Up @@ -4649,6 +4650,37 @@ bool LVDocView::LoadDocument(LVStreamRef stream, bool metadataOnly) {
}
#endif

#if (USE_MD4C == 1)
if (DetectMarkdownFormat(m_stream, stream->GetName())) {
CRLog::info("Markdown format detected");
createEmptyDocument();
m_doc->setProps(m_doc_props);
setRenderProps(0, 0);
setDocFormat(doc_format_md);
if (m_callback)
m_callback->OnLoadFileFormatDetected(doc_format_md);
updateDocStyleSheet();
bool res = ImportMarkdownDocument(m_stream, stream->GetName(), m_doc, m_callback, this);
if (!res) {
setDocFormat(doc_format_none);
createDefaultDocument(cs32("ERROR: Error reading Markdown format"), cs32("Cannot open document"));
if (m_callback) {
m_callback->OnLoadFileError(cs32("Error reading Markdown document"));
}
return false;
} else {
setRenderProps(0, 0);
REQUEST_RENDER("loadDocument")
if (m_callback) {
m_callback->OnLoadFileEnd();
//m_doc->compact();
m_doc->dumpStatistics();
}
return true;
}
}
#endif

bool repeat_recursively = false;
m_arc = LVOpenArchieve( m_stream );
if (!m_arc.isNull())
Expand Down
130 changes: 130 additions & 0 deletions crengine/src/mdfmt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/***************************************************************************
* crengine-ng *
* Copyright (C) 2022,2024 Aleksey Chernov <[email protected]> *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, *
* MA 02110-1301, USA. *
***************************************************************************/

#include "mdfmt.h"

#if (USE_MD4C == 1)

#include <lvtinydom.h>

#include <string.h>

#include <md4c-html.h>

#define TEXT_PARSER_CHUNK_SIZE 16384

/// Decides whether the given stream should be handled as a Markdown document.
///
/// A stream qualifies only when all of the following hold:
///  - the file name ends with ".md" (compared after lowercasing the name),
///  - the stream size is at least 5 bytes and at most MARKDOWN_MAX_FILE_SIZE,
///  - LVTextParser::CheckFormat() accepts the content.
///
/// @param stream   stream to probe; its position is set back to 0 after the text check
/// @param fileName name of the file the stream was opened from
/// @return true when the stream is accepted as Markdown, false otherwise
bool DetectMarkdownFormat(LVStreamRef stream, const lString32& fileName) {
    // Extension test on a lowercased copy of the name
    lString32 lowerName = lString32(fileName).lowercase();
    const bool hasMdExtension = lowerName.endsWith(".md");
    if (!hasMdExtension)
        return false;

    // Size test: reject too small and too large files
    const lvsize_t streamSize = stream->GetSize();
    const bool sizeAcceptable = streamSize >= 5 && streamSize <= MARKDOWN_MAX_FILE_SIZE;
    if (!sizeAcceptable)
        return false;

    // Content test: the data must pass the plain text format check
    LVTextParser probe(stream, NULL, true);
    const bool looksLikeText = probe.CheckFormat();
    // Rewind so the next consumer starts from the beginning of the stream
    stream->SetPos(0);
    return looksLikeText;
}

/// User data passed through md_html() to the output callback.
struct cre_md4c_parse_data_tag
{
    lString8* htmlData; ///< buffer the output callback appends generated chunks to
};
typedef struct cre_md4c_parse_data_tag cre_md4c_parse_data;

static void my_md4c_process_output(const MD_CHAR* chunk, MD_SIZE sz, void* userData) {
cre_md4c_parse_data* data = (cre_md4c_parse_data*)userData;
data->htmlData->append(chunk, sz);
}

bool ImportMarkdownDocument(LVStreamRef stream, const lString32& fileName, ldomDocument* doc, LVDocViewCallback* progressCallback, CacheLoadingCallback* formatCallback) {
if (doc->openFromCache(formatCallback)) {
if (progressCallback) {
progressCallback->OnLoadFileEnd();
}
return true;
}
bool res = false;
// Read stream
lString8 rawData;
lString8 htmlData;
char buffer[TEXT_PARSER_CHUNK_SIZE];
lvsize_t bytesRead = 0;
stream->SetPos(0);
while (stream->Read(buffer, TEXT_PARSER_CHUNK_SIZE, &bytesRead) == LVERR_OK) {
rawData.append(buffer, bytesRead);
if (bytesRead < TEXT_PARSER_CHUNK_SIZE)
break;
}
// Parse and convert to html
cre_md4c_parse_data parseData;
parseData.htmlData = &htmlData;
int parse_res = md_html(rawData.c_str(), rawData.length(), my_md4c_process_output, (void*)&parseData,
MD_FLAG_COLLAPSEWHITESPACE | MD_FLAG_TABLES | MD_FLAG_TASKLISTS |
MD_FLAG_STRIKETHROUGH | MD_FLAG_PERMISSIVEURLAUTOLINKS |
MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS |
MD_FLAG_LATEXMATHSPANS,
0);
rawData.clear();
if (0 != parse_res) {
// Parse failed
CRLog::error("MD4C: Failed to parse Markdown document!");
return res;
}
// Write document content to stream to parse them
lvsize_t result_len = htmlData.length();
lString32 title = LVExtractFilenameWithoutExtension(fileName);
lString8 gen_preamble = cs8("<html><head><title>") + UnicodeToUtf8(title) + cs8("</title></head><body>");
lString8 gen_tail = cs8("</body></html>");
lvsize_t dw;
LVStreamRef memStream = LVCreateMemoryStream();
res = !memStream.isNull();
if (res)
res = LVERR_OK == memStream->Write(gen_preamble.c_str(), gen_preamble.length(), &dw);
if (res)
res = dw == (lvsize_t)gen_preamble.length();
if (res) {
res = LVERR_OK == memStream->Write(htmlData.data(), result_len, &dw);
}
htmlData.clear();
if (res)
res = dw == result_len;
if (res)
res = LVERR_OK == memStream->Write(gen_tail.c_str(), gen_tail.length(), &dw);
if (res)
res = dw == (lvsize_t)gen_tail.length();
if (res) {
// Parse stream to document
ldomDocumentWriter writer(doc);
LVHTMLParser parser(memStream, &writer);
parser.setProgressCallback(progressCallback);
res = parser.CheckFormat() && parser.Parse();
}
if (res) {
doc->getProps()->setString(DOC_PROP_TITLE, title);
doc->buildTocFromHeadings();
}
return res;
}

#endif
42 changes: 42 additions & 0 deletions crengine/src/mdfmt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/***************************************************************************
* crengine-ng *
* Copyright (C) 2022,2024 Aleksey Chernov <[email protected]> *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, *
* MA 02110-1301, USA. *
***************************************************************************/

#ifndef __MDFMT_H_INCLUDED__
#define __MDFMT_H_INCLUDED__

#include <crsetup.h>

#if (USE_MD4C == 1)

#include <lvstream.h>
#include <lvstring.h>

class ldomDocument;
class LVDocViewCallback;
class CacheLoadingCallback;

// Maximum size in bytes of a file accepted as Markdown.
// Parenthesized so the macro expands safely inside any expression.
#define MARKDOWN_MAX_FILE_SIZE (10 * 1024 * 1024) // 10M

/// Checks whether the stream should be treated as a Markdown document.
/// @param stream   stream to probe
/// @param fileName name of the file the stream belongs to
/// @return true if the stream is detected as Markdown, false otherwise
bool DetectMarkdownFormat(LVStreamRef stream, const lString32& fileName);
/// Imports a Markdown document from the stream into the given document.
/// @param stream           source stream with the Markdown text
/// @param fileName         name of the file the stream belongs to
/// @param doc              document to fill
/// @param progressCallback callback notified about loading progress
/// @param formatCallback   callback used when loading the document from cache
/// @return true on success, false otherwise
bool ImportMarkdownDocument(LVStreamRef stream, const lString32& fileName, ldomDocument* doc, LVDocViewCallback* progressCallback, CacheLoadingCallback* formatCallback);

#endif

#endif // __MDFMT_H_INCLUDED__
Loading