-
Notifications
You must be signed in to change notification settings - Fork 10
/
index.html
110 lines (92 loc) · 4.09 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<script src="https://cdnjs.cloudflare.com/ajax/libs/downloadjs/1.4.8/download.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/comlinkjs@3/umd/comlink.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/bluebird/3.5.4/bluebird.min.js"></script>
<link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/dropzone/5.1.1/dropzone.css">
<link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/dropzone/5.1.1/basic.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/dropzone/5.1.1/dropzone.js"></script>
</head>
<body>
<h2>Demo of using wasm to extract first page of PDF</h2>
<div>Note: May not work on Android</div>
<form action=""
class="dropzone"
id="my-drop-zone"></form>
<textarea id="outtext" rows=24 cols=80>
Output: (More in DevTools Console)
</textarea>
<div style="margin-top: 20px;">
Using <a href="https://github.com/hhrutter/pdfcpu">https://github.com/hhrutter/pdfcpu</a>
</div>
<script>
(async function() {
// Fallback for engines that do not ship WebAssembly.instantiateStreaming:
// download the whole module into memory first, then instantiate it from
// the resulting bytes.
if (!WebAssembly.instantiateStreaming) {
  WebAssembly.instantiateStreaming = async (resp, importObject) => {
    // `resp` may be a Response or a promise for one; await covers both.
    const response = await resp;
    const moduleBytes = await response.arrayBuffer();
    return WebAssembly.instantiate(moduleBytes, importObject);
  };
}
// ---- Page wiring: output textarea, wasm worker, and the drop zone ----

// Must be set synchronously, before Dropzone gets a chance to auto-attach
// to the <form class="dropzone"> on its own.
Dropzone.autoDiscover = false;

// Both bindings below are read by processPDF. They are declared before the
// drop zone is created so that a file dropped while the worker is still
// starting can never reach processPDF with these bindings uninitialized.
const outtext = document.querySelector("#outtext");
let worker;

const GoWorker = Comlink.proxy(new Worker('worker.js'));
console.log("pre inst");
// Settles once the worker-side object has been constructed and `worker` is
// usable. Rejections are handled by every consumer below.
const workerReady = (async () => {
  worker = await new GoWorker();
  console.log("post inst");
})();

const myDropZone = new Dropzone('#my-drop-zone', {
  url: '#',
  acceptedFiles: ".pdf",
  maxFiles: 1,
  autoProcessQueue: false, // don't do real uploading (HTTP POST)
  addRemoveLinks: true,
  maxFilesize: Number.MAX_SAFE_INTEGER, // i.e. do not impose file size limit on DropZone, because we are not uploading any file
  // Set the prompt through Dropzone's own option instead of patching the
  // generated markup after the fact.
  dictDefaultMessage: "Drop PDF file here or click to select, only 1 file supported",
  accept: function(file, done) {
    console.log(file);
    done();
    // Pick the file now, while Dropzone's state still reflects this drop;
    // the actual processing is deferred until the worker is ready.
    const accepted = this.getAcceptedFiles()[0];
    workerReady.then(
      () => processPDF(accepted),
      (err) => {
        outtext.value += `Cannot process ${accepted.name}: the worker failed to start\n`;
        outtext.value += err + '\n';
      }
    );
  }
});

try {
  await workerReady;
} catch (err) {
  // Surface start-up failures to the user instead of leaving an unhandled
  // rejection that is only visible in DevTools.
  console.log("worker failed to start", err);
  outtext.value += `Error starting the worker: ${err}\n`;
}
/**
 * Reads the given PDF file, asks the worker to validate it and, when it is
 * valid, asks the worker to extract page 1 and passes the result to
 * download() as 'first_page.pdf'.
 *
 * Progress, timings and failures are appended to the `outtext` textarea.
 * The function never rejects: read, validation and extraction failures are
 * all reported in the textarea (and logged to the console) rather than thrown.
 *
 * @param {File} file - the PDF chosen by the user.
 * @returns {Promise<void>} settles once processing has finished or failed.
 */
async function processPDF(file) {
  const SEPARATOR = `-----------\n\n`;

  // Appends text to the on-page output area.
  const report = (text) => {
    outtext.value += text;
  };

  // Logs a failed step to the console and the output area, then closes the run.
  const reportFailure = (step, startedAt, err) => {
    console.log("caught in index.html!", err);
    report(`Error in ${step} (took: ${Date.now() - startedAt} ms)...\n`);
    report(err + '\n');
    report(SEPARATOR);
  };

  // Promise wrapper around FileReader so that read errors surface as
  // rejections instead of being silently dropped.
  const readAsArrayBuffer = (blob) => new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(reader.error);
    reader.readAsArrayBuffer(blob);
  });

  let st = Date.now();
  report(`Reading file ${file.name} ...\n`);
  let contents;
  try {
    contents = await readAsArrayBuffer(file);
  } catch (err) {
    reportFailure("reading file", st, err);
    return;
  }
  report(`DONE Reading file (took: ${Date.now() - st} ms)\n`);

  st = Date.now();
  report("Validating file...\n");
  let isValid;
  try {
    // Each worker call receives its own copy of the bytes.
    // NOTE(review): presumably needed because the buffer handed to the worker
    // may be transferred (detached) by Comlink — confirm for the version in use.
    isValid = await worker.validate(contents.slice());
  } catch (err) {
    reportFailure("validating", st, err);
    return;
  }
  report(`Done Validating file (took: ${Date.now() - st} ms)\n`);
  report(`File valid: ${isValid} \n`);
  if (!isValid) {
    report(`====> Something wrong with the PDF file because it does not conform to the PDF spec, so this tool could not proceed to process it\n`);
    report(SEPARATOR);
    return;
  }

  st = Date.now();
  report("Extracting the first page to a separate PDF...\n");
  try {
    // FIXME: extra copying of buffer
    const firstPage = await worker.extractPage(contents.slice(), 1);
    report(`DONE Extracting (took: ${Date.now() - st} ms)...\n`);
    report(SEPARATOR);
    download(new Blob([firstPage]), 'first_page.pdf');
  } catch (err) {
    reportFailure("extracting", st, err);
  }
}
})();
</script>
</body>
</html>