Skip to content

Instantly share code, notes, and snippets.

@wuast94
Created April 22, 2025 13:04
Show Gist options
  • Save wuast94/5d788b577d8fc5d6c68b0820b9caf602 to your computer and use it in GitHub Desktop.
Save wuast94/5d788b577d8fc5d6c68b0820b9caf602 to your computer and use it in GitHub Desktop.
Node-RED PDF Parser
[
{
"id": "d197b28e9395b61b",
"type": "ui-file-input",
"z": "e93f3133440568a7",
"group": "8948f2c8ad9f604f",
"name": "",
"order": 1,
"width": 0,
"height": 0,
"topic": "topic",
"topicType": "msg",
"label": "File Input",
"icon": "paperclip",
"allowMultiple": false,
"accept": "",
"className": "",
"x": 600,
"y": 340,
"wires": [
[
"81ca42c6330d3361",
"d649e707401cb0a7"
]
]
},
{
"id": "1308ad842a729aae",
"type": "debug",
"z": "e93f3133440568a7",
"name": "SUCCESS",
"active": true,
"tosidebar": true,
"console": false,
"tostatus": false,
"complete": "true",
"targetType": "full",
"statusVal": "",
"statusType": "auto",
"x": 790,
"y": 380,
"wires": []
},
{
"id": "81ca42c6330d3361",
"type": "function",
"z": "e93f3133440568a7",
"name": "Parse PDF",
"func": "/**\n * Extracts text content from a PDF buffer using the globally available pdf-parse library.\n * Assumes 'pdfParse' has been added to functionGlobalContext in settings.js.\n * Expects msg.payload to be a Buffer containing the PDF data.\n * Sends the result to one of two outputs:\n * - Output 1: Success - msg.payload contains the extracted text (string).\n * msg.pdfInfo contains metadata from pdf-parse.\n * - Output 2: Failure - Original msg with msg.error containing the error object.\n *\n * Requires the 'pdf-parse' library (https://www.npmjs.com/package/pdf-parse)\n * Library MUST be configured in settings.js -> functionGlobalContext.\n * @param {NodeMessage & {payload: Buffer}} msg The input message.\n */\n\n// --- Input Validation ---\n// Check if pdfParse is available globally (configured in settings.js)\nif (typeof pdfParse === 'undefined') {\n // node.error can take the msg object as the second argument\n node.error(\"Global 'pdfParse' not found. Ensure 'pdf-parse' is installed and added to functionGlobalContext in settings.js, then restart Node-RED.\", msg);\n // Send to error output\n node.send([null, msg]);\n return; // Stop execution\n}\n\nif (!msg.payload || !Buffer.isBuffer(msg.payload)) {\n // CORRECTED: node.warn only takes one argument (the message string)\n node.warn(\"msg.payload is missing or not a Buffer.\");\n // Send to error output\n msg.error = new Error(\"Input payload is not a valid Buffer.\");\n node.send([null, msg]);\n return; // Stop execution\n}\n\nconst pdfBuffer = msg.payload;\n// node.log only takes one argument\nnode.log(`Received PDF buffer, size: ${pdfBuffer.length} bytes. 
Starting parsing.`);\n\n// --- Asynchronous PDF Parsing ---\n// Use the globally available pdfParse function\npdfParse(pdfBuffer)\n .then(function(data) {\n // --- Success ---\n // node.log only takes one argument\n node.log(`Successfully parsed PDF: ${data.numpages} pages, ${data.text.length} characters.`);\n\n // Prepare the output message for the success path (Output 1)\n msg.payload = data.text; // Replace buffer with extracted text\n\n // Optionally add other extracted info\n msg.pdfInfo = {\n numPages: data.numpages,\n numRender: data.numrender,\n info: data.info,\n metadata: data.metadata,\n version: data.version\n };\n\n // Send the message to the first output\n node.send([msg, null]);\n\n })\n .catch(function(err) {\n // --- Failure ---\n // node.error can take the msg object as the second argument\n node.error(\"Error parsing PDF: \" + err.message, msg);\n\n // Prepare the output message for the error path (Output 2)\n msg.error = err; // Add the error object\n\n // Send the original message (with error info) to the second output\n node.send([null, msg]);\n });\n\n// Return null because the work is asynchronous and handled by node.send()\nreturn null;",
"outputs": 2,
"timeout": 0,
"noerr": 0,
"initialize": "",
"finalize": "",
"libs": [
{
"var": "pdfParse",
"module": "pdf-parse"
}
],
"x": 610,
"y": 400,
"wires": [
[
"1308ad842a729aae"
],
[
"dd667b4ee49e474c"
]
]
},
{
"id": "dd667b4ee49e474c",
"type": "debug",
"z": "e93f3133440568a7",
"name": "FAIL",
"active": true,
"tosidebar": true,
"console": false,
"tostatus": false,
"complete": "true",
"targetType": "full",
"statusVal": "",
"statusType": "auto",
"x": 770,
"y": 420,
"wires": []
},
{
"id": "d649e707401cb0a7",
"type": "debug",
"z": "e93f3133440568a7",
"name": "RAW UPLOAD",
"active": true,
"tosidebar": true,
"console": false,
"tostatus": false,
"complete": "true",
"targetType": "full",
"statusVal": "",
"statusType": "auto",
"x": 780,
"y": 340,
"wires": []
},
{
"id": "01fb87c40c24d01c",
"type": "comment",
"z": "e93f3133440568a7",
"name": "UI Element, kann ueber die einstellungen optisch angepasst werden",
"info": "UI Element,",
"x": 280,
"y": 340,
"wires": []
},
{
"id": "8948f2c8ad9f604f",
"type": "ui-group",
"name": "Group 1",
"page": "afdf2c859e3351de",
"width": 6,
"height": 1,
"order": 1,
"showTitle": true,
"className": "",
"visible": true,
"disabled": false,
"groupType": "default"
},
{
"id": "afdf2c859e3351de",
"type": "ui-page",
"name": "Page 1",
"ui": "c44752c1f7bc883a",
"path": "/page1",
"icon": "home",
"layout": "grid",
"theme": "91ce8000ccb00b38",
"breakpoints": [
{
"name": "Default",
"px": 0,
"cols": 3
},
{
"name": "Tablet",
"px": 576,
"cols": 6
},
{
"name": "Small Desktop",
"px": 768,
"cols": 9
},
{
"name": "Desktop",
"px": 1024,
"cols": 12
}
],
"order": 1,
"className": "",
"visible": "true",
"disabled": "false"
},
{
"id": "c44752c1f7bc883a",
"type": "ui-base",
"name": "My Dashboard",
"path": "/dashboard",
"appIcon": "",
"includeClientData": true,
"acceptsClientConfig": [
"ui-notification",
"ui-control"
],
"showPathInSidebar": false,
"headerContent": "page",
"navigationStyle": "default",
"titleBarStyle": "default",
"showReconnectNotification": true,
"notificationDisplayTime": 1,
"showDisconnectNotification": true,
"allowInstall": true
},
{
"id": "91ce8000ccb00b38",
"type": "ui-theme",
"name": "Default Theme",
"colors": {
"surface": "#ffffff",
"primary": "#0094CE",
"bgPage": "#eeeeee",
"groupBg": "#ffffff",
"groupOutline": "#cccccc"
},
"sizes": {
"density": "default",
"pagePadding": "12px",
"groupGap": "12px",
"groupBorderRadius": "4px",
"widgetGap": "12px"
}
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment