UKPLab · melolw · May 5, 2026 · May 5, 2026 · May 11, 2026 · May 11, 2026
diff --git a/backend/package-lock.json b/backend/package-lock.json
diff --git a/backend/webserver/routes/export.js b/backend/webserver/routes/export.js
@@ -5,6 +5,8 @@ const { faker } = require('@faker-js/faker');
 const JSZip = require('jszip');
 const { deriveUserSeed } = require('../auth/utils');
 const Papa = require('papaparse');
+const { dbToDelta } = require('editor-delta-conversion');
+const storageDir = path.join(__dirname, "..", "..", "..", "files");
 
 module.exports = function (server) {
 
@@ -35,9 +37,10 @@ module.exports = function (server) {
         }
 
         // Input parsing
-        const { projectId, exportType, generateAliases, fakerSeed } = req.body;
-        let { userIds = [] } = req.body;
+        const { projectId, exportType, generateAliases, fakerSeed, includeNonConsentingEdits } = req.body;
+        let { userIds = [], documentTypes = [0, 1, 2, 4] } = req.body;
         const shouldGenerateAliases = String(generateAliases) === 'true';
+        const shouldIncludeNonConsenting = String(includeNonConsentingEdits) === 'true';
 
         try {
             userIds = typeof userIds === 'string' ? JSON.parse(userIds) : userIds;
@@ -47,6 +50,14 @@ module.exports = function (server) {
             userIds = [];
         }
 
+        try {
+            documentTypes = typeof documentTypes === 'string' ? JSON.parse(documentTypes) : documentTypes;
+            if (!Array.isArray(documentTypes)) documentTypes = [0, 1, 2, 4];
+        } catch (e) {
+            console.warn("Could not parse documentTypes:", documentTypes);
+            documentTypes = [0, 1, 2, 4];
+        }
+
         try {
             // check if the project is valid
             const projectCheck = await server.db.models.project.findOne({ where: { id: projectId } });
@@ -57,7 +68,7 @@ module.exports = function (server) {
 
             const users = await server.db.models.user.findAll({ where: { id: userIds } });
 
-            if (userIds.length === 0) {
+            if (userIds.length === 0 && exportType != 'documents') {
                 console.warn(`Export aborted: No authorized users to export.`);
                 return res.status(400).send("No authorized users to export.");
             }
@@ -94,6 +105,18 @@ module.exports = function (server) {
                         exportFolderName.split('.')[0],
                         archive
                     );
+                    break;
+                case 'documents':
+                    await processDocumentBasedExport(
+                        server,
+                        projectId,
+                        userIds,
+                        documentTypes,
+                        shouldIncludeNonConsenting,
+                        exportFolderName.split('.')[0],
+                        archive
+                    );
+                    break;
                 default:
                     console.warn(`Export type ${exportType} not implemented.`);
             }
@@ -230,7 +253,6 @@ module.exports = function (server) {
             1: ".html",
             4: ".zip"
         };
-        const storageDir = path.join(__dirname, "..", "..", "..", "files");
 
         for (const submission of submissions) {
             const student = users.find(u => u.id === submission.userId);
@@ -298,4 +320,221 @@ module.exports = function (server) {
         }
         return version;
     }
+
+    /**
+     * Extracts the plain text of a Quill Delta.
+     * Only string inserts contribute to the result; ops whose `insert` is not a string
+     * (embeds, or ops without an insert) are ignored.
+     * @param {Object} delta - A Quill Delta object with an `ops` array.
+     * @returns {string} - All string inserts concatenated in op order.
+     */
+    function deltaToText(delta) {
+        return delta.ops.reduce(
+            (text, op) => (typeof op.insert === 'string' ? text + op.insert : text),
+            ''
+        );
+    }
+
+    /**
+     * Converts a Quill Delta object to a standalone HTML document.
+     * Every newline closes the current paragraph, which is emitted as a <p> element
+     * (an empty paragraph becomes <p><br></p>). Text remaining after the last newline
+     * is emitted as a final paragraph.
+     * Supported inline attributes: bold, italic, underline and link. Ops whose insert is
+     * not a string (embeds) and all other attributes are ignored.
+     * Text is HTML-escaped; link targets are additionally escaped for use inside a
+     * double-quoted attribute, so a quote in the URL cannot break out of the href.
+     * @param {Object} delta - A Quill Delta object with an `ops` array.
+     * @returns {string} - A full HTML document string.
+     */
+    function deltaToHtml(delta) {
+        const escapeText = (value) => value
+            .replace(/&/g, '&amp;')
+            .replace(/</g, '&lt;')
+            .replace(/>/g, '&gt;');
+        // Attribute values also need their quotes neutralised.
+        // NOTE(review): the URL scheme itself is not validated here (e.g. `javascript:` passes through).
+        const escapeAttribute = (value) => escapeText(String(value))
+            .replace(/"/g, '&quot;')
+            .replace(/'/g, '&#39;');
+
+        let html = '';
+        let lineBuffer = [];
+
+        const flushLine = () => {
+            html += `<p>${lineBuffer.join('') || '<br>'}</p>\n`;
+            lineBuffer = [];
+        };
+
+        for (const op of delta.ops) {
+            if (typeof op.insert !== 'string') continue;
+
+            const { bold, italic, underline, link } = op.attributes || {};
+            const lines = op.insert.split('\n');
+            lines.forEach((segment, i) => {
+                if (segment.length > 0) {
+                    let content = escapeText(segment);
+
+                    // Wrappers are applied inside-out: strong, em, u, with the link outermost.
+                    if (bold)      content = `<strong>${content}</strong>`;
+                    if (italic)    content = `<em>${content}</em>`;
+                    if (underline) content = `<u>${content}</u>`;
+                    if (link)      content = `<a href="${escapeAttribute(link)}">${content}</a>`;
+
+                    lineBuffer.push(content);
+                }
+                // Every segment except the last one was terminated by a newline.
+                if (i < lines.length - 1) flushLine();
+            });
+        }
+        if (lineBuffer.length > 0) flushLine();
+
+        return `<!DOCTYPE html>\n<html>\n<body>\n${html}</body>\n</html>`;
+    }
+
+    /**
+     * Exports a single document to the archive based on its type.
+     * For every type, the document's non-deleted document_data rows are written to
+     * document_data.json (only when at least one row exists).
+     * - Type 0 (PDF): exports annotations, comments (each with its non-deleted votes), and the PDF file.
+     * - Type 1 (HTML) / Type 2 (Modal): groups the non-deleted edits by study session (edits without
+     *   a study session form the "template" group) and writes text.txt, html.html and edits.json
+     *   for every group whose text is not blank.
+     * - Type 4 (ZIP): exports the zip file.
+     * A missing file on disk or an unknown document type is logged and skipped; it does not fail the export.
+     * @param {Object} server - The server instance providing database models.
+     * @param {Object} doc - The document record from the database (`id`, `type` and `hash` are used).
+     * @param {string} docFolder - The target folder path inside the archive.
+     * @param {boolean} includeNonConsentingEdits - When falsy, edits made by users whose
+     *   `acceptDataSharing` flag is not set are dropped; edits without a userId are always kept.
+     * @param {Object} archive - The archiver instance to append files to.
+     * @returns {Promise<void>}
+     */
+    async function processDocumentForExport(server, doc, docFolder, includeNonConsentingEdits, archive) {
+        const { models } = server.db;
+
+        // Writes the rows as pretty-printed JSON, skipping the file entirely when there are none.
+        const appendJson = (rows, fileName) => {
+            if (rows.length > 0) {
+                archive.append(JSON.stringify(rows, null, 2), { name: `${docFolder}/${fileName}` });
+            }
+        };
+
+        // Adds `<hash>.<extension>` from the storage directory, or logs a warning if it is missing.
+        const appendStoredFile = (extension, label) => {
+            const filePath = path.join(storageDir, `${doc.hash}.${extension}`);
+            if (fs.existsSync(filePath)) {
+                archive.file(filePath, { name: `${docFolder}/document.${extension}` });
+            } else {
+                console.warn(`[DocumentExport] ${label} not found for document ${doc.hash}`);
+            }
+        };
+
+        // document_data for all types, at the doc level
+        const documentData = await models.document_data.findAll({
+            where: { documentId: doc.id, deleted: false },
+            raw: true,
+        });
+        appendJson(documentData, 'document_data.json');
+
+        switch (doc.type) {
+            case 0: { // PDF
+                // NOTE(review): unlike the other queries in this function, these two do not
+                // filter on `deleted` — confirm that exporting deleted rows is intended.
+                const [annotations, comments] = await Promise.all([
+                    models.annotation.findAll({ where: { documentId: doc.id }, raw: true }),
+                    models.comment.findAll({ where: { documentId: doc.id }, raw: true }),
+                ]);
+
+                // No comments means no votes to look up, so the query is skipped.
+                const commentVotes = comments.length > 0
+                    ? await models.comment_vote.findAll({
+                        where: { commentId: comments.map(c => c.id), deleted: false },
+                        raw: true,
+                    })
+                    : [];
+
+                // Group the votes once instead of re-scanning the vote list for every comment.
+                const votesByCommentId = new Map();
+                for (const vote of commentVotes) {
+                    const votes = votesByCommentId.get(vote.commentId);
+                    if (votes) {
+                        votes.push(vote);
+                    } else {
+                        votesByCommentId.set(vote.commentId, [vote]);
+                    }
+                }
+                const commentsWithVotes = comments.map(c => ({
+                    ...c,
+                    votes: votesByCommentId.get(c.id) ?? [],
+                }));
+
+                appendJson(annotations, 'annotations.json');
+                appendJson(commentsWithVotes, 'comments.json');
+                appendStoredFile('pdf', 'PDF');
+                break;
+            }
+
+            case 1: // HTML
+            case 2: { // MODAL
+                // fetch all edits for this document, ordered chronologically
+                let allEdits = await models.document_edit.findAll({
+                    where: { documentId: doc.id, deleted: false },
+                    order: [['createdAt', 'ASC']],
+                    raw: true,
+                });
+
+                // filter by consent unless the option is enabled
+                if (!includeNonConsentingEdits) {
+                    const editorUserIds = [...new Set(allEdits.map(e => e.userId).filter(Boolean))];
+                    const editorUsers = editorUserIds.length > 0
+                        ? await models.user.findAll({
+                            where: { id: editorUserIds },
+                            attributes: ['id', 'acceptDataSharing'],
+                            raw: true,
+                        })
+                        : [];
+                    const consentedUserIds = new Set(
+                        editorUsers.filter(u => u.acceptDataSharing).map(u => u.id)
+                    );
+                    // Edits that carry no userId cannot be attributed to anyone and are kept.
+                    allEdits = allEdits.filter(e => !e.userId || consentedUserIds.has(e.userId));
+                }
+
+                // group edits by studySessionId (null = template)
+                const sessionGroups = new Map();
+                for (const edit of allEdits) {
+                    const key = edit.studySessionId ?? '__template__';
+                    if (!sessionGroups.has(key)) sessionGroups.set(key, []);
+                    sessionGroups.get(key).push(edit);
+                }
+
+                // fetch study sessions to resolve hashes
+                const sessionIds = [...sessionGroups.keys()].filter(k => k !== '__template__');
+                const sessions = sessionIds.length > 0
+                    ? await models.study_session.findAll({
+                        where: { id: sessionIds },
+                        attributes: ['id', 'hash'],
+                        raw: true,
+                    })
+                    : [];
+                const sessionHashMap = new Map(sessions.map(s => [s.id, s.hash]));
+
+                for (const [key, edits] of sessionGroups.entries()) {
+                    // presumably dbToDelta replays the stored edits into a Quill Delta — confirm against the library
+                    const delta = dbToDelta(edits);
+
+                    // skip empty content
+                    const text = deltaToText(delta);
+                    if (!text.trim()) continue;
+
+                    // Session folders use the session hash; the raw id is the fallback when no session row was found.
+                    const subFolder = key === '__template__'
+                        ? `${docFolder}/template`
+                        : `${docFolder}/${sessionHashMap.get(key) ?? key}`;
+
+                    archive.append(text, { name: `${subFolder}/text.txt` });
+                    archive.append(deltaToHtml(delta), { name: `${subFolder}/html.html` });
+                    archive.append(JSON.stringify(edits, null, 2), { name: `${subFolder}/edits.json` });
+                }
+                break;
+            }
+
+            case 4: // ZIP
+                appendStoredFile('zip', 'ZIP');
+                break;
+
+            default:
+                console.warn(`[DocumentExport] Unhandled document type ${doc.type} for document ${doc.hash}, skipping.`);
+        }
+    }
+
+    /**
+     * Main export function for the "documents" export type.
+     * Loads the project's non-deleted top-level documents (those without a parent document)
+     * that belong to the given users, keeps the ones whose type was selected, and exports
+     * each of them into its own folder named after the document hash.
+     * NOTE(review): `userIds` is passed to the query unchanged; an empty list presumably
+     * matches no documents, so nothing would be exported — confirm this is intended, since
+     * the route lets "documents" exports through without any users.
+     * @param {Object} server - The server instance providing database models.
+     * @param {number|string} projectId - The ID of the project to export.
+     * @param {Array<number>} userIds - IDs of the users whose documents are exported.
+     * @param {Array<number|string>} documentTypes - Document types to include
+     *   (0=PDF, 1=HTML, 2=Modal, 4=ZIP); numbers and numeric strings are treated alike.
+     * @param {boolean} includeNonConsentingEdits - Forwarded to processDocumentForExport for every document.
+     * @param {string} baseFolderName - The root folder name inside the ZIP archive.
+     * @param {Object} archive - The archiver instance to append files to.
+     * @returns {Promise<void>}
+     */
+    async function processDocumentBasedExport(server, projectId, userIds, documentTypes, includeNonConsentingEdits, baseFolderName, archive) {
+        const docs = await server.db.models.document.findAll({
+            where: { projectId, userId: userIds, deleted: false, parentDocumentId: null },
+        });
+
+        if (docs.length === 0) {
+            console.warn(`[DocumentExport] No documents found for project ${projectId}`);
+            return;
+        }
+
+        // Compare types by their string form so that 1 and "1" select the same documents.
+        const selectedTypes = new Set(documentTypes.map(String));
+        const filteredDocs = docs.filter(doc => selectedTypes.has(String(doc.type)));
+
+        if (filteredDocs.length === 0) {
+            console.warn(`[DocumentExport] No documents matching selected types found for project ${projectId}`);
+            return;
+        }
+
+        // Exported one at a time so entries are added to the archive in document order.
+        for (const doc of filteredDocs) {
+            const docFolder = `${baseFolderName}/${doc.hash}`;
+            await processDocumentForExport(server, doc, docFolder, includeNonConsentingEdits, archive);
+        }
+    }
 };