Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion backend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

247 changes: 243 additions & 4 deletions backend/webserver/routes/export.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ const { faker } = require('@faker-js/faker');
const JSZip = require('jszip');
const { deriveUserSeed } = require('../auth/utils');
const Papa = require('papaparse');
const { dbToDelta } = require('editor-delta-conversion');
const storageDir = path.join(__dirname, "..", "..", "..", "files");

module.exports = function (server) {

Expand Down Expand Up @@ -35,9 +37,10 @@ module.exports = function (server) {
}

// Input parsing
const { projectId, exportType, generateAliases, fakerSeed } = req.body;
let { userIds = [] } = req.body;
const { projectId, exportType, generateAliases, fakerSeed, includeNonConsentingEdits } = req.body;
let { userIds = [], documentTypes = [0, 1, 2, 4] } = req.body;
const shouldGenerateAliases = String(generateAliases) === 'true';
const shouldIncludeNonConsenting = String(includeNonConsentingEdits) === 'true';

try {
userIds = typeof userIds === 'string' ? JSON.parse(userIds) : userIds;
Expand All @@ -47,6 +50,14 @@ module.exports = function (server) {
userIds = [];
}

try {
documentTypes = typeof documentTypes === 'string' ? JSON.parse(documentTypes) : documentTypes;
if (!Array.isArray(documentTypes)) documentTypes = [0, 1, 2, 4];
} catch (e) {
console.warn("Could not parse documentTypes:", documentTypes);
documentTypes = [0, 1, 2, 4];
}

try {
// check if the project is valid
const projectCheck = await server.db.models.project.findOne({ where: { id: projectId } });
Expand All @@ -57,7 +68,7 @@ module.exports = function (server) {

const users = await server.db.models.user.findAll({ where: { id: userIds } });

if (userIds.length === 0) {
if (userIds.length === 0 && exportType != 'documents') {
console.warn(`Export aborted: No authorized users to export.`);
return res.status(400).send("No authorized users to export.");
}
Expand Down Expand Up @@ -94,6 +105,18 @@ module.exports = function (server) {
exportFolderName.split('.')[0],
archive
);
break;
case 'documents':
await processDocumentBasedExport(
server,
projectId,
userIds,
documentTypes,
shouldIncludeNonConsenting,
exportFolderName.split('.')[0],
archive
);
break;
default:
console.warn(`Export type ${exportType} not implemented.`);
}
Expand Down Expand Up @@ -230,7 +253,6 @@ module.exports = function (server) {
1: ".html",
4: ".zip"
};
const storageDir = path.join(__dirname, "..", "..", "..", "files");

for (const submission of submissions) {
const student = users.find(u => u.id === submission.userId);
Expand Down Expand Up @@ -298,4 +320,221 @@ module.exports = function (server) {
}
return version;
}

/**
 * Flattens a Quill Delta into plain text.
 * Walks the ops in order and keeps only those whose `insert` is a string;
 * ops with a non-string `insert` (or no `insert` at all) contribute nothing.
 * @param {Object} delta - A Quill Delta object with an `ops` array.
 * @returns {string} - The concatenation of all string inserts.
 */
function deltaToText(delta) {
    return delta.ops.reduce((text, { insert }) => {
        if (typeof insert !== 'string') {
            return text;
        }
        return text + insert;
    }, '');
}

/**
 * Converts a Quill Delta object to an HTML string.
 * Each newline in the delta marks the end of a paragraph and is flushed as a <p> tag;
 * a paragraph without content is rendered as <p><br></p>. Ops whose `insert` is not a
 * string are skipped.
 * Supports bold, italic, underline, and link attributes. Text content is HTML-escaped,
 * and the link target is escaped for use inside a double-quoted attribute so that
 * document content cannot break out of the `href` and inject markup.
 * @param {Object} delta - A Quill Delta object with an `ops` array.
 * @returns {string} - A full HTML document string.
 */
function deltaToHtml(delta) {
    // Escapes a string for use as element text content.
    const escapeText = (value) => value
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    // Escapes a value for use inside a double-quoted attribute value.
    const escapeAttribute = (value) => escapeText(String(value)).replace(/"/g, '&quot;');

    let html = '';
    let lineBuffer = [];

    // Emits the buffered segments as one paragraph; an empty buffer becomes <br>.
    const flushLine = () => {
        html += '<p>' + (lineBuffer.join('') || '<br>') + '</p>\n';
        lineBuffer = [];
    };

    for (const op of delta.ops) {
        if (typeof op.insert !== 'string') continue;

        const lines = op.insert.split('\n');
        lines.forEach((segment, i) => {
            if (segment.length > 0) {
                let content = escapeText(segment);

                if (op.attributes) {
                    if (op.attributes.bold) content = `<strong>${content}</strong>`;
                    if (op.attributes.italic) content = `<em>${content}</em>`;
                    if (op.attributes.underline) content = `<u>${content}</u>`;
                    // NOTE(review): the URL scheme is not validated here (e.g. `javascript:`);
                    // only the attribute quoting is made safe. Confirm whether links are
                    // sanitized upstream before they are stored.
                    if (op.attributes.link) content = `<a href="${escapeAttribute(op.attributes.link)}">${content}</a>`;
                }
                lineBuffer.push(content);
            }
            // Every '\n' inside the insert terminates the current paragraph.
            if (i < lines.length - 1) flushLine();
        });
    }
    // Trailing text without a final newline still forms a paragraph.
    if (lineBuffer.length > 0) flushLine();

    return `<!DOCTYPE html>\n<html>\n<body>\n${html}</body>\n</html>`;
}

/**
 * Exports a single document to the archive based on its type.
 * - All types: non-deleted document_data rows are written to `document_data.json` (if any exist).
 * - Type 0 (PDF): exports annotations, comments (each with its non-deleted votes), and the PDF file.
 * - Type 1 (HTML) / Type 2 (Modal): exports, per study session (or `template` for edits without
 *   a session), the raw edits, the plain text, and an HTML rendering.
 * - Type 4 (ZIP): exports the zip file.
 * Other types are skipped with a warning.
 * @param {Object} server - The server instance providing database models.
 * @param {Object} doc - The document record from the database.
 * @param {string} docFolder - The target folder path inside the archive.
 * @param {boolean} includeNonConsentingEdits - When falsy, edits authored by users whose
 *   `acceptDataSharing` flag is not set are dropped before export (type 1/2 only).
 * @param {Object} archive - The archiver instance to append files to.
 * @returns {Promise<void>}
 */
async function processDocumentForExport(server, doc, docFolder, includeNonConsentingEdits, archive) {
    const { models } = server.db;

    // document_data for all types, at the doc level
    const documentData = await models.document_data.findAll({
        where: { documentId: doc.id, deleted: false },
        raw: true,
    });
    if (documentData.length > 0) {
        archive.append(JSON.stringify(documentData, null, 2), { name: `${docFolder}/document_data.json` });
    }

    switch (doc.type) {
        case 0: { // PDF
            // NOTE(review): unlike the other queries in this function, annotations and
            // comments are not filtered by `deleted: false` — confirm this is intended.
            const [annotations, comments] = await Promise.all([
                models.annotation.findAll({ where: { documentId: doc.id }, raw: true }),
                models.comment.findAll({ where: { documentId: doc.id }, raw: true }),
            ]);

            // Without comments there is nothing to look votes up for, so skip the query.
            const commentVotes = comments.length > 0
                ? await models.comment_vote.findAll({
                    where: { commentId: comments.map(c => c.id), deleted: false },
                    raw: true,
                })
                : [];

            // Group votes once by comment id instead of re-scanning all votes per comment.
            const votesByCommentId = new Map();
            for (const vote of commentVotes) {
                if (!votesByCommentId.has(vote.commentId)) votesByCommentId.set(vote.commentId, []);
                votesByCommentId.get(vote.commentId).push(vote);
            }
            const commentsWithVotes = comments.map(c => ({
                ...c,
                votes: votesByCommentId.get(c.id) ?? [],
            }));

            if (annotations.length > 0) {
                archive.append(JSON.stringify(annotations, null, 2), { name: `${docFolder}/annotations.json` });
            }
            if (commentsWithVotes.length > 0) {
                archive.append(JSON.stringify(commentsWithVotes, null, 2), { name: `${docFolder}/comments.json` });
            }

            const pdfPath = path.join(storageDir, `${doc.hash}.pdf`);
            if (fs.existsSync(pdfPath)) {
                archive.file(pdfPath, { name: `${docFolder}/document.pdf` });
            } else {
                console.warn(`[DocumentExport] PDF not found for document ${doc.hash}`);
            }
            break;
        }

        case 1: // HTML
        case 2: { // MODAL
            // fetch all edits for this document, ordered chronologically
            let allEdits = await models.document_edit.findAll({
                where: { documentId: doc.id, deleted: false },
                order: [['createdAt', 'ASC']],
                raw: true,
            });

            // filter by consent unless the option is enabled
            if (!includeNonConsentingEdits) {
                const editorUserIds = [...new Set(allEdits.map(e => e.userId).filter(Boolean))];
                const editorUsers = await models.user.findAll({
                    where: { id: editorUserIds },
                    attributes: ['id', 'acceptDataSharing'],
                    raw: true,
                });
                const consentedUserIds = new Set(
                    editorUsers.filter(u => u.acceptDataSharing).map(u => u.id)
                );
                // Edits without a userId are kept; only edits of known non-consenting users are dropped.
                allEdits = allEdits.filter(e => !e.userId || consentedUserIds.has(e.userId));
            }

            // group edits by studySessionId (null = template)
            const sessionGroups = new Map();
            for (const edit of allEdits) {
                const key = edit.studySessionId ?? '__template__';
                if (!sessionGroups.has(key)) sessionGroups.set(key, []);
                sessionGroups.get(key).push(edit);
            }

            // fetch study sessions to resolve hashes
            const sessionIds = [...sessionGroups.keys()].filter(k => k !== '__template__');
            const sessions = sessionIds.length > 0
                ? await models.study_session.findAll({
                    where: { id: sessionIds },
                    attributes: ['id', 'hash'],
                    raw: true,
                })
                : [];
            const sessionHashMap = new Map(sessions.map(s => [s.id, s.hash]));

            for (const [key, edits] of sessionGroups.entries()) {
                const isTemplate = key === '__template__';
                // presumably rebuilds a Quill Delta from the stored edit rows — see editor-delta-conversion
                const delta = dbToDelta(edits);

                // skip empty content
                const text = deltaToText(delta);
                if (!text.trim()) continue;

                // Fall back to the raw session id when no session hash could be resolved.
                const subFolder = isTemplate
                    ? `${docFolder}/template`
                    : `${docFolder}/${sessionHashMap.get(key) ?? key}`;

                archive.append(text, { name: `${subFolder}/text.txt` });
                archive.append(deltaToHtml(delta), { name: `${subFolder}/html.html` });
                archive.append(JSON.stringify(edits, null, 2), { name: `${subFolder}/edits.json` });
            }
            break;
        }

        case 4: { // ZIP — unchanged
            const zipPath = path.join(storageDir, `${doc.hash}.zip`);
            if (fs.existsSync(zipPath)) {
                archive.file(zipPath, { name: `${docFolder}/document.zip` });
            } else {
                console.warn(`[DocumentExport] ZIP not found for document ${doc.hash}`);
            }
            break;
        }

        default:
            console.warn(`[DocumentExport] Unhandled document type ${doc.type} for document ${doc.hash}, skipping.`);
    }
}

/**
 * Main export function for the "documents" export type.
 * Loads the project's non-deleted top-level documents (no parent document) owned by the
 * given users, keeps those whose type is among the requested document types, and exports
 * each remaining document into its own `<baseFolderName>/<document hash>` folder.
 * Logs a warning and returns without exporting when no document is found or none matches.
 * NOTE(review): an empty `userIds` presumably matches no documents at all — confirm this
 * is the intended behavior for callers that pass no users.
 * @param {Object} server - The server instance providing database models.
 * @param {number|string} projectId - The ID of the project to export.
 * @param {Array<number>} userIds - List of user IDs to filter documents by.
 * @param {Array<number|string>} documentTypes - List of document types to include
 *   (0=PDF, 1=HTML, 2=Modal, 4=ZIP); numeric types may also be given as strings.
 * @param {boolean} includeNonConsentingEdits - Forwarded unchanged to the per-document export.
 * @param {string} baseFolderName - The root folder name inside the ZIP archive.
 * @param {Object} archive - The archiver instance to append files to.
 * @returns {Promise<void>}
 */
async function processDocumentBasedExport(server, projectId, userIds, documentTypes, includeNonConsentingEdits, baseFolderName, archive) {
    const query = {
        where: { projectId, userId: userIds, deleted: false, parentDocumentId: null },
    };
    const candidates = await server.db.models.document.findAll(query);

    if (candidates.length === 0) {
        console.warn(`[DocumentExport] No documents found for project ${projectId}`);
        return;
    }

    // A type is selected when it appears either as-is or in its string form.
    const isSelectedType = (type) =>
        documentTypes.includes(type) || documentTypes.includes(String(type));

    const selected = [];
    for (const candidate of candidates) {
        if (isSelectedType(candidate.type)) {
            selected.push(candidate);
        }
    }

    if (selected.length === 0) {
        console.warn(`[DocumentExport] No documents matching selected types found for project ${projectId}`);
        return;
    }

    // Documents are exported one after another, in query order.
    for (const current of selected) {
        const targetFolder = `${baseFolderName}/${current.hash}`;
        await processDocumentForExport(server, current, targetFolder, includeNonConsentingEdits, archive);
    }
}
};
Loading
Loading