diff --git a/CHANGELOG.md b/CHANGELOG.md
index 30842dfaa9d..0b97501a990 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
 ### Added
 
 - We added automatic date-based groups that create year/month/day subgroups from an entry’s date fields. [#10822](https://github.com/JabRef/jabref/issues/10822)
+- We added automatic removal of duplicate entries from SLR study results. [#14226](https://github.com/JabRef/jabref/pull/14226)
 
 ### Changed
 
diff --git a/jablib/src/main/java/org/jabref/logic/crawler/AutomaticDuplicateRemover.java b/jablib/src/main/java/org/jabref/logic/crawler/AutomaticDuplicateRemover.java
new file mode 100644
index 00000000000..49959bb78d4
--- /dev/null
+++ b/jablib/src/main/java/org/jabref/logic/crawler/AutomaticDuplicateRemover.java
@@ -0,0 +1,65 @@
+package org.jabref.logic.crawler;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.jabref.logic.database.DuplicateCheck;
+import org.jabref.model.database.BibDatabase;
+import org.jabref.model.database.BibDatabaseContext;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.BibEntryTypesManager;
+
+/**
+ * Removes duplicate entries from a database: every entry detected as a duplicate of an
+ * earlier entry is merged into that earlier entry and then deleted.
+ */
+public class AutomaticDuplicateRemover {
+    private final BibEntryTypesManager bibEntryTypesManager;
+
+    public AutomaticDuplicateRemover(BibEntryTypesManager bibEntryTypesManager) {
+        this.bibEntryTypesManager = bibEntryTypesManager;
+    }
+
+    /**
+     * Compares all entries of the given database pairwise. When two entries are reported as
+     * duplicates by {@link DuplicateCheck}, the later one is merged into the earlier one via
+     * {@code mergeWith} and removed from the database afterwards.
+     *
+     * @param databaseContext context whose database is deduplicated in place; its mode is
+     *                        passed on to the duplicate check
+     */
+    public void removeDuplicates(BibDatabaseContext databaseContext) {
+        DuplicateCheck duplicateCheck = new DuplicateCheck(bibEntryTypesManager);
+        BibDatabase database = databaseContext.getDatabase();
+        List<BibEntry> entries = database.getEntries();
+        // NOTE(review): HashSet relies on BibEntry#equals/hashCode; if those are content-based,
+        // an identity-based set may be safer here - confirm.
+        Set<BibEntry> entriesToRemove = new HashSet<>();
+
+        for (int i = 0; i < entries.size(); i++) {
+            BibEntry entry1 = entries.get(i);
+            if (entriesToRemove.contains(entry1)) {
+                // Already marked as a duplicate of an earlier entry
+                continue;
+            }
+
+            for (int j = i + 1; j < entries.size(); j++) {
+                BibEntry entry2 = entries.get(j);
+                if (entriesToRemove.contains(entry2)) {
+                    continue;
+                }
+
+                if (duplicateCheck.isDuplicate(entry1, entry2, databaseContext.getMode())) {
+                    entry1.mergeWith(entry2);
+                    entriesToRemove.add(entry2);
+                }
+            }
+        }
+
+        // Removal is deferred until the scan is finished, so the list is not modified while it is being indexed
+        for (BibEntry entry : entriesToRemove) {
+            database.removeEntry(entry);
+        }
+    }
+}
diff --git a/jablib/src/main/java/org/jabref/logic/crawler/StudyRepository.java b/jablib/src/main/java/org/jabref/logic/crawler/StudyRepository.java
index 6f6c4db5b10..f71d83cbe45 100644
--- a/jablib/src/main/java/org/jabref/logic/crawler/StudyRepository.java
+++ b/jablib/src/main/java/org/jabref/logic/crawler/StudyRepository.java
@@ -419,6 +419,11 @@ private void persistResults(List crawlResults) throws IOException,
         // Merge new entries into study result file
         merger.merge(existingStudyResultEntries.getDatabase(), newStudyResultEntries);
 
+        int entryCountBeforeDeduplication = existingStudyResultEntries.getEntries().size();
+        LOGGER.info("Removing duplicates from study results (initially {} entries)", entryCountBeforeDeduplication);
+        new AutomaticDuplicateRemover(bibEntryTypesManager).removeDuplicates(existingStudyResultEntries);
+        LOGGER.info("Removed {} entries", entryCountBeforeDeduplication - existingStudyResultEntries.getEntries().size());
+
         writeResultToFile(getPathToStudyResultFile(), existingStudyResultEntries);
     }
 