Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
### Added

- We added automatic date-based groups that create year/month/day subgroups from an entry’s date fields. [#10822](https://github.com/JabRef/jabref/issues/10822)
- We added automatic removal of duplicate entries from the study results of a systematic literature review (SLR). [#14226](https://github.com/JabRef/jabref/pull/14226)

### Changed

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package org.jabref.logic.crawler;

import java.util.Collections;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Set;

import org.jabref.logic.database.DuplicateCheck;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.BibEntryTypesManager;

/**
 * Removes duplicate entries from a database without user interaction.
 * <p>
 * Two entries count as duplicates when {@link DuplicateCheck#isDuplicate} reports them as such
 * for the mode of the given database context. For every group of duplicates the entry that
 * appears first in the database is kept; each later duplicate is merged into it via
 * {@link BibEntry#mergeWith(BibEntry)} and then removed from the database.
 */
public class AutomaticDuplicateRemover {
    private final BibEntryTypesManager bibEntryTypesManager;

    /**
     * @param bibEntryTypesManager entry types manager handed to the {@link DuplicateCheck} used for comparisons
     */
    public AutomaticDuplicateRemover(BibEntryTypesManager bibEntryTypesManager) {
        this.bibEntryTypesManager = bibEntryTypesManager;
    }

    /**
     * Merges and removes all duplicate entries of the given database in place.
     * <p>
     * Every pair of entries is compared, so the number of duplicate checks grows quadratically
     * with the number of entries.
     *
     * @param databaseContext context whose database is de-duplicated; its mode is passed to the duplicate check
     */
    public void removeDuplicates(BibDatabaseContext databaseContext) {
        DuplicateCheck duplicateCheck = new DuplicateCheck(bibEntryTypesManager);
        BibDatabase database = databaseContext.getDatabase();
        List<BibEntry> entries = database.getEntries();

        // Track flagged entries by reference identity, not by equals()/hashCode().
        // NOTE(review): BibEntry is assumed to implement content-based equals/hashCode (not visible
        // here - confirm). With a plain HashSet an entry that merely has the same content as an
        // already flagged one would be treated as "already removed", skipped in the loops below and
        // never passed to removeEntry, so it would survive as a duplicate.
        Set<BibEntry> entriesToRemove = Collections.newSetFromMap(new IdentityHashMap<>());

        for (int i = 0; i < entries.size(); i++) {
            BibEntry keptEntry = entries.get(i);
            if (entriesToRemove.contains(keptEntry)) {
                // Already merged into an earlier entry; must not act as a merge target itself.
                continue;
            }

            for (int j = i + 1; j < entries.size(); j++) {
                BibEntry candidate = entries.get(j);
                if (entriesToRemove.contains(candidate)) {
                    continue;
                }

                if (duplicateCheck.isDuplicate(keptEntry, candidate, databaseContext.getMode())) {
                    // Fold the duplicate's data into the kept entry before dropping the duplicate.
                    keptEntry.mergeWith(candidate);
                    entriesToRemove.add(candidate);
                }
            }
        }

        // Removal is deferred until after the scan so that indices into the entry list stay stable.
        for (BibEntry entry : entriesToRemove) {
            database.removeEntry(entry);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,10 @@ private void persistResults(List<QueryResult> crawlResults) throws IOException,
// Merge new entries into study result file
merger.merge(existingStudyResultEntries.getDatabase(), newStudyResultEntries);

LOGGER.info("Removing duplicates from study results (initially {} entries)", existingStudyResultEntries.getEntries().size());
new AutomaticDuplicateRemover(bibEntryTypesManager).removeDuplicates(existingStudyResultEntries);
LOGGER.info("Removed {} entries", existingStudyResultEntries.getEntries().size());

writeResultToFile(getPathToStudyResultFile(), existingStudyResultEntries);
}

Expand Down
Loading