diff --git a/base/src/main/java/org/gorpipe/base/config/PropsHelper.java b/base/src/main/java/org/gorpipe/base/config/PropsHelper.java
new file mode 100644
index 00000000..76fe8049
--- /dev/null
+++ b/base/src/main/java/org/gorpipe/base/config/PropsHelper.java
@@ -0,0 +1,31 @@
+package org.gorpipe.base.config;
+
+/**
+ * Helper class for system properties related operations.
+ */
+public class PropsHelper {
+    /**
+     * Parse a boolean system property.
+     *
+     * @param name     property name; a {@code null} or empty name is treated as "not set"
+     * @param defValue default value
+     * @return {@code true}/{@code false} when the property is set to "true"/"false" (case-insensitive),
+     *         otherwise {@code defValue} (property not set, or set to an unrecognised value).
+     */
+    public static boolean getBoolean(String name, boolean defValue) {
+        // System.getProperty rejects null/empty keys with an exception; treat them as "not set".
+        if (name == null || name.isEmpty()) {
+            return defValue;
+        }
+        // Boolean.parseBoolean(null) silently returns false, which would mask defValue,
+        // so the unset and invalid cases are handled explicitly here.
+        String value = System.getProperty(name);
+        if ("true".equalsIgnoreCase(value)) {
+            return true;
+        }
+        if ("false".equalsIgnoreCase(value)) {
+            return false;
+        }
+        return defValue;
+    }
+}
diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala
index 8ed758cf..861aca51 100644
--- a/gortools/src/main/scala/gorsat/Commands/Write.scala
+++ b/gortools/src/main/scala/gorsat/Commands/Write.scala
@@ -56,7 +56,9 @@ class Write extends CommandInfo("WRITE",
           context.getSession.getProjectContext.getFileReader.getCommonRoot, null, null, true);
         // Infer the full file name from the link (and defautl locations)
         LinkFileUtil.inferDataFileNameFromLinkFile(
-          context.getSession.getProjectContext.getFileReader.resolveDataSource(linkSourceRef).asInstanceOf[StreamSource], linkMetaInfo.linkFileMeta);
+          context.getSession.getProjectContext.getFileReader.resolveDataSource(linkSourceRef).asInstanceOf[StreamSource],
+          linkMetaInfo.linkFileMeta,
+          context.getSession.getProjectContext.getFileReader);
       } else {
         fileName
       }
diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java
index b45f1b00..eade7cd0 100644
--- 
a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java
+++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java
@@ -10,6 +10,7 @@
 import com.github.benmanes.caffeine.cache.Caffeine;
 import com.google.common.util.concurrent.UncheckedExecutionException;
 
+import org.gorpipe.base.config.PropsHelper;
 import org.gorpipe.exceptions.GorResourceException;
 import org.gorpipe.gor.driver.meta.SourceReference;
 import org.gorpipe.gor.driver.providers.stream.StreamUtils;
@@ -59,11 +60,14 @@ public abstract class LinkFile {
 
     private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(LinkFile.class);
 
-    // Approx max size of link file content to read or write. Stopp adding lines if exceeded. Dont load if twice this size.
+    // Approx max size of link file content to read or write. Stop adding lines if exceeded. Don't load if twice this size.
     public static final int LINK_FILE_MAX_SIZE = Integer.parseInt(System.getProperty("gor.driver.link.maxfilesize", "10000"));
     private static final boolean USE_LINK_CACHE = Boolean.parseBoolean(System.getProperty("gor.driver.link.cache", "true"));
     private static final boolean USE_LINK_CACHE_SESSION = Boolean.parseBoolean(System.getProperty("gor.driver.link.cache.session", "true"));
 
+    public static final String LINK_FILE_VALIDATE_LOAD = "gor.driver.link.validate.load";
+    public static final String LINK_FILE_VALIDATE_SAVE = "gor.driver.link.validate.save";
+
     private static final Cache staticLinkCache = Caffeine.newBuilder()
             .maximumSize(10000)
             .expireAfterWrite(5, TimeUnit.MINUTES).build();
@@ -128,6 +132,10 @@ protected LinkFile(StreamSource source, LinkFileMeta meta, String content) {
         this.source = source;
         this.meta = meta;
         this.entries = parseEntries(content);
+
+        if (PropsHelper.getBoolean(LINK_FILE_VALIDATE_LOAD, false)) {
+            validate();
+        }
     }
 
     public LinkFileMeta getMeta() {
@@ -281,6 +289,10 @@ public void save(long timestamp, FileReader reader) {
 
 
     private void save(OutputStream os, long timestamp, FileReader reader) {
+        if (PropsHelper.getBoolean(LINK_FILE_VALIDATE_SAVE, false)) {
+            validate();
+        }
+
         meta.setProperty(LinkFileMeta.HEADER_SERIAL_KEY, Integer.toString(Integer.parseInt(meta.getProperty(LinkFileMeta.HEADER_SERIAL_KEY, "0")) + 1));
         var currentTimestamp = timestamp > 0 ? timestamp : System.currentTimeMillis();
 
@@ -309,6 +321,26 @@ private void save(OutputStream os, long timestamp, FileReader reader) {
 
     protected abstract List parseEntries(String content);
 
+    /**
+     * Check internal consistency of this link file.
+     *
+     * @return list of human-readable violation strings; empty means the file is clean
+     */
+    public abstract List<String> checkIntegrity();
+
+    /**
+     * Validate the link file content.
+     *
+     * @throws GorResourceException if {@link #checkIntegrity()} reports one or more violations
+     */
+    public void validate() {
+        var violations = checkIntegrity();
+        if (!violations.isEmpty()) {
+            throw new GorResourceException("Link file integrity check failed with %d violation(s):\n%s".formatted(violations.size(), String.join("\n", violations)),
+                    source.getFullPath());
+        }
+    }
+
     // Check if we can garbage collect entries between fromIndex and toIndex (inclusive).
/**
 
diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java
index 56ee429e..ea0b64dd 100644
--- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java
+++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java
@@ -2,7 +2,9 @@
 
 import gorsat.Commands.CommandParseUtilities;
 import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.gorpipe.exceptions.GorResourceException;
 import org.gorpipe.gor.driver.GorDriverConfig;
 import org.gorpipe.gor.driver.providers.stream.sources.StreamSource;
 import org.gorpipe.gor.model.FileReader;
@@ -30,6 +32,22 @@ public class LinkFileUtil {
      * @return the data file path
      */
     public static String inferDataFileNameFromLinkFile(StreamSource linkSource, String linkFileMeta) throws IOException {
+        return inferDataFileNameFromLinkFile(linkSource, linkFileMeta, null);
+    }
+
+    /**
+     * Infer the data file name from the link file name.
+     * When a {@code fileReader} is supplied and the link file is lifecycle-managed, this method also
+     * verifies that the inferred target path does not already exist — overwriting a managed file
+     * would silently corrupt the version history stored in the link file.
+     *
+     * @param linkSource the link file path with the link extension
+     * @param linkFileMeta additional link file meta data
+     * @param fileReader file reader used to check target existence; may be {@code null} to skip the check
+     * @return the data file path
+     * @throws GorResourceException if the link is managed and the inferred target already exists
+     */
+    public static String inferDataFileNameFromLinkFile(StreamSource linkSource, String linkFileMeta, FileReader fileReader) throws IOException {
         if (linkSource == null || Strings.isNullOrEmpty(linkSource.getFullPath())) {
             throw new IllegalArgumentException("Link file path is null or empty. Can not infer data file name.");
         }
@@ -68,12 +86,21 @@ public static String inferDataFileNameFromLinkFile(StreamSource linkSource, Stri
 
         var fileName = PathUtils.getFileName(linkSource.getFullPath());
         var extraFolder = PathUtils.removeExtensions(fileName);
-        var uniqueFileName = PathUtils.injectStringIntoFileName(fileName, Integer.toString(link.getSerial() + 1));
+        var uniqueIdPart = (link.getSerial() + 1) + "-" + RandomStringUtils.insecure().nextAlphanumeric(3);
+        var uniqueFileName = PathUtils.injectStringIntoFileName(fileName, uniqueIdPart);
+        var resolvedPath = PathUtils.resolve(PathUtils.resolve(dataFileParentPath, extraFolder), uniqueFileName);
+
+        log.debug("Inferred file name for link file {} is {}", linkSource.getFullPath(), resolvedPath);
 
-        log.warn("Inferred file name for link file {} is {}", linkSource.getFullPath(),
-                PathUtils.resolve(PathUtils.resolve(dataFileParentPath, extraFolder), uniqueFileName));
+        if (fileReader != null && link.getMeta().getPropertyBool(LinkFileMeta.HEADER_DATA_LIFECYCLE_MANAGED_KEY, false)) {
+            if (fileReader.exists(resolvedPath)) {
+                throw new GorResourceException(
+                        "Managed link file target already exists, overwrite would corrupt version history: " + resolvedPath,
+                        resolvedPath);
+            }
+        }
 
-        return PathUtils.resolve(PathUtils.resolve(dataFileParentPath, extraFolder), uniqueFileName);
+        return resolvedPath;
     }
 
     private static Pattern linkPattern = Pattern.compile(".* -link ([^\\s]*) ?.*", Pattern.CASE_INSENSITIVE);
diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV0.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV0.java
index fb495a87..7f62fc5d 100644
--- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV0.java
+++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV0.java
@@ -3,7 +3,7 @@
 import org.gorpipe.gor.driver.providers.stream.sources.StreamSource;
 import org.gorpipe.gor.model.FileReader;
 
-import java.io.IOException;
+import java.util.Collections;
 import java.util.List;
 
 /**
@@ -35,6 +35,12 @@ public LinkFile appendEntry(String link, String md5, String info, FileReader rea
         return this;
     }
 
+    // Version 0 implements no integrity rules, so it never reports a violation.
+    @Override
+    public List<String> checkIntegrity() {
+        return Collections.emptyList();
+    }
+
     public static String getDefaultMetaContent() {
         return String.format("""
                 ## VERSION = 0
diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java
index 4ce999e1..9d5bd621 100644
--- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java
+++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java
@@ -2,9 +2,14 @@
 
 import org.gorpipe.gor.driver.providers.stream.sources.StreamSource;
 import org.gorpipe.gor.model.FileReader;
+import org.gorpipe.util.Strings;
 
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 
+import static org.gorpipe.gor.driver.linkfile.LinkFileMeta.HEADER_DATA_LIFECYCLE_MANAGED_KEY;
+
 /**
  * Link file format, version 1.
  */
@@ -41,7 +46,7 @@ protected List parseEntries(String content) {
     public LinkFile appendEntry(String link, String md5, String info, FileReader reader) {
         var latestEntry = getLatestEntry();
         var entry = new LinkFileEntryV1(link, System.currentTimeMillis(), md5, latestEntry != null ? 
latestEntry.serial() + 1 : 1, info);
-        validateEntry(entry, reader);
+        validateEntry(entry, latestEntry, reader);
         entry = handleRepeatedEntries(entry, reader);
         if (entry != null) {
             entries.add(entry);
@@ -53,15 +58,25 @@ public LinkFile appendEntry(String link, String md5, String info, FileReader rea
      * Validate the entry to ensure it is of the correct type, format and does not violate integrity of the link file.
      * @param entry the link file entry to validate
      */
-    private void validateEntry(LinkFileEntry entry, FileReader reader) {
+    private void validateEntry(LinkFileEntry entry, LinkFileEntry latestEntry, FileReader reader) {
         if (!(entry instanceof LinkFileEntryV1)) {
             throw new IllegalArgumentException("Invalid entry type: " + entry.getClass().getName());
         }
         if (entry.url() == null || entry.url().isEmpty()) {
             throw new IllegalArgumentException("Entry URL cannot be null or empty");
         }
-        if (!allowOverwriteOfTargets) {
-            // Only applies to non managed data.
+        if (latestEntry != null && entry.serial() > 0 && latestEntry.serial() > 0
+                && entry.serial() <= latestEntry.serial()) {
+            throw new IllegalArgumentException(
+                    "Entry serial %d must be greater than latest serial %d".formatted(entry.serial(), latestEntry.serial()));
+        }
+        if (latestEntry != null && entry.timestamp() > 0 && latestEntry.timestamp() > 0
+                && entry.timestamp() < latestEntry.timestamp()) {
+            log.warn("Entry timestamp {} is before latest entry timestamp {} in link file {}",
+                    entry.timestamp(), latestEntry.timestamp(), source.getFullPath());
+        }
+        boolean isManaged = meta.getPropertyBool(HEADER_DATA_LIFECYCLE_MANAGED_KEY, false);
+        if (!allowOverwriteOfTargets || isManaged) {
             for (LinkFileEntry existingEntry : entries) {
                 if (existingEntry.url().equals(entry.url()) && !canReuseEntryWithSameUrl(existingEntry, entry, reader)) {
                     throw new IllegalArgumentException("Duplicate entry URL: " + entry.url());
@@ -71,12 +86,12 @@ private void validateEntry(LinkFileEntry entry, FileReader reader) {
     }
 
     private boolean canReuseEntryWithSameUrl(LinkFileEntry oldEntry, LinkFileEntry newEntry, FileReader reader) {
-        // We can reuse an entry (same url) if the entries have the same underlying file, as if not the integrity of the
-        // versioned link file is violated (as the new entry file overwrites the old entry file, but the old entry
-        // is still in the link file history).
+        // We can reuse an entry (same url) if the entries have the same underlying file (same MD5), as if not the
+        // integrity of the versioned link file is violated (as the new entry file overwrites the old entry file,
+        // but the old entry is still in the link file history).
         // BUT haven't we already ruined the integrity when we enter here!?
 
-        if ((oldEntry.md5() != null && newEntry.md5() != null)) {
+        if (!Strings.isNullOrEmpty(oldEntry.md5()) && !Strings.isNullOrEmpty(newEntry.md5())) {
             // Use md5 if available.
             return oldEntry.md5().equals(newEntry.md5());
         } else {
@@ -114,8 +129,9 @@ private LinkFileEntryV1 handleRepeatedEntries(LinkFileEntryV1 newEntry, FileRead
     }
 
     private LinkFileEntry findExistingEntryByMD5(LinkFileEntry entry) {
+        if (Strings.isNullOrEmpty(entry.md5())) return null;
         for (LinkFileEntry existingEntry : entries) {
-            if (existingEntry.md5().equals(entry.md5())) {
+            if (!Strings.isNullOrEmpty(existingEntry.md5()) && existingEntry.md5().equals(entry.md5())) {
                 return existingEntry;
             }
         }
@@ -149,6 +165,45 @@ private void cleanEntryDataIfManaged(LinkFileEntry candiateEntry, FileReader rea
         }
     }
 
+    /**
+     * Scan the entries in order and report every broken invariant: positive serials must be strictly
+     * increasing, positive timestamps must not go backwards, and a URL must not reappear with a
+     * different non-empty MD5 (compared case-insensitively).
+     *
+     * @return list of human-readable violation strings; empty means the file is clean
+     */
+    @Override
+    public List<String> checkIntegrity() {
+        var violations = new ArrayList<String>();
+        int prevSerial = -1;
+        long prevTimestamp = -1;
+        var urlToMd5 = new HashMap<String, String>();
+
+        for (var entry : entries) {
+            if (entry.serial() > 0 && prevSerial > 0 && entry.serial() <= prevSerial) {
+                violations.add("Entry serial %d is not greater than previous serial %d (url: %s)"
+                        .formatted(entry.serial(), prevSerial, entry.url()));
+            }
+            if (entry.serial() > 0) prevSerial = entry.serial();
+
+            if (prevTimestamp > 0 && entry.timestamp() > 0 && entry.timestamp() < prevTimestamp) {
+                violations.add("Entry timestamp %d is before previous timestamp %d (url: %s)"
+                        .formatted(entry.timestamp(), prevTimestamp, entry.url()));
+            }
+            if (entry.timestamp() > 0) prevTimestamp = entry.timestamp();
+
+            if (!Strings.isNullOrEmpty(entry.md5())) {
+                var existingMd5 = urlToMd5.get(entry.url());
+                if (existingMd5 != null && !existingMd5.equalsIgnoreCase(entry.md5())) {
+                    violations.add("URL '%s' appears with different MD5 values (%s vs %s)"
+                            .formatted(entry.url(), existingMd5, entry.md5()));
+                }
+                urlToMd5.put(entry.url(), entry.md5());
+            }
+        }
+        return violations;
+    }
+
     private void checkDefaultMeta() {
         if (!meta.getVersion().equals(VERSION)) {
             meta.loadAndMergeMeta(getDefaultMetaContent());
diff --git a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java
index 26152baf..070fd5c4 100644
--- a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java
+++ b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java
@@ -1,5 +1,6 @@
 package org.gorpipe.gor.driver.linkfile;
 
+import org.gorpipe.exceptions.GorResourceException;
 import org.gorpipe.gor.driver.GorDriverConfig;
 import org.gorpipe.gor.driver.meta.SourceReference;
 import org.gorpipe.gor.driver.providers.stream.sources.StreamSource;
@@ -15,6 +16,8 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 
+import static org.gorpipe.gor.driver.linkfile.LinkFile.LINK_FILE_VALIDATE_LOAD;
+import static org.gorpipe.gor.driver.linkfile.LinkFile.LINK_FILE_VALIDATE_SAVE;
 import static org.gorpipe.gor.driver.linkfile.LinkFileV1.LinkReuseStrategy.NO_REUSE;
 import static org.gorpipe.gor.driver.linkfile.LinkFileV1.LinkReuseStrategy.REUSE;
 import static org.junit.Assert.*;
@@ -197,6 +200,39 @@ public void testSaveLinkFileV1ToV0() throws IOException {
         assertEquals(simpleFile, savedContent.trim());
     }
 
+    @Test(expected = 
GorResourceException.class)
+    public void testInferDataFileNameFromLinkFile_Managed_TargetAlreadyExists_Throws() throws Exception {
+        // When managed and the inferred target already exists, throw to prevent silent overwrite.
+        // Mock the reader so exists() always returns true regardless of the (random) inferred path.
+        String linkFileMeta = "## VERSION = 1\n## " + LinkFileMeta.HEADER_DATA_LIFECYCLE_MANAGED_KEY + " = true";
+        FileReader mockReader = mock(FileReader.class);
+        when(mockReader.exists(anyString())).thenReturn(true);
+
+        LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("managed.gor.link"), linkFileMeta, mockReader);
+    }
+
+    @Test
+    public void testInferDataFileNameFromLinkFile_Managed_TargetMissing_Succeeds() throws Exception {
+        // When managed but the target does not yet exist, the call must succeed.
+        String linkFileMeta = "## VERSION = 1\n## " + LinkFileMeta.HEADER_DATA_LIFECYCLE_MANAGED_KEY + " = true";
+        FileReader mockReader = mock(FileReader.class);
+        when(mockReader.exists(anyString())).thenReturn(false);
+
+        String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("managed2.gor.link"), linkFileMeta, mockReader);
+        assertNotNull(result);
+    }
+
+    @Test
+    public void testInferDataFileNameFromLinkFile_Unmanaged_TargetAlreadyExists_NoThrow() throws Exception {
+        // When NOT managed, an existing target must not trigger an error.
+        String linkFileMeta = "## VERSION = 1\n## " + LinkFileMeta.HEADER_DATA_LIFECYCLE_MANAGED_KEY + " = false";
+        FileReader mockReader = mock(FileReader.class);
+        when(mockReader.exists(anyString())).thenReturn(true);
+
+        String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("unmanaged.gor.link"), linkFileMeta, mockReader);
+        assertNotNull(result);
+    }
+
     @Test(expected = IllegalArgumentException.class)
     public void testInferDataFileNameFromLinkFile_NullOrEmptyPath() throws Exception {
         LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(""), null);
@@ -452,6 +488,222 @@ public void testReuseStrategyReuseKeepsNewUrlUnManagedDontDeleteExisting() throw
         assertTrue(Files.exists(newFile)); // Verify the new file is not used and deleted.
     }
 
+    // --- MD5 fix: null/empty MD5 handling ---
+
+    @Test
+    public void testReuseStrategy_emptyMd5_doesNotMatchAnotherEmptyMd5() throws IOException {
+        // Two entries with empty MD5 must NOT be treated as duplicates via MD5 match.
+        // With REUSE strategy they should not collapse into one entry just because both have no MD5.
+        LinkFile linkFile = LinkFile.create(source, v1LinkFileContent);
+        linkFile.getMeta().setProperty(LinkFileMeta.HEADER_REUSE_STRATEGY_KEY, REUSE.name());
+        linkFile.getMeta().setProperty(LinkFileMeta.HEADER_DATA_LIFECYCLE_MANAGED_KEY, "false");
+
+        int initialCount = linkFile.getEntriesCount();
+
+        // Append two entries with no MD5 and distinct URLs — each should create a separate entry.
+        linkFile.appendEntry("no_md5_file1.gor", "", "first entry", fileReader);
+        linkFile.appendEntry("no_md5_file2.gor", "", "second entry", fileReader);
+
+        assertEquals(initialCount + 2, linkFile.getEntriesCount());
+    }
+
+    @Test
+    public void testReuseStrategy_emptyMd5_usesTimestampFallback() throws IOException {
+        // With empty MD5, canReuseEntryWithSameUrl should fall through to timestamp comparison
+        // rather than incorrectly matching two entries just because both have no MD5.
+        System.setProperty("gor.link.versioned.allow.overwrite", "false");
+        try {
+            LinkFile linkFile = LinkFile.createV1(source, "");
+            Path file1 = workPath.resolve("shared_url.gor");
+            Files.writeString(file1, "#Chrom\tPos\nchr1\t100\n");
+
+            // First append with empty MD5 — establishes the URL.
+            linkFile.appendEntry(file1.toString(), "", "first", fileReader);
+
+            // Second append with the same URL and same (empty) MD5 — the fallback is to compare
+            // timestamps. Since ver1 timestamp <= file's last-modified, it should succeed.
+            linkFile.appendEntry(file1.toString(), "", "second", fileReader);
+
+            // If we reach here without an exception the timestamp fallback was used (not a false MD5 match).
+            assertEquals(2, linkFile.getEntriesCount());
+        } finally {
+            System.clearProperty("gor.link.versioned.allow.overwrite");
+        }
+    }
+
+    // --- checkIntegrity() ---
+
+    @Test
+    public void testCheckIntegrity_cleanFile_noViolations() {
+        LinkFile linkFile = LinkFile.create(source, v1LinkFileContent);
+        var violations = linkFile.checkIntegrity();
+        assertTrue("Expected no violations for a well-formed link file", violations.isEmpty());
+    }
+
+    @Test
+    public void testCheckIntegrity_serialNotMonotonicAfterTimestampSort() {
+        // Entries are listed with descending timestamps but ascending serials, so once parsing
+        // has ordered them by timestamp (as this test expects) the serial order is violated.
+        String corruptContent = """
+                ## SERIAL = 2
+                ## VERSION = 1
+                #FILE\tTIMESTAMP\tMD5\tSERIAL\tINFO
+                a.gorz\t2024-12-15T23:25:24.533Z\tAAA\t1\t
+                b.gorz\t2024-12-15T11:21:30.790Z\tBBB\t2\t
+                """;
+        LinkFile linkFile = LinkFile.create(source, corruptContent);
+        var violations = linkFile.checkIntegrity();
+        assertFalse("Expected a serial-order violation", violations.isEmpty());
+        assertTrue(violations.stream().anyMatch(v -> v.contains("serial")));
+    }
+
+    @Test
+    public void testCheckIntegrity_duplicateSerials() {
+        String corruptContent = """
+                ## SERIAL = 2
+                ## VERSION = 1
+                #FILE\tTIMESTAMP\tMD5\tSERIAL\tINFO
+                a.gorz\t2024-12-15T11:21:30.790Z\tAAA\t2\t
+                b.gorz\t2024-12-15T23:25:24.533Z\tBBB\t2\t
+                """;
+        LinkFile linkFile = LinkFile.create(source, corruptContent);
+        var violations = linkFile.checkIntegrity();
+        assertFalse("Expected a serial-order violation", violations.isEmpty());
+        assertTrue(violations.stream().anyMatch(v -> v.contains("serial")));
+    }
+
+    @Test
+    public void testCheckIntegrity_urlReusedWithDifferentMd5() {
+        // The same URL appearing with two different non-empty MD5s means the file was overwritten.
+        String content = """
+                ## SERIAL = 2
+                ## VERSION = 1
+                #FILE\tTIMESTAMP\tMD5\tSERIAL\tINFO
+                shared.gorz\t2024-12-15T11:21:30.790Z\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\t1\t
+                shared.gorz\t2024-12-15T23:25:24.533Z\tBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\t2\t
+                """;
+        LinkFile linkFile = LinkFile.create(source, content);
+        var violations = linkFile.checkIntegrity();
+        assertFalse("Expected a URL-reuse-with-different-MD5 violation", violations.isEmpty());
+        assertTrue(violations.stream().anyMatch(v -> v.contains("shared.gorz")));
+    }
+
+    @Test
+    public void testCheckIntegrity_urlReusedWithSameMd5_noViolation() {
+        // The same URL with the same MD5 is fine (same content, URL just repeated in history).
+        String content = """
+                ## SERIAL = 2
+                ## VERSION = 1
+                #FILE\tTIMESTAMP\tMD5\tSERIAL\tINFO
+                shared.gorz\t2024-12-15T11:21:30.790Z\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\t1\t
+                shared.gorz\t2024-12-15T23:25:24.533Z\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\t2\t
+                """;
+        LinkFile linkFile = LinkFile.create(source, content);
+        var violations = linkFile.checkIntegrity();
+        assertTrue("Same URL with same MD5 should not be a violation", violations.isEmpty());
+    }
+
+    @Test
+    public void testCheckIntegrity_v0_alwaysClean() {
+        LinkFile v0 = LinkFile.createV0(source, "some/path.gorz");
+        assertTrue(v0.checkIntegrity().isEmpty());
+    }
+
+    // --- validate() ---
+
+    // Corrupt V1 content: entries are listed with descending timestamps but ascending serials,
+    // so once parsing has ordered them by timestamp the serial order is violated.
+    private static final String CORRUPT_V1_CONTENT = """
+            ## SERIAL = 2
+            ## VERSION = 1
+            #FILE\tTIMESTAMP\tMD5\tSERIAL\tINFO
+            a.gorz\t2024-12-15T23:25:24.533Z\tAAA\t1\t
+            b.gorz\t2024-12-15T11:21:30.790Z\tBBB\t2\t
+            """;
+
+    // The validation switches are JVM-wide system properties; always clear them so they cannot leak into other tests.
+    private static void clearValidateProperties() {
+        System.clearProperty(LINK_FILE_VALIDATE_LOAD);
+        System.clearProperty(LINK_FILE_VALIDATE_SAVE);
+    }
+
+    @Test
+    public void testValidate_false_cleanFile_doesNotThrow() {
+        System.setProperty(LINK_FILE_VALIDATE_LOAD, "false");
+        System.setProperty(LINK_FILE_VALIDATE_SAVE, "false");
+        try {
+            LinkFile linkFile = LinkFile.create(source, v1LinkFileContent);
+            assertNotNull(linkFile);
+            linkFile.save(fileReader);
+        } finally {
+            clearValidateProperties();
+        }
+    }
+
+    @Test
+    public void testValidate_false_corruptFile_doesNotThrow() {
+        System.setProperty(LINK_FILE_VALIDATE_LOAD, "false");
+        System.setProperty(LINK_FILE_VALIDATE_SAVE, "false");
+        try {
+            LinkFile linkFile = LinkFile.create(source, CORRUPT_V1_CONTENT);
+            assertNotNull(linkFile);
+            linkFile.save(fileReader);
+        } finally {
+            clearValidateProperties();
+        }
+    }
+
+    @Test
+    public void testValidate_true_calledOnConstruction_cleanFile() {
+        System.setProperty(LINK_FILE_VALIDATE_LOAD, "true");
+        try {
+            LinkFile.create(source, v1LinkFileContent);
+        } finally {
+            clearValidateProperties();
+        }
+    }
+
+    @Test(expected = GorResourceException.class)
+    public void testValidate_true_calledOnConstruction_corruptFile() {
+        System.setProperty(LINK_FILE_VALIDATE_LOAD, "true");
+        try {
+            LinkFile.create(source, CORRUPT_V1_CONTENT);
+        } finally {
+            clearValidateProperties();
+        }
+    }
+
+    @Test
+    public void testValidate_calledOnSave_cleanFile() {
+        System.setProperty(LINK_FILE_VALIDATE_LOAD, "false");
+        System.setProperty(LINK_FILE_VALIDATE_SAVE, "true");
+        try {
+            LinkFile linkFile = LinkFile.create(source, v1LinkFileContent);
+            linkFile.save(fileReader);
+        } finally {
+            clearValidateProperties();
+        }
+    }
+
+    @Test(expected = GorResourceException.class)
+    public void testValidate_calledOnSave_corruptFile() {
+        System.setProperty(LINK_FILE_VALIDATE_LOAD, "false");
+        System.setProperty(LINK_FILE_VALIDATE_SAVE, "true");
+        try {
+            LinkFile linkFile = LinkFile.create(source, CORRUPT_V1_CONTENT);
+            linkFile.save(fileReader);
+        } finally {
+            clearValidateProperties();
+        }
+    }
+
+    @Test
+    public void testValidate_v0_doesNotThrow() {
+        // V0 has no invariants; validate() must always succeed silently.
+        LinkFile v0 = LinkFile.createV0(source, "some/path.gorz");
+        v0.validate(); // must not throw
+    }
+
     record ReuseStrategySetupResult(LinkFile linkFile, int initialCount, LinkFileEntry latestBefore, Path newFile) {}
     private ReuseStrategySetupResult setupReuseStrategyTest(LinkFileV1.LinkReuseStrategy reuseStrategy, boolean lifecycleManaged) throws IOException {
         LinkFile linkFile = LinkFile.create(source, v1LinkFileContent);