From 9640566814326385a73c0f794f4c6d36050200d5 Mon Sep 17 00:00:00 2001 From: Piotr Rzysko Date: Mon, 29 Sep 2025 18:21:32 +0200 Subject: [PATCH 1/8] Fix variable-byte encoding of large longs Previously, the branch encoding longs in 9 bytes was unreachable because the condition `val < (1L << 63)` is always false. Only negative longs should be encoded using 10 bytes. --- .../longcompression/LongVariableByte.java | 4 ++-- .../longcompression/TestLongVariableByte.java | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/main/java/me/lemire/longcompression/LongVariableByte.java b/src/main/java/me/lemire/longcompression/LongVariableByte.java index ad2b0eb..884d538 100644 --- a/src/main/java/me/lemire/longcompression/LongVariableByte.java +++ b/src/main/java/me/lemire/longcompression/LongVariableByte.java @@ -90,7 +90,7 @@ public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] o buf.put((byte) extract7bits(5, val)); buf.put((byte) extract7bits(6, val)); buf.put((byte) (extract7bitsmaskless(7, (val)) | (1 << 7))); - } else if (val >= 0 && val < (1L << 63)) { + } else if (val >= 0) { buf.put((byte) extract7bits(0, val)); buf.put((byte) extract7bits(1, val)); buf.put((byte) extract7bits(2, val)); @@ -175,7 +175,7 @@ public void compress(long[] in, IntWrapper inpos, int inlength, byte[] out, out[outpostmp++] = (byte) extract7bits(5, val); out[outpostmp++] = (byte) extract7bits(6, val); out[outpostmp++] = (byte) (extract7bitsmaskless(7, (val)) | (1 << 7)); - } else if (val >= 0 && val < (1L << 63)) { + } else if (val >= 0) { out[outpostmp++] = (byte) extract7bits(0, val); out[outpostmp++] = (byte) extract7bits(1, val); out[outpostmp++] = (byte) extract7bits(2, val); diff --git a/src/test/java/me/lemire/longcompression/TestLongVariableByte.java b/src/test/java/me/lemire/longcompression/TestLongVariableByte.java index ee1755a..3cb2a49 100644 --- a/src/test/java/me/lemire/longcompression/TestLongVariableByte.java +++ b/src/test/java/me/lemire/longcompression/TestLongVariableByte.java @@ -24,11 +24,17 @@ public LongCODEC getCodec() { } @Test - public void testCodec_intermediateHighPowerOfTwo() { - Assert.assertEquals(1, LongTestUtils.compress((LongCODEC) codec, new long[] { 1L << 42 }).length); - Assert.assertEquals(7, LongTestUtils.compress((ByteLongCODEC) codec, new long[] { 1L << 42 }).length); - Assert.assertEquals(1, - LongTestUtils.compressHeadless((SkippableLongCODEC) codec, new long[] { 1L << 42 }).length); - } + public void testCodec_allBitWidths() { + for (int bitWidth = 0; bitWidth <= 64; bitWidth++) { + long value = bitWidth == 0 ? 0 : 1L << (bitWidth - 1); + + int expectedSizeInBytes = Math.max(1, (bitWidth + 6) / 7); + int expectedSizeInLongs = (expectedSizeInBytes > 8) ? 2 : 1; + Assert.assertEquals(expectedSizeInLongs, LongTestUtils.compress((LongCODEC) codec, new long[] { value }).length); + Assert.assertEquals(expectedSizeInBytes, LongTestUtils.compress((ByteLongCODEC) codec, new long[] { value }).length); + Assert.assertEquals(expectedSizeInLongs, + LongTestUtils.compressHeadless((SkippableLongCODEC) codec, new long[] { value }).length); + } + } } From a0a5affd5eb0e7b16186fd38fa8a19ff466f317d Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 3 Oct 2025 20:49:19 -0400 Subject: [PATCH 2/8] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index e88e3f8..e873dab 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 me.lemire.integercompression JavaFastPFOR - 0.3.1 + 0.3.2-SNAPSHOT jar 21 @@ -22,7 +22,7 @@ scm:git:git@github.com:fast-pack/JavaFastPFOR.git scm:git:git@github.com:fast-pack/JavaFastPFOR.git scm:git:git@github.com:fast-pack/JavaFastPFOR.git - JavaFastPFOR-0.3.1 + HEAD From f92dc36074b43c3f7daf053f91de6bfc4a3abd5e Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 3 Oct 2025 21:08:07 -0400 Subject: [PATCH 3/8] Update JavaFastPFOR version to 0.3.1 Updated the dependency version for JavaFastPFOR in both Maven and Gradle sections of the README. --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 38f6baf..b738fea 100644 --- a/README.md +++ b/README.md @@ -73,9 +73,13 @@ non-vectorized implementation. For an example usage, see examples/vector/Example.java. The feature requires JDK 19+ and is currently for advanced users. -JitPack +JavaFastPFOR as a dependency (JitPack) ------------------------ +We have a demo project using JavaFastPFOR as a dependency (both Maven and Gradle). See... + +https://github.com/fast-pack/JavaFastPFORDemo + 1. **Maven** Using this code in your own project is easy with maven, just add @@ -85,7 +89,7 @@ the following code in your pom.xml file: com.github.fast-pack JavaFastPFor - JavaFastPFOR-0.3.0 + JavaFastPFOR-0.3.1 ``` @@ -124,7 +128,7 @@ repositories { } dependencies { - implementation 'com.github.fast-pack:JavaFastPFor:JavaFastPFOR-0.3.0' + implementation 'com.github.fast-pack:JavaFastPFor:JavaFastPFOR-0.3.1' } ``` From 597a7ef18931c8986580abbecce08343d3926efb Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 3 Oct 2025 21:09:41 -0400 Subject: [PATCH 4/8] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index b738fea..f073db3 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ JavaFastPFOR: A simple integer compression library in Java ========================================================== -[![](https://jitpack.io/v/fast-pack/JavaFastPFor.svg)](https://jitpack.io/#fast-pack/JavaFastPFor) - [![][maven img]][maven] [![][license img]][license] [![docs-badge][]][docs] +[![](https://jitpack.io/v/fast-pack/JavaFastPFor.svg)](https://jitpack.io/#fast-pack/JavaFastPFor) [![][license img]][license] [![docs-badge][]][docs] [![Java CI](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml/badge.svg)](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml) From bc4478110417dba21de19de3b8e313b4e10ef1f5 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 3 Oct 2025 21:10:44 -0400 Subject: [PATCH 5/8] Update README by removing authors and changing JDK version Removed author section and updated JDK version requirement. --- README.md | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/README.md b/README.md index f073db3..2e6853a 100644 --- a/README.md +++ b/README.md @@ -153,20 +153,6 @@ Nevertheless, if you want to reuse codec instances, note that by convention, unless the documentation of a codec specify that it is not thread-safe, then it can be assumed to be thread-safe. -Authors -------- - -Main contributors -* Daniel Lemire, http://lemire.me/en/ -* Muraoka Taro, https://github.com/koron - -with contributions by -* the Terrier team (Matteo Catena, Craig Macdonald, Saúl Vargas and Iadh Ounis) -* Di Wu, http://www.facebook.com/diwu1989 -* Stefan Ackermann, https://github.com/Stivo -* Samit Roy, https://github.com/roysamit -* Mulugeta Mammo, https://github.com/mulugetam (for VectorFastPFOR) - How does it compare to the Kamikaze PForDelta library? ------------------------------------------------------ @@ -186,7 +172,7 @@ Requirements Releases up to 0.1.12 require Java 7 or better. -The current development versions assume JDK 11 or better. +The current development versions assume JDK 21 or better. @@ -270,8 +256,6 @@ Funding This work was supported by NSERC grant number 26143. -[maven img]:https://maven-badges.herokuapp.com/maven-central/me.lemire.integercompression/JavaFastPFOR/badge.svg -[maven]:http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22me.lemire.integercompression%22%20 [license]:LICENSE [license img]:https://img.shields.io/badge/License-Apache%202-blue.svg From 73f164beea740c9609336f20c1eed70841dc0a5b Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 3 Oct 2025 21:12:11 -0400 Subject: [PATCH 6/8] Revise README with Java example for compression Updated usage section with a complete Java example demonstrating the compression and uncompression process using FastPFOR128. --- README.md | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2e6853a..63193e9 100644 --- a/README.md +++ b/README.md @@ -48,13 +48,50 @@ as well as in GMAP and GSNAP (http://research-pub.gene.com/gmap/). Usage ------ -Really simple usage: ```java - IntegratedIntCompressor iic = new IntegratedIntCompressor(); - int[] data = ... ; // to be compressed - int[] compressed = iic.compress(data); // compressed array - int[] recov = iic.uncompress(compressed); // equals to data +package org.example; + +import me.lemire.integercompression.FastPFOR128; +import me.lemire.integercompression.IntWrapper; + +import java.util.Arrays; + +public class Main { + public static void main(String[] args) { + FastPFOR128 fastpfor = new FastPFOR128(); + + int N = 9984; + int[] data = new int[N]; + for (var i = 0; i < N; i += 150) { + data[i] = i; + } + + int[] compressedoutput1 = new int[N + 1024]; + + IntWrapper inputoffset1 = new IntWrapper(0); + IntWrapper outputoffset1 = new IntWrapper(0); + + fastpfor.compress(data, inputoffset1, N, compressedoutput1, outputoffset1); + int compressedsize1 = outputoffset1.get(); + + int[] recovered1 = new int[N]; + inputoffset1 = new IntWrapper(0); + outputoffset1 = new IntWrapper(0); + fastpfor.uncompress(compressedoutput1, outputoffset1, compressedsize1, recovered1, inputoffset1); + + // quick verification: count mismatches + int mismatches = 0; + for (int i = 0; i < N; i++) { + if (data[i] != recovered1[i]) mismatches++; + } + + System.out.println("N=" + N + " compressedSizeWords=" + compressedsize1 + " mismatches=" + mismatches); + System.out.println("first 20 original: " + Arrays.toString(Arrays.copyOf(data, 20))); + System.out.println("first 20 recovered: " + Arrays.toString(Arrays.copyOf(recovered1, 20))); + } +} + ``` For more examples, see example.java or the examples folder. From 1414beb03b85f0568c088937367219c57ff953fd Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 3 Oct 2025 21:13:20 -0400 Subject: [PATCH 7/8] Remove license information from README Removed license section from README. --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index 63193e9..77a86dc 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,6 @@ JavaFastPFOR: A simple integer compression library in Java [![Java CI](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml/badge.svg)](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml) -License -------- - -This code is released under the -Apache License Version 2.0 http://www.apache.org/licenses/. What does this do? From 43888441156c73e6bcf7fcf4897e399fbec5ccdd Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 3 Oct 2025 22:33:57 -0400 Subject: [PATCH 8/8] [maven-release-plugin] prepare release JavaFastPFOR-0.3.2 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index e873dab..dd1665e 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 me.lemire.integercompression JavaFastPFOR - 0.3.2-SNAPSHOT + 0.3.2 jar 21 @@ -22,7 +22,7 @@ scm:git:git@github.com:fast-pack/JavaFastPFOR.git scm:git:git@github.com:fast-pack/JavaFastPFOR.git scm:git:git@github.com:fast-pack/JavaFastPFOR.git - HEAD + JavaFastPFOR-0.3.2