diff --git a/README.md b/README.md index 38f6baf..77a86dc 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,9 @@ JavaFastPFOR: A simple integer compression library in Java ========================================================== -[![](https://jitpack.io/v/fast-pack/JavaFastPFor.svg)](https://jitpack.io/#fast-pack/JavaFastPFor) - [![][maven img]][maven] [![][license img]][license] [![docs-badge][]][docs] +[![](https://jitpack.io/v/fast-pack/JavaFastPFor.svg)](https://jitpack.io/#fast-pack/JavaFastPFor) [![][license img]][license] [![docs-badge][]][docs] [![Java CI](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml/badge.svg)](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml) -License -------- - -This code is released under the -Apache License Version 2.0 http://www.apache.org/licenses/. What does this do? @@ -49,13 +43,50 @@ as well as in GMAP and GSNAP (http://research-pub.gene.com/gmap/). Usage ------ -Really simple usage: ```java - IntegratedIntCompressor iic = new IntegratedIntCompressor(); - int[] data = ... 
; // to be compressed - int[] compressed = iic.compress(data); // compressed array - int[] recov = iic.uncompress(compressed); // equals to data +package org.example; + +import me.lemire.integercompression.FastPFOR128; +import me.lemire.integercompression.IntWrapper; + +import java.util.Arrays; + +public class Main { + public static void main(String[] args) { + FastPFOR128 fastpfor = new FastPFOR128(); + + int N = 9984; + int[] data = new int[N]; + for (var i = 0; i < N; i += 150) { + data[i] = i; + } + + int[] compressedoutput1 = new int[N + 1024]; + + IntWrapper inputoffset1 = new IntWrapper(0); + IntWrapper outputoffset1 = new IntWrapper(0); + + fastpfor.compress(data, inputoffset1, N, compressedoutput1, outputoffset1); + int compressedsize1 = outputoffset1.get(); + + int[] recovered1 = new int[N]; + inputoffset1 = new IntWrapper(0); + outputoffset1 = new IntWrapper(0); + fastpfor.uncompress(compressedoutput1, outputoffset1, compressedsize1, recovered1, inputoffset1); + + // quick verification: count mismatches + int mismatches = 0; + for (int i = 0; i < N; i++) { + if (data[i] != recovered1[i]) mismatches++; + } + + System.out.println("N=" + N + " compressedSizeWords=" + compressedsize1 + " mismatches=" + mismatches); + System.out.println("first 20 original: " + Arrays.toString(Arrays.copyOf(data, 20))); + System.out.println("first 20 recovered: " + Arrays.toString(Arrays.copyOf(recovered1, 20))); + } +} + ``` For more examples, see example.java or the examples folder. @@ -73,9 +104,13 @@ non-vectorized implementation. For an example usage, see examples/vector/Example.java. The feature requires JDK 19+ and is currently for advanced users. -JitPack +JavaFastPFOR as a dependency (JitPack) ------------------------ +We have a demo project using JavaFastPFOR as a dependency (both Maven and Gradle). See: + +https://github.com/fast-pack/JavaFastPFORDemo + 1. 
**Maven** Using this code in your own project is easy with maven, just add @@ -85,7 +120,7 @@ the following code in your pom.xml file: com.github.fast-pack JavaFastPFor - JavaFastPFOR-0.3.0 + JavaFastPFOR-0.3.1 ``` @@ -124,7 +159,7 @@ repositories { } dependencies { - implementation 'com.github.fast-pack:JavaFastPFor:JavaFastPFOR-0.3.0' + implementation 'com.github.fast-pack:JavaFastPFor:JavaFastPFOR-0.3.1' } ``` @@ -150,20 +185,6 @@ Nevertheless, if you want to reuse codec instances, note that by convention, unless the documentation of a codec specifies that it is not thread-safe, then it can be assumed to be thread-safe. -Authors -------- - -Main contributors -* Daniel Lemire, http://lemire.me/en/ -* Muraoka Taro, https://github.com/koron - -with contributions by -* the Terrier team (Matteo Catena, Craig Macdonald, Saúl Vargas and Iadh Ounis) -* Di Wu, http://www.facebook.com/diwu1989 -* Stefan Ackermann, https://github.com/Stivo -* Samit Roy, https://github.com/roysamit -* Mulugeta Mammo, https://github.com/mulugetam (for VectorFastPFOR) - How does it compare to the Kamikaze PForDelta library? ------------------------------------------------------ @@ -183,7 +204,7 @@ Requirements Releases up to 0.1.12 require Java 7 or better. -The current development versions assume JDK 11 or better. +The current development versions assume JDK 21 or better. @@ -267,8 +288,6 @@ Funding This work was supported by NSERC grant number 26143. 
-[maven img]:https://maven-badges.herokuapp.com/maven-central/me.lemire.integercompression/JavaFastPFOR/badge.svg -[maven]:http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22me.lemire.integercompression%22%20 [license]:LICENSE [license img]:https://img.shields.io/badge/License-Apache%202-blue.svg diff --git a/pom.xml b/pom.xml index e88e3f8..dd1665e 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 me.lemire.integercompression JavaFastPFOR - 0.3.1 + 0.3.2 jar 21 @@ -22,7 +22,7 @@ scm:git:git@github.com:fast-pack/JavaFastPFOR.git scm:git:git@github.com:fast-pack/JavaFastPFOR.git scm:git:git@github.com:fast-pack/JavaFastPFOR.git - JavaFastPFOR-0.3.1 + JavaFastPFOR-0.3.2 diff --git a/src/main/java/me/lemire/longcompression/LongVariableByte.java b/src/main/java/me/lemire/longcompression/LongVariableByte.java index e60ebd0..63c194b 100644 --- a/src/main/java/me/lemire/longcompression/LongVariableByte.java +++ b/src/main/java/me/lemire/longcompression/LongVariableByte.java @@ -91,7 +91,7 @@ public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] o buf.put((byte) extract7bits(5, val)); buf.put((byte) extract7bits(6, val)); buf.put((byte) (extract7bitsmaskless(7, (val)) | (1 << 7))); - } else if (val >= 0 && val < (1L << 63)) { + } else if (val >= 0) { buf.put((byte) extract7bits(0, val)); buf.put((byte) extract7bits(1, val)); buf.put((byte) extract7bits(2, val)); @@ -176,7 +176,7 @@ public void compress(long[] in, IntWrapper inpos, int inlength, byte[] out, out[outpostmp++] = (byte) extract7bits(5, val); out[outpostmp++] = (byte) extract7bits(6, val); out[outpostmp++] = (byte) (extract7bitsmaskless(7, (val)) | (1 << 7)); - } else if (val >= 0 && val < (1L << 63)) { + } else if (val >= 0) { out[outpostmp++] = (byte) extract7bits(0, val); out[outpostmp++] = (byte) extract7bits(1, val); out[outpostmp++] = (byte) extract7bits(2, val); diff --git a/src/test/java/me/lemire/longcompression/TestLongVariableByte.java 
b/src/test/java/me/lemire/longcompression/TestLongVariableByte.java index ee1755a..3cb2a49 100644 --- a/src/test/java/me/lemire/longcompression/TestLongVariableByte.java +++ b/src/test/java/me/lemire/longcompression/TestLongVariableByte.java @@ -24,11 +24,17 @@ public LongCODEC getCodec() { } @Test - public void testCodec_intermediateHighPowerOfTwo() { - Assert.assertEquals(1, LongTestUtils.compress((LongCODEC) codec, new long[] { 1L << 42 }).length); - Assert.assertEquals(7, LongTestUtils.compress((ByteLongCODEC) codec, new long[] { 1L << 42 }).length); - Assert.assertEquals(1, - LongTestUtils.compressHeadless((SkippableLongCODEC) codec, new long[] { 1L << 42 }).length); - } + public void testCodec_allBitWidths() { + for (int bitWidth = 0; bitWidth <= 64; bitWidth++) { + long value = bitWidth == 0 ? 0 : 1L << (bitWidth - 1); + + int expectedSizeInBytes = Math.max(1, (bitWidth + 6) / 7); + int expectedSizeInLongs = (expectedSizeInBytes > 8) ? 2 : 1; + Assert.assertEquals(expectedSizeInLongs, LongTestUtils.compress((LongCODEC) codec, new long[] { value }).length); + Assert.assertEquals(expectedSizeInBytes, LongTestUtils.compress((ByteLongCODEC) codec, new long[] { value }).length); + Assert.assertEquals(expectedSizeInLongs, + LongTestUtils.compressHeadless((SkippableLongCODEC) codec, new long[] { value }).length); + } + } }